diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 5aaed9c078a2866317018e4628a8df43c7e94a35..4da61f4dafbb6a84dc1f95c6b1b75b7dc5f35a11 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -30,3 +30,9 @@ - --show-source - --statistics files: \.py$ + +- repo: https://github.com/asottile/reorder_python_imports + rev: v2.4.0 + hooks: + - id: reorder-python-imports + exclude: (?=third_party).*(\.py)$ diff --git a/README.md b/README.md index 3761d29154618d689587dcbb997a66e1d9970605..6d8ee0691a0232a4043682437bc985c46d5bf39b 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ English | [简体中文](README_ch.md)

-

QuickStart | Tutorial | Models List | Demos

+

QuickStart | Tutorial | Models List | Demos

------------------------------------------------------------------------------------------ @@ -29,7 +29,7 @@ English | [简体中文](README_ch.md) ## Introduction and Features - **PaddleHub** aims to provide developers with rich, high-quality, and directly usable pre-trained models. -- **Abundant Pre-trained Models**: 300+ pre-trained models cover the 5 major categories, including Image, Text, Audio, Video, and Industrial application. All of them are free for download and offline usage. +- **Abundant Pre-trained Models**: 360+ pre-trained models cover the 5 major categories, including Image, Text, Audio, Video, and Industrial application. All of them are free for download and offline usage. - **No Need for Deep Learning Background**: you can use AI models quickly and enjoy the dividends of the artificial intelligence era. - **Quick Model Prediction**: model prediction can be realized through a few lines of scripts to quickly experience the model effect. - **Model As Service**: one-line command to build deep learning model API service deployment capabilities. @@ -38,6 +38,7 @@ English | [简体中文](README_ch.md) ### Recent updates - **2022.02.18:** Added Huggingface Org, add spaces and models to the org: [PaddlePaddle Huggingface](https://huggingface.co/PaddlePaddle) +- **2021.12.22**,The v2.2.0 version is released. [1]More than 100 new models released,including dialog, speech, segmentation, OCR, text processing, GANs, and many other categories. The total number of pre-trained models reaches [**【360】**](https://www.paddlepaddle.org.cn/hublist). [2]Add an [indexed file](./modules/README.md) including useful information of pretrained models supported by PaddleHub. [3]Refactor README of pretrained models. - **2021.05.12:** Add an open-domain dialogue system, i.e., [plato-mini](https://www.paddlepaddle.org.cn/hubdetail?name=plato-mini&en_category=TextGeneration), to make it easy to build a chatbot in wechat with the help of the wechaty, [See Demo](https://github.com/KPatr1ck/paddlehub-wechaty-demo) - **2021.04.27:** The v2.1.0 version is released. [1] Add supports for five new models, including two high-precision semantic segmentation models based on VOC dataset and three voice classification models. [2] Enforce the transfer learning capabilities for image semantic segmentation, text semantic matching and voice classification on related datasets. [3] Add the export function APIs for two kinds of model formats, i.,e, ONNX and PaddleInference. [4] Add the support for [BentoML](https://github.com/bentoml/BentoML/), which is a cloud native framework for serving deployment. Users can easily serve pre-trained models from PaddleHub by following the [Tutorial notebooks](https://github.com/PaddlePaddle/PaddleHub/blob/release/v2.1/demo/serving/bentoml/cloud-native-model-serving-with-bentoml.ipynb). Also, see this announcement and [Release note](https://github.com/bentoml/BentoML/releases/tag/v0.12.1) from BentoML. (Many thanks to @[parano](https://github.com/parano) @[cqvu](https://github.com/cqvu) @[deehrlic](https://github.com/deehrlic) for contributing this feature in PaddleHub). [5] The total number of pre-trained models reaches **【300】**. 
- **2021.02.18:** The v2.0.0 version is released, making model development and debugging easier, and the finetune task is more flexible and easy to use.The ability to transfer learning for visual tasks is fully upgraded, supporting various tasks such as image classification, image coloring, and style transfer; Transformer models such as BERT, ERNIE, and RoBERTa are upgraded to dynamic graphs, supporting Fine-Tune capabilities for text classification and sequence labeling; Optimize the Serving capability, support multi-card prediction, automatic load balancing, and greatly improve performance; the new automatic data enhancement capability Auto Augment can efficiently search for data enhancement strategy combinations suitable for data sets. 61 new word vector models were added, including 51 Chinese models and 10 English models; add 4 image segmentation models, 2 depth models, 7 image generation models, and 3 text generation models, the total number of pre-trained models reaches **【274】**. @@ -46,8 +47,8 @@ English | [简体中文](README_ch.md) -## Visualization Demo [[More]](./docs/docs_en/visualization.md) -### **Computer Vision (161 models)** +## Visualization Demo [[More]](./docs/docs_en/visualization.md) [[ModelList]](./modules) +### **[Computer Vision (212 models)](./modules#Image)**
@@ -55,7 +56,7 @@ English | [简体中文](README_ch.md) - Many thanks to CopyRight@[PaddleOCR](https://github.com/PaddlePaddle/PaddleOCR)、[PaddleDetection](https://github.com/PaddlePaddle/PaddleDetection)、[PaddleGAN](https://github.com/PaddlePaddle/PaddleGAN)、[AnimeGAN](https://github.com/TachibanaYoshino/AnimeGANv2)、[openpose](https://github.com/CMU-Perceptual-Computing-Lab/openpose)、[PaddleSeg](https://github.com/PaddlePaddle/PaddleSeg)、[Zhengxia Zou](https://github.com/jiupinjia/SkyAR)、[PaddleClas](https://github.com/PaddlePaddle/PaddleClas) for the pre-trained models, you can try to train your models with them. -### **Natural Language Processing (129 models)** +### **[Natural Language Processing (130 models)](./modules#Text)**
@@ -64,9 +65,37 @@ English | [简体中文](README_ch.md) -### Speech (3 models) +### [Speech (15 models)](./modules#Audio) +- ASR speech recognition algorithm, multiple algorithms are available. +- The speech recognition effect is as follows: +
+| Input Audio | Recognition Result |
+| -- | -- |
+| (audio sample) | I knocked at the door on the ancient side of the building. |
+| (audio sample) | 我认为跑步最重要的就是给我带来了身体健康。 |
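Any of the ASR modules indexed later in this patch (for example `u2_conformer_wenetspeech` or `deepspeech2_aishell`) can be loaded by name like other PaddleHub modules. A minimal sketch, assuming the module exposes a `speech_recognize` method and using a placeholder path for a local 16 kHz mono wav file (check the module's own README for the exact interface):

```python
import paddlehub as hub

if __name__ == '__main__':
    # Module name taken from the ASR index added in this patch.
    model = hub.Module(name='u2_conformer_wenetspeech')
    # speech_recognize is assumed here; see the module's README for the exact API.
    text = model.speech_recognize('/PATH/TO/AUDIO.wav')
    print(text)
```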
+ - TTS speech synthesis algorithm, multiple algorithms are available. -- Many thanks to CopyRight@[Parakeet](https://github.com/PaddlePaddle/Parakeet) for the pre-trained models, you can try to train your models with Parakeet. - Input: `Life was like a box of chocolates, you never know what you're gonna get.` - The synthesis effect is as follows:
@@ -97,7 +126,9 @@ English | [简体中文](README_ch.md)
-### Video (8 models) +- Many thanks to CopyRight@[PaddleSpeech](https://github.com/PaddlePaddle/PaddleSpeech) for the pre-trained models, you can try to train your models with PaddleSpeech. + +### [Video (8 models)](./modules#Video) - Short video classification trained via large-scale video datasets, supports 3000+ tag types prediction for short Form Videos. - Many thanks to CopyRight@[PaddleVideo](https://github.com/PaddlePaddle/PaddleVideo) for the pre-trained model, you can try to train your models with PaddleVideo. - `Example: Input a short video of swimming, the algorithm can output the result of "swimming"` diff --git a/README_ch.md b/README_ch.md index 6f55ef522bc2c5787c78a56551f6dbadad40484d..85ed7430bbf74b7f36ff04b701ecf1f2a7cf3f19 100644 --- a/README_ch.md +++ b/README_ch.md @@ -4,7 +4,7 @@

-

快速开始 | 教程文档 | 模型搜索 | 演示Demo +

快速开始 | 教程文档 | 模型库 | 演示Demo

@@ -30,7 +30,7 @@ ## 简介与特性 - PaddleHub旨在为开发者提供丰富的、高质量的、直接可用的预训练模型 -- **【模型种类丰富】**: 涵盖CV、NLP、Audio、Video、工业应用主流五大品类的 350+ 预训练模型,全部开源下载,离线可运行 +- **【模型种类丰富】**: 涵盖CV、NLP、Audio、Video、工业应用主流五大品类的 **360+** 预训练模型,全部开源下载,离线可运行 - **【超低使用门槛】**:无需深度学习背景、无需数据与训练过程,可快速使用AI模型 - **【一键模型快速预测】**:通过一行命令行或者极简的Python API实现模型调用,可快速体验模型效果 - **【一键模型转服务化】**:一行命令,搭建深度学习模型API服务化部署能力 @@ -38,6 +38,7 @@ - **【跨平台兼容性】**:可运行于Linux、Windows、MacOS等多种操作系统 ## 近期更新 +- **2021.12.22**,发布v2.2.0版本。【1】新增100+高质量模型,涵盖对话、语音处理、语义分割、文字识别、文本处理、图像生成等多个领域,预训练模型总量达到[**【360+】**](https://www.paddlepaddle.org.cn/hublist);【2】新增模型[检索列表](./modules/README_ch.md),包含模型名称、网络、数据集和使用场景等信息,快速定位用户所需的模型;【3】模型文档排版优化,呈现数据集、指标、模型大小等更多实用信息。 - **2021.05.12**,新增轻量级中文对话模型[plato-mini](https://www.paddlepaddle.org.cn/hubdetail?name=plato-mini&en_category=TextGeneration),可以配合使用wechaty实现微信闲聊机器人,[参考demo](https://github.com/KPatr1ck/paddlehub-wechaty-demo) - **2021.04.27**,发布v2.1.0版本。【1】新增基于VOC数据集的高精度语义分割模型2个,语音分类模型3个。【2】新增图像语义分割、文本语义匹配、语音分类等相关任务的Fine-Tune能力以及相关任务数据集;完善部署能力:【3】新增ONNX和PaddleInference等模型格式的导出功能。【4】新增[BentoML](https://github.com/bentoml/BentoML) 云原生服务化部署能力,可以支持统一的多框架模型管理和模型部署的工作流,[详细教程](https://github.com/PaddlePaddle/PaddleHub/blob/release/v2.1/demo/serving/bentoml/cloud-native-model-serving-with-bentoml.ipynb). 更多内容可以参考BentoML 最新 v0.12.1 [Releasenote](https://github.com/bentoml/BentoML/releases/tag/v0.12.1).(感谢@[parano](https://github.com/parano) @[cqvu](https://github.com/cqvu) @[deehrlic](https://github.com/deehrlic))的贡献与支持。【5】预训练模型总量达到[**【300】**](https://www.paddlepaddle.org.cn/hublist)个。 - **2021.02.18**,发布v2.0.0版本,【1】模型开发调试更简单,finetune接口更加灵活易用。视觉类任务迁移学习能力全面升级,支持[图像分类](./demo/image_classification/README.md)、[图像着色](./demo/colorization/README.md)、[风格迁移](./demo/style_transfer/README.md)等多种任务;BERT、ERNIE、RoBERTa等Transformer类模型升级至动态图,支持[文本分类](./demo/text_classification/README.md)、[序列标注](./demo/sequence_labeling/README.md)的Fine-Tune能力;【2】优化服务化部署Serving能力,支持多卡预测、自动负载均衡,性能大幅度提升;【3】新增自动数据增强能力[Auto Augment](./demo/autoaug/README.md),能高效地搜索适合数据集的数据增强策略组合。【4】新增[词向量模型](./modules/text/embedding)61个,其中包含中文模型51个,英文模型10个;新增[图像分割](./modules/thirdparty/image/semantic_segmentation)模型4个、[深度模型](./modules/thirdparty/image/depth_estimation)2个、[图像生成](./modules/thirdparty/image/Image_gan/style_transfer)模型7个、[文本生成](./modules/thirdparty/text/text_generation)模型3个。【5】预训练模型总量达到[**【274】**](https://www.paddlepaddle.org.cn/hublist) 个。 @@ -47,9 +48,9 @@ -## **精品模型效果展示[【更多】](./docs/docs_ch/visualization.md)** +## **精品模型效果展示[【更多】](./docs/docs_ch/visualization.md)[【模型库】](./modules/README_ch.md)** -### **图像类(161个)** +### **[图像类(212个)](./modules/README_ch.md#图像)** - 包括图像分类、人脸检测、口罩检测、车辆检测、人脸/人体/手部关键点检测、人像分割、80+语言文本识别、图像超分/上色/动漫化等
@@ -58,7 +59,7 @@ - 感谢CopyRight@[PaddleOCR](https://github.com/PaddlePaddle/PaddleOCR)、[PaddleDetection](https://github.com/PaddlePaddle/PaddleDetection)、[PaddleGAN](https://github.com/PaddlePaddle/PaddleGAN)、[AnimeGAN](https://github.com/TachibanaYoshino/AnimeGANv2)、[openpose](https://github.com/CMU-Perceptual-Computing-Lab/openpose)、[PaddleSeg](https://github.com/PaddlePaddle/PaddleSeg)、[Zhengxia Zou](https://github.com/jiupinjia/SkyAR)、[PaddleClas](https://github.com/PaddlePaddle/PaddleClas) 提供相关预训练模型,训练能力开放,欢迎体验。 -### **文本类(129个)** +### **[文本类(130个)](./modules/README_ch.md#文本)** - 包括中文分词、词性标注与命名实体识别、句法分析、AI写诗/对联/情话/藏头诗、中文的评论情感分析、中文色情文本审核等
@@ -67,9 +68,37 @@ - 感谢CopyRight@[ERNIE](https://github.com/PaddlePaddle/ERNIE)、[LAC](https://github.com/baidu/LAC)、[DDParser](https://github.com/baidu/DDParser)提供相关预训练模型,训练能力开放,欢迎体验。 -### **语音类(3个)** +### **[语音类(15个)](./modules/README_ch.md#语音)** +- ASR语音识别算法,多种算法可选 +- 语音识别效果如下: +
+| Input Audio | Recognition Result |
+| -- | -- |
+| (audio sample) | I knocked at the door on the ancient side of the building. |
+| (audio sample) | 我认为跑步最重要的就是给我带来了身体健康。 |
+ - TTS语音合成算法,多种算法可选 -- 感谢CopyRight@[Parakeet](https://github.com/PaddlePaddle/Parakeet)提供预训练模型,训练能力开放,欢迎体验。 - 输入:`Life was like a box of chocolates, you never know what you're gonna get.` - 合成效果如下:
@@ -100,7 +129,9 @@
-### **视频类(8个)** +- 感谢CopyRight@[PaddleSpeech](https://github.com/PaddlePaddle/PaddleSpeech)提供预训练模型,训练能力开放,欢迎体验。 + +### **[视频类(8个)](./modules/README_ch.md#视频)** - 包含短视频分类,支持3000+标签种类,可输出TOP-K标签,多种算法可选。 - 感谢CopyRight@[PaddleVideo](https://github.com/PaddlePaddle/PaddleVideo)提供预训练模型,训练能力开放,欢迎体验。 - `举例:输入一段游泳的短视频,算法可以输出"游泳"结果` diff --git a/demo/image_classification/README.md b/demo/image_classification/README.md index 489df7782630304f61208185a2b775a7bd83d26e..1ccb9ba5845ff0760961476d8950a9e1a3a0ce33 100644 --- a/demo/image_classification/README.md +++ b/demo/image_classification/README.md @@ -8,6 +8,18 @@ $ hub run resnet50_vd_imagenet_ssld --input_path "/PATH/TO/IMAGE" --top_k 5 ``` +## 脚本预测 + +```python +import paddle +import paddlehub as hub + +if __name__ == '__main__': + + model = hub.Module(name='resnet50_vd_imagenet_ssld',) + result = model.predict([PATH/TO/IMAGE]) +``` + ## 如何开始Fine-tune 在完成安装PaddlePaddle与PaddleHub后,通过执行`python train.py`即可开始使用resnet50_vd_imagenet_ssld对[Flowers](../../docs/reference/datasets.md#class-hubdatasetsflowers)等数据集进行Fine-tune。 diff --git a/demo/sequence_labeling/README.md b/demo/sequence_labeling/README.md index 3e04afe4fe0040769ffc74dbbfd6ecd547bc2cea..5453b3a5046be6f7815c2292fe63dee490336cf7 100644 --- a/demo/sequence_labeling/README.md +++ b/demo/sequence_labeling/README.md @@ -91,10 +91,12 @@ train_dataset = hub.datasets.MSRA_NER( tokenizer=model.get_tokenizer(), max_seq_len=128, mode='train') dev_dataset = hub.datasets.MSRA_NER( tokenizer=model.get_tokenizer(), max_seq_len=128, mode='dev') +test_dataset = hub.datasets.MSRA_NER( + tokenizer=model.get_tokenizer(), max_seq_len=128, mode='test') ``` * `tokenizer`:表示该module所需用到的tokenizer,其将对输入文本完成切词,并转化成module运行所需模型输入格式。 -* `mode`:选择数据模式,可选项有 `train`, `test`, `val`, 默认为`train`。 +* `mode`:选择数据模式,可选项有 `train`, `test`, `dev`, 默认为`train`。 * `max_seq_len`:ERNIE/BERT模型使用的最大序列长度,若出现显存不足,请适当调低这一参数。 预训练模型ERNIE对中文数据的处理是以字为单位,tokenizer作用为将原始输入文本转化成模型model可以接受的输入数据形式。 PaddleHub 2.0中的各种预训练模型已经内置了相应的tokenizer,可以通过`model.get_tokenizer`方法获取。 @@ -106,7 +108,7 @@ dev_dataset = hub.datasets.MSRA_NER( ```python optimizer = paddle.optimizer.AdamW(learning_rate=5e-5, parameters=model.parameters()) -trainer = hub.Trainer(model, optimizer, checkpoint_dir='test_ernie_token_cls', use_gpu=False) +trainer = hub.Trainer(model, optimizer, checkpoint_dir='test_ernie_token_cls', use_gpu=True) trainer.train(train_dataset, epochs=3, batch_size=32, eval_dataset=dev_dataset) diff --git a/demo/style_transfer/README.md b/demo/style_transfer/README.md index d05184898623ecd4aa5f949bcd945c7b62e5ba3c..bf3caa6c26a754452da5a3a66d8ec9d890956a1b 100644 --- a/demo/style_transfer/README.md +++ b/demo/style_transfer/README.md @@ -8,6 +8,17 @@ $ hub run msgnet --input_path "/PATH/TO/ORIGIN/IMAGE" --style_path "/PATH/TO/STYLE/IMAGE" ``` +## 脚本预测 + +```python +import paddle +import paddlehub as hub + +if __name__ == '__main__': + model = hub.Module(name='msgnet') + result = model.predict(origin=["venice-boat.jpg"], style="candy.jpg", visualization=True, save_path ='style_tranfer') +``` + ## 如何开始Fine-tune 在完成安装PaddlePaddle与PaddleHub后,通过执行`python train.py`即可开始使用msgnet模型对[MiniCOCO](../../docs/reference/datasets.md#class-hubdatasetsMiniCOCO)等数据集进行Fine-tune。 @@ -164,4 +175,4 @@ https://github.com/zhanghang1989/PyTorch-Multi-Style-Transfer paddlepaddle >= 2.0.0rc -paddlehub >= 2.0.0 +paddlehub >= 2.0.0 \ No newline at end of file diff --git a/demo/text_classification/README.md b/demo/text_classification/README.md index 
d4fae926ec864d64fb3710dfe9f90a23ba58dca2..2de5b98c06c9e97e0819ffb2e7f9be660d94e8d0 100644 --- a/demo/text_classification/README.md +++ b/demo/text_classification/README.md @@ -80,10 +80,12 @@ train_dataset = hub.datasets.ChnSentiCorp( tokenizer=model.get_tokenizer(), max_seq_len=128, mode='train') dev_dataset = hub.datasets.ChnSentiCorp( tokenizer=model.get_tokenizer(), max_seq_len=128, mode='dev') +test_dataset = hub.datasets.ChnSentiCorp( + tokenizer=model.get_tokenizer(), max_seq_len=128, mode='test') ``` * `tokenizer`:表示该module所需用到的tokenizer,其将对输入文本完成切词,并转化成module运行所需模型输入格式。 -* `mode`:选择数据模式,可选项有 `train`, `test`, `val`, 默认为`train`。 +* `mode`:选择数据模式,可选项有 `train`, `test`, `dev`, 默认为`train`。 * `max_seq_len`:ERNIE/BERT模型使用的最大序列长度,若出现显存不足,请适当调低这一参数。 预训练模型ERNIE对中文数据的处理是以字为单位,tokenizer作用为将原始输入文本转化成模型model可以接受的输入数据形式。 PaddleHub 2.0中的各种预训练模型已经内置了相应的tokenizer,可以通过`model.get_tokenizer`方法获取。 @@ -95,7 +97,7 @@ dev_dataset = hub.datasets.ChnSentiCorp( ```python optimizer = paddle.optimizer.Adam(learning_rate=5e-5, parameters=model.parameters()) -trainer = hub.Trainer(model, optimizer, checkpoint_dir='test_ernie_text_cls') +trainer = hub.Trainer(model, optimizer, checkpoint_dir='test_ernie_text_cls', use_gpu=True) trainer.train(train_dataset, epochs=3, batch_size=32, eval_dataset=dev_dataset) diff --git a/modules/README.md b/modules/README.md index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..738eb51974efe18abf08256a3f0b3409b28002ad 100644 --- a/modules/README.md +++ b/modules/README.md @@ -0,0 +1,547 @@ +English | [简体中文](README_ch.md) + +# CONTENTS +|[Image](#Image) (212)|[Text](#Text) (130)|[Audio](#Audio) (15)|[Video](#Video) (8)|[Industrial Application](#Industrial-Application) (1)| +|--|--|--|--|--| +|[Image Classification](#Image-Classification) (108)|[Text Generation](#Text-Generation) (17)| [Voice Cloning](#Voice-Cloning) (2)|[Video Classification](#Video-Classification) (5)| [Meter Detection](#Meter-Detection) (1)| +|[Image Generation](#Image-Generation) (26)|[Word Embedding](#Word-Embedding) (62)|[Text to Speech](#Text-to-Speech) (5)|[Video Editing](#Video-Editing) (1)|-| +|[Keypoint Detection](#Keypoint-Detection) (5)|[Machine Translation](#Machine-Translation) (2)|[Automatic Speech Recognition](#Automatic-Speech-Recognition) (5)|[Multiple Object tracking](#Multiple-Object-tracking) (2)|-| +|[Semantic Segmentation](#Semantic-Segmentation) (25)|[Language Model](#Language-Model) (30)|[Audio Classification](#Audio-Classification) (3)| -|-| +|[Face Detection](#Face-Detection) (7)|[Sentiment Analysis](#Sentiment-Analysis) (7)|-|-|-| +|[Text Recognition](#Text-Recognition) (17)|[Syntactic Analysis](#Syntactic-Analysis) (1)|-|-|-| +|[Image Editing](#Image-Editing) (8)|[Simultaneous Translation](#Simultaneous-Translation) (5)|-|-|-| +|[Instance Segmentation](#Instance-Segmentation) (1)|[Lexical Analysis](#Lexical-Analysis) (2)|-|-|-| +|[Object Detection](#Object-Detection) (13)|[Punctuation Restoration](#Punctuation-Restoration) (1)|-|-|-| +|[Depth Estimation](#Depth-Estimation) (2)|[Text Review](#Text-Review) (3)|-|-|-| + +## Image + - ### Image Classification + +
+ +|module|Network|Dataset|Introduction| +|--|--|--|--| +|[DriverStatusRecognition](image/classification/DriverStatusRecognition)|MobileNetV3_small_ssld|分心司机检测数据集|| +|[mobilenet_v2_animals](image/classification/mobilenet_v2_animals)|MobileNet_v2|百度自建动物数据集|| +|[repvgg_a1_imagenet](image/classification/repvgg_a1_imagenet)|RepVGG|ImageNet-2012|| +|[repvgg_a0_imagenet](image/classification/repvgg_a0_imagenet)|RepVGG|ImageNet-2012|| +|[resnext152_32x4d_imagenet](image/classification/resnext152_32x4d_imagenet)|ResNeXt|ImageNet-2012|| +|[resnet_v2_152_imagenet](image/classification/resnet_v2_152_imagenet)|ResNet V2|ImageNet-2012|| +|[resnet50_vd_animals](image/classification/resnet50_vd_animals)|ResNet50_vd|百度自建动物数据集|| +|[food_classification](image/classification/food_classification)|ResNet50_vd_ssld|美食数据集|| +|[mobilenet_v3_large_imagenet_ssld](image/classification/mobilenet_v3_large_imagenet_ssld)|Mobilenet_v3_large|ImageNet-2012|| +|[resnext152_vd_32x4d_imagenet](image/classification/resnext152_vd_32x4d_imagenet)|||| +|[ghostnet_x1_3_imagenet_ssld](image/classification/ghostnet_x1_3_imagenet_ssld)|GhostNet|ImageNet-2012|| +|[rexnet_1_5_imagenet](image/classification/rexnet_1_5_imagenet)|ReXNet|ImageNet-2012|| +|[resnext50_64x4d_imagenet](image/classification/resnext50_64x4d_imagenet)|ResNeXt|ImageNet-2012|| +|[resnext101_64x4d_imagenet](image/classification/resnext101_64x4d_imagenet)|ResNeXt|ImageNet-2012|| +|[efficientnetb0_imagenet](image/classification/efficientnetb0_imagenet)|EfficientNet|ImageNet-2012|| +|[efficientnetb1_imagenet](image/classification/efficientnetb1_imagenet)|EfficientNet|ImageNet-2012|| +|[mobilenet_v2_imagenet_ssld](image/classification/mobilenet_v2_imagenet_ssld)|Mobilenet_v2|ImageNet-2012|| +|[resnet50_vd_dishes](image/classification/resnet50_vd_dishes)|ResNet50_vd|百度自建菜品数据集|| +|[pnasnet_imagenet](image/classification/pnasnet_imagenet)|PNASNet|ImageNet-2012|| +|[rexnet_2_0_imagenet](image/classification/rexnet_2_0_imagenet)|ReXNet|ImageNet-2012|| +|[SnakeIdentification](image/classification/SnakeIdentification)|ResNet50_vd_ssld|蛇种数据集|| +|[hrnet40_imagenet](image/classification/hrnet40_imagenet)|HRNet|ImageNet-2012|| +|[resnet_v2_34_imagenet](image/classification/resnet_v2_34_imagenet)|ResNet V2|ImageNet-2012|| +|[mobilenet_v2_dishes](image/classification/mobilenet_v2_dishes)|MobileNet_v2|百度自建菜品数据集|| +|[resnext101_vd_32x4d_imagenet](image/classification/resnext101_vd_32x4d_imagenet)|ResNeXt|ImageNet-2012|| +|[repvgg_b2g4_imagenet](image/classification/repvgg_b2g4_imagenet)|RepVGG|ImageNet-2012|| +|[fix_resnext101_32x48d_wsl_imagenet](image/classification/fix_resnext101_32x48d_wsl_imagenet)|ResNeXt|ImageNet-2012|| +|[vgg13_imagenet](image/classification/vgg13_imagenet)|VGG|ImageNet-2012|| +|[se_resnext101_32x4d_imagenet](image/classification/se_resnext101_32x4d_imagenet)|SE_ResNeXt|ImageNet-2012|| +|[hrnet30_imagenet](image/classification/hrnet30_imagenet)|HRNet|ImageNet-2012|| +|[ghostnet_x1_3_imagenet](image/classification/ghostnet_x1_3_imagenet)|GhostNet|ImageNet-2012|| +|[dpn107_imagenet](image/classification/dpn107_imagenet)|DPN|ImageNet-2012|| +|[densenet161_imagenet](image/classification/densenet161_imagenet)|DenseNet|ImageNet-2012|| +|[vgg19_imagenet](image/classification/vgg19_imagenet)|vgg19_imagenet|ImageNet-2012|| +|[mobilenet_v2_imagenet](image/classification/mobilenet_v2_imagenet)|Mobilenet_v2|ImageNet-2012|| +|[resnet50_vd_10w](image/classification/resnet50_vd_10w)|ResNet_vd|百度自建数据集|| 
+|[resnet_v2_101_imagenet](image/classification/resnet_v2_101_imagenet)|ResNet V2 101|ImageNet-2012|| +|[darknet53_imagenet](image/classification/darknet53_imagenet)|DarkNet|ImageNet-2012|| +|[se_resnext50_32x4d_imagenet](image/classification/se_resnext50_32x4d_imagenet)|SE_ResNeXt|ImageNet-2012|| +|[se_hrnet64_imagenet_ssld](image/classification/se_hrnet64_imagenet_ssld)|HRNet|ImageNet-2012|| +|[resnext101_32x16d_wsl](image/classification/resnext101_32x16d_wsl)|ResNeXt_wsl|ImageNet-2012|| +|[hrnet18_imagenet](image/classification/hrnet18_imagenet)|HRNet|ImageNet-2012|| +|[spinalnet_res101_gemstone](image/classification/spinalnet_res101_gemstone)|resnet101|gemstone|| +|[densenet264_imagenet](image/classification/densenet264_imagenet)|DenseNet|ImageNet-2012|| +|[resnext50_vd_32x4d_imagenet](image/classification/resnext50_vd_32x4d_imagenet)|ResNeXt_vd|ImageNet-2012|| +|[SpinalNet_Gemstones](image/classification/SpinalNet_Gemstones)|||| +|[spinalnet_vgg16_gemstone](image/classification/spinalnet_vgg16_gemstone)|vgg16|gemstone|| +|[xception71_imagenet](image/classification/xception71_imagenet)|Xception|ImageNet-2012|| +|[repvgg_b2_imagenet](image/classification/repvgg_b2_imagenet)|RepVGG|ImageNet-2012|| +|[dpn68_imagenet](image/classification/dpn68_imagenet)|DPN|ImageNet-2012|| +|[alexnet_imagenet](image/classification/alexnet_imagenet)|AlexNet|ImageNet-2012|| +|[rexnet_1_3_imagenet](image/classification/rexnet_1_3_imagenet)|ReXNet|ImageNet-2012|| +|[hrnet64_imagenet](image/classification/hrnet64_imagenet)|HRNet|ImageNet-2012|| +|[efficientnetb7_imagenet](image/classification/efficientnetb7_imagenet)|EfficientNet|ImageNet-2012|| +|[efficientnetb0_small_imagenet](image/classification/efficientnetb0_small_imagenet)|EfficientNet|ImageNet-2012|| +|[efficientnetb6_imagenet](image/classification/efficientnetb6_imagenet)|EfficientNet|ImageNet-2012|| +|[hrnet48_imagenet](image/classification/hrnet48_imagenet)|HRNet|ImageNet-2012|| +|[rexnet_3_0_imagenet](image/classification/rexnet_3_0_imagenet)|ReXNet|ImageNet-2012|| +|[shufflenet_v2_imagenet](image/classification/shufflenet_v2_imagenet)|ShuffleNet V2|ImageNet-2012|| +|[ghostnet_x0_5_imagenet](image/classification/ghostnet_x0_5_imagenet)|GhostNet|ImageNet-2012|| +|[inception_v4_imagenet](image/classification/inception_v4_imagenet)|Inception_V4|ImageNet-2012|| +|[resnext101_vd_64x4d_imagenet](image/classification/resnext101_vd_64x4d_imagenet)|ResNeXt_vd|ImageNet-2012|| +|[densenet201_imagenet](image/classification/densenet201_imagenet)|DenseNet|ImageNet-2012|| +|[vgg16_imagenet](image/classification/vgg16_imagenet)|VGG|ImageNet-2012|| +|[mobilenet_v3_small_imagenet_ssld](image/classification/mobilenet_v3_small_imagenet_ssld)|Mobilenet_v3_Small|ImageNet-2012|| +|[hrnet18_imagenet_ssld](image/classification/hrnet18_imagenet_ssld)|HRNet|ImageNet-2012|| +|[resnext152_64x4d_imagenet](image/classification/resnext152_64x4d_imagenet)|ResNeXt|ImageNet-2012|| +|[efficientnetb3_imagenet](image/classification/efficientnetb3_imagenet)|EfficientNet|ImageNet-2012|| +|[efficientnetb2_imagenet](image/classification/efficientnetb2_imagenet)|EfficientNet|ImageNet-2012|| +|[repvgg_b1g4_imagenet](image/classification/repvgg_b1g4_imagenet)|RepVGG|ImageNet-2012|| +|[resnext101_32x4d_imagenet](image/classification/resnext101_32x4d_imagenet)|ResNeXt|ImageNet-2012|| +|[resnext50_32x4d_imagenet](image/classification/resnext50_32x4d_imagenet)|ResNeXt|ImageNet-2012|| +|[repvgg_a2_imagenet](image/classification/repvgg_a2_imagenet)|RepVGG|ImageNet-2012|| 
+|[resnext152_vd_64x4d_imagenet](image/classification/resnext152_vd_64x4d_imagenet)|ResNeXt_vd|ImageNet-2012|| +|[xception41_imagenet](image/classification/xception41_imagenet)|Xception|ImageNet-2012|| +|[googlenet_imagenet](image/classification/googlenet_imagenet)|GoogleNet|ImageNet-2012|| +|[resnet50_vd_imagenet_ssld](image/classification/resnet50_vd_imagenet_ssld)|ResNet_vd|ImageNet-2012|| +|[repvgg_b1_imagenet](image/classification/repvgg_b1_imagenet)|RepVGG|ImageNet-2012|| +|[repvgg_b0_imagenet](image/classification/repvgg_b0_imagenet)|RepVGG|ImageNet-2012|| +|[resnet_v2_50_imagenet](image/classification/resnet_v2_50_imagenet)|ResNet V2|ImageNet-2012|| +|[rexnet_1_0_imagenet](image/classification/rexnet_1_0_imagenet)|ReXNet|ImageNet-2012|| +|[resnet_v2_18_imagenet](image/classification/resnet_v2_18_imagenet)|ResNet V2|ImageNet-2012|| +|[resnext101_32x8d_wsl](image/classification/resnext101_32x8d_wsl)|ResNeXt_wsl|ImageNet-2012|| +|[efficientnetb4_imagenet](image/classification/efficientnetb4_imagenet)|EfficientNet|ImageNet-2012|| +|[efficientnetb5_imagenet](image/classification/efficientnetb5_imagenet)|EfficientNet|ImageNet-2012|| +|[repvgg_b1g2_imagenet](image/classification/repvgg_b1g2_imagenet)|RepVGG|ImageNet-2012|| +|[resnext101_32x48d_wsl](image/classification/resnext101_32x48d_wsl)|ResNeXt_wsl|ImageNet-2012|| +|[resnet50_vd_wildanimals](image/classification/resnet50_vd_wildanimals)|ResNet_vd|IFAW 自建野生动物数据集|| +|[nasnet_imagenet](image/classification/nasnet_imagenet)|NASNet|ImageNet-2012|| +|[se_resnet18_vd_imagenet](image/classification/se_resnet18_vd_imagenet)|||| +|[spinalnet_res50_gemstone](image/classification/spinalnet_res50_gemstone)|resnet50|gemstone|| +|[resnext50_vd_64x4d_imagenet](image/classification/resnext50_vd_64x4d_imagenet)|ResNeXt_vd|ImageNet-2012|| +|[resnext101_32x32d_wsl](image/classification/resnext101_32x32d_wsl)|ResNeXt_wsl|ImageNet-2012|| +|[dpn131_imagenet](image/classification/dpn131_imagenet)|DPN|ImageNet-2012|| +|[xception65_imagenet](image/classification/xception65_imagenet)|Xception|ImageNet-2012|| +|[repvgg_b3g4_imagenet](image/classification/repvgg_b3g4_imagenet)|RepVGG|ImageNet-2012|| +|[marine_biometrics](image/classification/marine_biometrics)|ResNet50_vd_ssld|Fish4Knowledge|| +|[res2net101_vd_26w_4s_imagenet](image/classification/res2net101_vd_26w_4s_imagenet)|Res2Net|ImageNet-2012|| +|[dpn98_imagenet](image/classification/dpn98_imagenet)|DPN|ImageNet-2012|| +|[resnet18_vd_imagenet](image/classification/resnet18_vd_imagenet)|ResNet_vd|ImageNet-2012|| +|[densenet121_imagenet](image/classification/densenet121_imagenet)|DenseNet|ImageNet-2012|| +|[vgg11_imagenet](image/classification/vgg11_imagenet)|VGG|ImageNet-2012|| +|[hrnet44_imagenet](image/classification/hrnet44_imagenet)|HRNet|ImageNet-2012|| +|[densenet169_imagenet](image/classification/densenet169_imagenet)|DenseNet|ImageNet-2012|| +|[hrnet32_imagenet](image/classification/hrnet32_imagenet)|HRNet|ImageNet-2012|| +|[dpn92_imagenet](image/classification/dpn92_imagenet)|DPN|ImageNet-2012|| +|[ghostnet_x1_0_imagenet](image/classification/ghostnet_x1_0_imagenet)|GhostNet|ImageNet-2012|| +|[hrnet48_imagenet_ssld](image/classification/hrnet48_imagenet_ssld)|HRNet|ImageNet-2012|| + +
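Each entry above maps to a module name that can be passed straight to `hub.Module`. A minimal sketch using `resnet50_vd_imagenet_ssld`, mirroring the script-prediction snippet added to the image-classification demo in this patch (the image path is a placeholder):

```python
import paddlehub as hub

if __name__ == '__main__':
    # Load a listed classification module by name and run prediction on one image.
    model = hub.Module(name='resnet50_vd_imagenet_ssld')
    result = model.predict(['/PATH/TO/IMAGE'])
    print(result)
```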
+ + + - ### Image Generation + +|module|Network|Dataset|Introduction| +|--|--|--|--| +|[pixel2style2pixel](image/Image_gan/gan/pixel2style2pixel/)|Pixel2Style2Pixel|-|人脸转正| +|[stgan_bald](image/Image_gan/gan/stgan_bald/)|STGAN|CelebA|秃头生成器| +|[styleganv2_editing](image/Image_gan/gan/styleganv2_editing)|StyleGAN V2|-|人脸编辑| +|[wav2lip](image/Image_gan/gan/wav2lip)|wav2lip|LRS2|唇形生成| +|[attgan_celeba](image/Image_gan/attgan_celeba/)|AttGAN|Celeba|人脸编辑| +|[cyclegan_cityscapes](image/Image_gan/cyclegan_cityscapes)|CycleGAN|Cityscapes|实景图和语义分割结果互相转换| +|[stargan_celeba](image/Image_gan/stargan_celeba)|StarGAN|Celeba|人脸编辑| +|[stgan_celeba](image/Image_gan/stgan_celeba/)|STGAN|Celeba|人脸编辑| +|[ID_Photo_GEN](image/Image_gan/style_transfer/ID_Photo_GEN)|HRNet_W18|-|证件照生成| +|[Photo2Cartoon](image/Image_gan/style_transfer/Photo2Cartoon)|U-GAT-IT|cartoon_data|人脸卡通化| +|[U2Net_Portrait](image/Image_gan/style_transfer/U2Net_Portrait)|U^2Net|-|人脸素描化| +|[UGATIT_100w](image/Image_gan/style_transfer/UGATIT_100w)|U-GAT-IT|selfie2anime|人脸动漫化| +|[UGATIT_83w](image/Image_gan/style_transfer/UGATIT_83w)|U-GAT-IT|selfie2anime|人脸动漫化| +|[UGATIT_92w](image/Image_gan/style_transfer/UGATIT_92w)| U-GAT-IT|selfie2anime|人脸动漫化| +|[animegan_v1_hayao_60](image/Image_gan/style_transfer/animegan_v1_hayao_60)|AnimeGAN|The Wind Rises|图像风格迁移-宫崎骏| +|[animegan_v2_hayao_64](image/Image_gan/style_transfer/animegan_v2_hayao_64)|AnimeGAN|The Wind Rises|图像风格迁移-宫崎骏| +|[animegan_v2_hayao_99](image/Image_gan/style_transfer/animegan_v2_hayao_99)|AnimeGAN|The Wind Rises|图像风格迁移-宫崎骏| +|[animegan_v2_paprika_54](image/Image_gan/style_transfer/animegan_v2_paprika_54)|AnimeGAN|Paprika|图像风格迁移-今敏| +|[animegan_v2_paprika_74](image/Image_gan/style_transfer/animegan_v2_paprika_74)|AnimeGAN|Paprika|图像风格迁移-今敏| +|[animegan_v2_paprika_97](image/Image_gan/style_transfer/animegan_v2_paprika_97)|AnimeGAN|Paprika|图像风格迁移-今敏| +|[animegan_v2_paprika_98](image/Image_gan/style_transfer/animegan_v2_paprika_98)|AnimeGAN|Paprika|图像风格迁移-今敏| +|[animegan_v2_shinkai_33](image/Image_gan/style_transfer/animegan_v2_shinkai_33)|AnimeGAN|Your Name, Weathering with you|图像风格迁移-新海诚| +|[animegan_v2_shinkai_53](image/Image_gan/style_transfer/animegan_v2_shinkai_53)|AnimeGAN|Your Name, Weathering with you|图像风格迁移-新海诚| +|[msgnet](image/Image_gan/style_transfer/msgnet)|msgnet|COCO2014| +|[stylepro_artistic](image/Image_gan/style_transfer/stylepro_artistic)|StyleProNet|MS-COCO + WikiArt|艺术风格迁移| +|stylegan_ffhq|StyleGAN|FFHQ|图像风格迁移| + + - ### Keypoint Detection + +|module|Network|Dataset|Introduction| +|--|--|--|--| +|[face_landmark_localization](image/keypoint_detection/face_landmark_localization)|Face_Landmark|AFW/AFLW|人脸关键点检测| +|[hand_pose_localization](image/keypoint_detection/hand_pose_localization)|-|MPII, NZSL|手部关键点检测| +|[openpose_body_estimation](image/keypoint_detection/openpose_body_estimation)|two-branch multi-stage CNN|MPII, COCO 2016|肢体关键点检测| +|[human_pose_estimation_resnet50_mpii](image/keypoint_detection/human_pose_estimation_resnet50_mpii)|Pose_Resnet50|MPII|人体骨骼关键点检测 +|[openpose_hands_estimation](image/keypoint_detection/openpose_hands_estimation)|-|MPII, NZSL|手部关键点检测| + + - ### Semantic Segmentation + +|module|Network|Dataset|Introduction| +|--|--|--|--| +|[deeplabv3p_xception65_humanseg](image/semantic_segmentation/deeplabv3p_xception65_humanseg)|deeplabv3p|百度自建数据集|人像分割| +|[humanseg_server](image/semantic_segmentation/humanseg_server)|deeplabv3p|百度自建数据集|人像分割| +|[humanseg_mobile](image/semantic_segmentation/humanseg_mobile)|hrnet|百度自建数据集|人像分割-移动端前置摄像头| 
+|[humanseg_lite](image/semantic_segmentation/umanseg_lite)|shufflenet|百度自建数据集|轻量级人像分割-移动端实时| +|[ExtremeC3_Portrait_Segmentation](image/semantic_segmentation/ExtremeC3_Portrait_Segmentation)|ExtremeC3|EG1800, Baidu fashion dataset|轻量化人像分割| +|[SINet_Portrait_Segmentation](image/semantic_segmentation/SINet_Portrait_Segmentation)|SINet|EG1800, Baidu fashion dataset|轻量化人像分割| +|[FCN_HRNet_W18_Face_Seg](image/semantic_segmentation/FCN_HRNet_W18_Face_Seg)|FCN_HRNet_W18|-|人像分割| +|[ace2p](image/semantic_segmentation/ace2p)|ACE2P|LIP|人体解析| +|[Pneumonia_CT_LKM_PP](image/semantic_segmentation/Pneumonia_CT_LKM_PP)|U-NET+|连心医疗授权脱敏数据集|肺炎CT影像分析| +|[Pneumonia_CT_LKM_PP_lung](image/semantic_segmentation/Pneumonia_CT_LKM_PP_lung)|U-NET+|连心医疗授权脱敏数据集|肺炎CT影像分析| +|[ocrnet_hrnetw18_voc](image/semantic_segmentation/ocrnet_hrnetw18_voc)|ocrnet, hrnet|PascalVoc2012| +|[U2Net](image/semantic_segmentation/U2Net)|U^2Net|-|图像前景背景分割| +|[U2Netp](image/semantic_segmentation/U2Netp)|U^2Net|-|图像前景背景分割| +|[Extract_Line_Draft](image/semantic_segmentation/Extract_Line_Draft)|UNet|Pixiv|线稿提取| +|[unet_cityscapes](image/semantic_segmentation/unet_cityscapes)|UNet|cityscapes| +|[ocrnet_hrnetw18_cityscapes](image/semantic_segmentation/ocrnet_hrnetw18_cityscapes)|ocrnet_hrnetw18|cityscapes| +|[hardnet_cityscapes](image/semantic_segmentation/hardnet_cityscapes)|hardnet|cityscapes| +|[fcn_hrnetw48_voc](image/semantic_segmentation/fcn_hrnetw48_voc)|fcn_hrnetw48|PascalVoc2012| +|[fcn_hrnetw48_cityscapes](image/semantic_segmentation/fcn_hrnetw48_cityscapes)|fcn_hrnetw48|cityscapes| +|[fcn_hrnetw18_voc](image/semantic_segmentation/fcn_hrnetw18_voc)|fcn_hrnetw18|PascalVoc2012| +|[fcn_hrnetw18_cityscapes](image/semantic_segmentation/fcn_hrnetw18_cityscapes)|fcn_hrnetw18|cityscapes| +|[fastscnn_cityscapes](image/semantic_segmentation/fastscnn_cityscapes)|fastscnn|cityscapes| +|[deeplabv3p_resnet50_voc](image/semantic_segmentation/deeplabv3p_resnet50_voc)|deeplabv3p, resnet50|PascalVoc2012| +|[deeplabv3p_resnet50_cityscapes](image/semantic_segmentation/deeplabv3p_resnet50_cityscapes)|deeplabv3p, resnet50|cityscapes| +|[bisenetv2_cityscapes](image/semantic_segmentation/bisenetv2_cityscapes)|bisenetv2|cityscapes| + + + + - ### Face Detection + +|module|Network|Dataset|Introduction| +|--|--|--|--| +|[pyramidbox_lite_mobile](image/face_detection/pyramidbox_lite_mobile)|PyramidBox|WIDER FACE数据集 + 百度自采人脸数据集|轻量级人脸检测-移动端| +|[pyramidbox_lite_mobile_mask](image/face_detection/pyramidbox_lite_mobile_mask)|PyramidBox|WIDER FACE数据集 + 百度自采人脸数据集|轻量级人脸口罩检测-移动端| +|[pyramidbox_lite_server_mask](image/face_detection/pyramidbox_lite_server_mask)|PyramidBox|WIDER FACE数据集 + 百度自采人脸数据集|轻量级人脸口罩检测| +|[ultra_light_fast_generic_face_detector_1mb_640](image/face_detection/ultra_light_fast_generic_face_detector_1mb_640)|Ultra-Light-Fast-Generic-Face-Detector-1MB|WIDER FACE数据集|轻量级通用人脸检测-低算力设备| +|[ultra_light_fast_generic_face_detector_1mb_320](image/face_detection/ultra_light_fast_generic_face_detector_1mb_320)|Ultra-Light-Fast-Generic-Face-Detector-1MB|WIDER FACE数据集|轻量级通用人脸检测-低算力设备| +|[pyramidbox_lite_server](image/face_detection/pyramidbox_lite_server)|PyramidBox|WIDER FACE数据集 + 百度自采人脸数据集|轻量级人脸检测| +|[pyramidbox_face_detection](image/face_detection/pyramidbox_face_detection)|PyramidBox|WIDER FACE数据集|人脸检测| + + - ### Text Recognition + +|module|Network|Dataset|Introduction| +|--|--|--|--| +|[chinese_ocr_db_crnn_mobile](image/text_recognition/chinese_ocr_db_crnn_mobile)|Differentiable 
Binarization+RCNN|icdar2015数据集|中文文字识别|[chinese_text_detection_db_mobile](image/text_recognition/chinese_text_detection_db_mobile)|Differentiable Binarization|icdar2015数据集|中文文本检测| +|[chinese_text_detection_db_server](image/text_recognition/chinese_text_detection_db_server)|Differentiable Binarization|icdar2015数据集|中文文本检测| +|[chinese_ocr_db_crnn_server](image/text_recognition/chinese_ocr_db_crnn_server)|Differentiable Binarization+RCNN|icdar2015数据集|中文文字识别| +|[Vehicle_License_Plate_Recognition](image/text_recognition/Vehicle_License_Plate_Recognition)|-|CCPD|车牌识别| +|[chinese_cht_ocr_db_crnn_mobile](image/text_recognition/chinese_cht_ocr_db_crnn_mobile)|Differentiable Binarization+CRNN|icdar2015数据集|繁体中文文字识别| +|[japan_ocr_db_crnn_mobile](image/text_recognition/japan_ocr_db_crnn_mobile)|Differentiable Binarization+CRNN|icdar2015数据集|日文文字识别| +|[korean_ocr_db_crnn_mobile](image/text_recognition/korean_ocr_db_crnn_mobile)|Differentiable Binarization+CRNN|icdar2015数据集|韩文文字识别| +|[german_ocr_db_crnn_mobile](image/text_recognition/german_ocr_db_crnn_mobile)|Differentiable Binarization+CRNN|icdar2015数据集|德文文字识别| +|[french_ocr_db_crnn_mobile](image/text_recognition/french_ocr_db_crnn_mobile)|Differentiable Binarization+CRNN|icdar2015数据集|法文文字识别| +|[latin_ocr_db_crnn_mobile](image/text_recognition/latin_ocr_db_crnn_mobile)|Differentiable Binarization+CRNN|icdar2015数据集|拉丁文文字识别| +|[cyrillic_ocr_db_crnn_mobile](image/text_recognition/cyrillic_ocr_db_crnn_mobile)|Differentiable Binarization+CRNN|icdar2015数据集|斯拉夫文文字识别| +|[multi_languages_ocr_db_crnn](image/text_recognition/multi_languages_ocr_db_crnn)|Differentiable Binarization+RCNN|icdar2015数据集|多语言文字识别| +|[kannada_ocr_db_crnn_mobile](image/text_recognition/kannada_ocr_db_crnn_mobile)|Differentiable Binarization+CRNN|icdar2015数据集|卡纳达文文字识别| +|[arabic_ocr_db_crnn_mobile](image/text_recognition/arabic_ocr_db_crnn_mobile)|Differentiable Binarization+CRNN|icdar2015数据集|阿拉伯文文字识别| +|[telugu_ocr_db_crnn_mobile](image/text_recognition/telugu_ocr_db_crnn_mobile)|Differentiable Binarization+CRNN|icdar2015数据集|泰卢固文文字识别| +|[devanagari_ocr_db_crnn_mobile](image/text_recognition/devanagari_ocr_db_crnn_mobile)|Differentiable Binarization+CRNN|icdar2015数据集|梵文文字识别| +|[tamil_ocr_db_crnn_mobile](image/text_recognition/tamil_ocr_db_crnn_mobile)|Differentiable Binarization+CRNN|icdar2015数据集|泰米尔文文字识别| + + + - ### Image Editing + +|module|Network|Dataset|Introduction| +|--|--|--|--| +|[realsr](image/Image_editing/super_resolution/realsr)|LP-KPN|RealSR dataset|图像/视频超分-4倍| +|[deoldify](image/Image_editing/colorization/deoldify)|GAN|ILSVRC 2012|黑白照片/视频着色| +|[photo_restoration](image/Image_editing/colorization/photo_restoration)|基于deoldify和realsr模型|-|老照片修复| +|[user_guided_colorization](image/Image_editing/colorization/user_guided_colorization)|siggraph|ILSVRC 2012|图像着色| +|[falsr_c](image/Image_editing/super_resolution/falsr_c)|falsr_c| DIV2k|轻量化超分-2倍| +|[dcscn](image/Image_editing/super_resolution/dcscn)|dcscn| DIV2k|轻量化超分-2倍| +|[falsr_a](image/Image_editing/super_resolution/falsr_a)|falsr_a| DIV2k|轻量化超分-2倍| +|[falsr_b](image/Image_editing/super_resolution/falsr_b)|falsr_b|DIV2k|轻量化超分-2倍| + + - ### Instance Segmentation + +|module|Network|Dataset|Introduction| +|--|--|--|--| +|[solov2](image/instance_segmentation/solov2)|-|COCO2014|实例分割| + + - ### Object Detection + +|module|Network|Dataset|Introduction| +|--|--|--|--| +|[faster_rcnn_resnet50_coco2017](image/object_detection/faster_rcnn_resnet50_coco2017)|faster_rcnn|COCO2017|| 
+|[ssd_vgg16_512_coco2017](image/object_detection/ssd_vgg16_512_coco2017)|SSD|COCO2017|| +|[faster_rcnn_resnet50_fpn_venus](image/object_detection/faster_rcnn_resnet50_fpn_venus)|faster_rcnn|百度自建数据集|大规模通用目标检测| +|[ssd_vgg16_300_coco2017](image/object_detection/ssd_vgg16_300_coco2017)|||| +|[yolov3_resnet34_coco2017](image/object_detection/yolov3_resnet34_coco2017)|YOLOv3|COCO2017|| +|[yolov3_darknet53_pedestrian](image/object_detection/yolov3_darknet53_pedestrian)|YOLOv3|百度自建大规模行人数据集|行人检测| +|[yolov3_mobilenet_v1_coco2017](image/object_detection/yolov3_mobilenet_v1_coco2017)|YOLOv3|COCO2017|| +|[ssd_mobilenet_v1_pascal](image/object_detection/ssd_mobilenet_v1_pascal)|SSD|PASCAL VOC|| +|[faster_rcnn_resnet50_fpn_coco2017](image/object_detection/faster_rcnn_resnet50_fpn_coco2017)|faster_rcnn|COCO2017|| +|[yolov3_darknet53_coco2017](image/object_detection/yolov3_darknet53_coco2017)|YOLOv3|COCO2017|| +|[yolov3_darknet53_vehicles](image/object_detection/yolov3_darknet53_vehicles)|YOLOv3|百度自建大规模车辆数据集|车辆检测| +|[yolov3_darknet53_venus](image/object_detection/yolov3_darknet53_venus)|YOLOv3|百度自建数据集|大规模通用检测| +|[yolov3_resnet50_vd_coco2017](image/object_detection/yolov3_resnet50_vd_coco2017)|YOLOv3|COCO2017|| + + - ### Depth Estimation + +|module|Network|Dataset|Introduction| +|--|--|--|--| +|[MiDaS_Large](image/depth_estimation/MiDaS_Large)|-|3D Movies, WSVD, ReDWeb, MegaDepth|| +|[MiDaS_Small](image/depth_estimation/MiDaS_Small)|-|3D Movies, WSVD, ReDWeb, MegaDepth, etc.|| + +## Text + - ### Text Generation + +|module|Network|Dataset|Introduction| +|--|--|--|--| +|[ernie_gen](text/text_generation/ernie_gen)|ERNIE-GEN|-|面向生成任务的预训练-微调框架| +|[ernie_gen_poetry](text/text_generation/ernie_gen_poetry)|ERNIE-GEN|开源诗歌数据集|诗歌生成| +|[ernie_gen_couplet](text/text_generation/ernie_gen_couplet)|ERNIE-GEN|开源对联数据集|对联生成| +|[ernie_gen_lover_words](text/text_generation/ernie_gen_lover_words)|ERNIE-GEN|网络情诗、情话数据|情话生成| +|[ernie_tiny_couplet](text/text_generation/ernie_tiny_couplet)|Eernie_tiny|开源对联数据集|对联生成| +|[ernie_gen_acrostic_poetry](text/text_generation/ernie_gen_acrostic_poetry)|ERNIE-GEN|开源诗歌数据集|藏头诗生成| +|[Rumor_prediction](text/text_generation/Rumor_prediction)|-|新浪微博中文谣言数据|谣言预测| +|[plato-mini](text/text_generation/plato-mini)|Unified Transformer|十亿级别的中文对话数据|中文对话| +|[plato2_en_large](text/text_generation/plato2_en_large)|plato2|开放域多轮数据集|超大规模生成式对话| +|[plato2_en_base](text/text_generation/plato2_en_base)|plato2|开放域多轮数据集|超大规模生成式对话| +|[CPM_LM](text/text_generation/CPM_LM)|GPT-2|自建数据集|中文文本生成| +|[unified_transformer-12L-cn](text/text_generation/unified_transformer-12L-cn)|Unified Transformer|千万级别中文会话数据|人机多轮对话| +|[unified_transformer-12L-cn-luge](text/text_generation/unified_transformer-12L-cn-luge)|Unified Transformer|千言对话数据集|人机多轮对话| +|[reading_pictures_writing_poems](text/text_generation/reading_pictures_writing_poems)|多网络级联|-|看图写诗| +|[GPT2_CPM_LM](text/text_generation/GPT2_CPM_LM)|||问答类文本生成| +|[GPT2_Base_CN](text/text_generation/GPT2_Base_CN)|||问答类文本生成| + + - ### Word Embedding + +
+ +|module|Network|Dataset|Introduction| +|--|--|--|--| +|[w2v_weibo_target_word-bigram_dim300](text/embedding/w2v_weibo_target_word-bigram_dim300)|w2v|weibo|| +|[w2v_baidu_encyclopedia_target_word-ngram_1-2_dim300](text/embedding/w2v_baidu_encyclopedia_target_word-ngram_1-2_dim300)|w2v|baidu_encyclopedia|| +|[w2v_literature_target_word-word_dim300](text/embedding/w2v_literature_target_word-word_dim300)|w2v|literature|| +|[word2vec_skipgram](text/embedding/word2vec_skipgram)|skip-gram|百度自建数据集|| +|[w2v_sogou_target_word-char_dim300](text/embedding/w2v_sogou_target_word-char_dim300)|w2v|sogou|| +|[w2v_weibo_target_bigram-char_dim300](text/embedding/w2v_weibo_target_bigram-char_dim300)|w2v|weibo|| +|[w2v_zhihu_target_word-bigram_dim300](text/embedding/w2v_zhihu_target_word-bigram_dim300)|w2v|zhihu|| +|[w2v_financial_target_word-word_dim300](text/embedding/w2v_financial_target_word-word_dim300)|w2v|financial|| +|[w2v_wiki_target_word-word_dim300](text/embedding/w2v_wiki_target_word-word_dim300)|w2v|wiki|| +|[w2v_baidu_encyclopedia_context_word-word_dim300](text/embedding/w2v_baidu_encyclopedia_context_word-word_dim300)|w2v|baidu_encyclopedia|| +|[w2v_weibo_target_word-word_dim300](text/embedding/w2v_weibo_target_word-word_dim300)|w2v|weibo|| +|[w2v_zhihu_target_bigram-char_dim300](text/embedding/w2v_zhihu_target_bigram-char_dim300)|w2v|zhihu|| +|[w2v_zhihu_target_word-word_dim300](text/embedding/w2v_zhihu_target_word-word_dim300)|w2v|zhihu|| +|[w2v_people_daily_target_word-char_dim300](text/embedding/w2v_people_daily_target_word-char_dim300)|w2v|people_daily|| +|[w2v_sikuquanshu_target_word-word_dim300](text/embedding/w2v_sikuquanshu_target_word-word_dim300)|w2v|sikuquanshu|| +|[glove_twitter_target_word-word_dim200_en](text/embedding/glove_twitter_target_word-word_dim200_en)|fasttext|twitter|| +|[fasttext_crawl_target_word-word_dim300_en](text/embedding/fasttext_crawl_target_word-word_dim300_en)|fasttext|crawl|| +|[w2v_wiki_target_word-bigram_dim300](text/embedding/w2v_wiki_target_word-bigram_dim300)|w2v|wiki|| +|[w2v_baidu_encyclopedia_context_word-character_char1-1_dim300](text/embedding/w2v_baidu_encyclopedia_context_word-character_char1-1_dim300)|w2v|baidu_encyclopedia|| +|[glove_wiki2014-gigaword_target_word-word_dim300_en](text/embedding/glove_wiki2014-gigaword_target_word-word_dim300_en)|glove|wiki2014-gigaword|| +|[glove_wiki2014-gigaword_target_word-word_dim50_en](text/embedding/glove_wiki2014-gigaword_target_word-word_dim50_en)|glove|wiki2014-gigaword|| +|[w2v_baidu_encyclopedia_context_word-ngram_2-2_dim300](text/embedding/w2v_baidu_encyclopedia_context_word-ngram_2-2_dim300)|w2v|baidu_encyclopedia|| +|[w2v_wiki_target_bigram-char_dim300](text/embedding/w2v_wiki_target_bigram-char_dim300)|w2v|wiki|| +|[w2v_baidu_encyclopedia_target_word-character_char1-1_dim300](text/embedding/w2v_baidu_encyclopedia_target_word-character_char1-1_dim300)|w2v|baidu_encyclopedia|| +|[w2v_financial_target_bigram-char_dim300](text/embedding/w2v_financial_target_bigram-char_dim300)|w2v|financial|| +|[glove_wiki2014-gigaword_target_word-word_dim200_en](text/embedding/glove_wiki2014-gigaword_target_word-word_dim200_en)|glove|wiki2014-gigaword|| +|[w2v_financial_target_word-bigram_dim300](text/embedding/w2v_financial_target_word-bigram_dim300)|w2v|financial|| +|[w2v_mixed-large_target_word-char_dim300](text/embedding/w2v_mixed-large_target_word-char_dim300)|w2v|mixed|| 
+|[w2v_baidu_encyclopedia_target_word-wordPosition_dim300](text/embedding/w2v_baidu_encyclopedia_target_word-wordPosition_dim300)|w2v|baidu_encyclopedia|| +|[w2v_baidu_encyclopedia_context_word-ngram_1-3_dim300](text/embedding/w2v_baidu_encyclopedia_context_word-ngram_1-3_dim300)|w2v|baidu_encyclopedia|| +|[w2v_baidu_encyclopedia_target_word-wordLR_dim300](text/embedding/w2v_baidu_encyclopedia_target_word-wordLR_dim300)|w2v|baidu_encyclopedia|| +|[w2v_sogou_target_bigram-char_dim300](text/embedding/w2v_sogou_target_bigram-char_dim300)|w2v|sogou|| +|[w2v_weibo_target_word-char_dim300](text/embedding/w2v_weibo_target_word-char_dim300)|w2v|weibo|| +|[w2v_people_daily_target_word-word_dim300](text/embedding/w2v_people_daily_target_word-word_dim300)|w2v|people_daily|| +|[w2v_zhihu_target_word-char_dim300](text/embedding/w2v_zhihu_target_word-char_dim300)|w2v|zhihu|| +|[w2v_wiki_target_word-char_dim300](text/embedding/w2v_wiki_target_word-char_dim300)|w2v|wiki|| +|[w2v_sogou_target_word-bigram_dim300](text/embedding/w2v_sogou_target_word-bigram_dim300)|w2v|sogou|| +|[w2v_financial_target_word-char_dim300](text/embedding/w2v_financial_target_word-char_dim300)|w2v|financial|| +|[w2v_baidu_encyclopedia_target_word-ngram_1-3_dim300](text/embedding/w2v_baidu_encyclopedia_target_word-ngram_1-3_dim300)|w2v|baidu_encyclopedia|| +|[glove_wiki2014-gigaword_target_word-word_dim100_en](text/embedding/glove_wiki2014-gigaword_target_word-word_dim100_en)|glove|wiki2014-gigaword|| +|[w2v_baidu_encyclopedia_target_word-character_char1-4_dim300](text/embedding/w2v_baidu_encyclopedia_target_word-character_char1-4_dim300)|w2v|baidu_encyclopedia|| +|[w2v_sogou_target_word-word_dim300](text/embedding/w2v_sogou_target_word-word_dim300)|w2v|sogou|| +|[w2v_literature_target_word-char_dim300](text/embedding/w2v_literature_target_word-char_dim300)|w2v|literature|| +|[w2v_baidu_encyclopedia_target_bigram-char_dim300](text/embedding/w2v_baidu_encyclopedia_target_bigram-char_dim300)|w2v|baidu_encyclopedia|| +|[w2v_baidu_encyclopedia_target_word-word_dim300](text/embedding/w2v_baidu_encyclopedia_target_word-word_dim300)|w2v|baidu_encyclopedia|| +|[glove_twitter_target_word-word_dim100_en](text/embedding/glove_twitter_target_word-word_dim100_en)|glove|crawl|| +|[w2v_baidu_encyclopedia_target_word-ngram_2-2_dim300](text/embedding/w2v_baidu_encyclopedia_target_word-ngram_2-2_dim300)|w2v|baidu_encyclopedia|| +|[w2v_baidu_encyclopedia_context_word-character_char1-4_dim300](text/embedding/w2v_baidu_encyclopedia_context_word-character_char1-4_dim300)|w2v|baidu_encyclopedia|| +|[w2v_literature_target_bigram-char_dim300](text/embedding/w2v_literature_target_bigram-char_dim300)|w2v|literature|| +|[fasttext_wiki-news_target_word-word_dim300_en](text/embedding/fasttext_wiki-news_target_word-word_dim300_en)|fasttext|wiki-news|| +|[w2v_people_daily_target_word-bigram_dim300](text/embedding/w2v_people_daily_target_word-bigram_dim300)|w2v|people_daily|| +|[w2v_mixed-large_target_word-word_dim300](text/embedding/w2v_mixed-large_target_word-word_dim300)|w2v|mixed|| +|[w2v_people_daily_target_bigram-char_dim300](text/embedding/w2v_people_daily_target_bigram-char_dim300)|w2v|people_daily|| +|[w2v_literature_target_word-bigram_dim300](text/embedding/w2v_literature_target_word-bigram_dim300)|w2v|literature|| +|[glove_twitter_target_word-word_dim25_en](text/embedding/glove_twitter_target_word-word_dim25_en)|glove|twitter|| 
+|[w2v_baidu_encyclopedia_context_word-ngram_1-2_dim300](text/embedding/w2v_baidu_encyclopedia_context_word-ngram_1-2_dim300)|w2v|baidu_encyclopedia|| +|[w2v_sikuquanshu_target_word-bigram_dim300](text/embedding/w2v_sikuquanshu_target_word-bigram_dim300)|w2v|sikuquanshu|| +|[w2v_baidu_encyclopedia_context_word-character_char1-2_dim300](text/embedding/w2v_baidu_encyclopedia_context_word-character_char1-2_dim300)|w2v|baidu_encyclopedia|| +|[glove_twitter_target_word-word_dim50_en](text/embedding/glove_twitter_target_word-word_dim50_en)|glove|twitter|| +|[w2v_baidu_encyclopedia_context_word-wordLR_dim300](text/embedding/w2v_baidu_encyclopedia_context_word-wordLR_dim300)|w2v|baidu_encyclopedia|| +|[w2v_baidu_encyclopedia_target_word-character_char1-2_dim300](text/embedding/w2v_baidu_encyclopedia_target_word-character_char1-2_dim300)|w2v|baidu_encyclopedia|| +|[w2v_baidu_encyclopedia_context_word-wordPosition_dim300](text/embedding/w2v_baidu_encyclopedia_context_word-wordPosition_dim300)|w2v|baidu_encyclopedia|| + +
+ + - ### Machine Translation + +|module|Network|Dataset|Introduction| +|--|--|--|--| +|[transformer_zh-en](text/machine_translation/transformer/transformer_zh-en)|Transformer|CWMT2021|中文译英文| +|[transformer_en-de](text/machine_translation/transformer/transformer_en-de)|Transformer|WMT14 EN-DE|英文译德文| + + - ### Language Model + +
+ +|module|Network|Dataset|Introduction| +|--|--|--|--| +|[chinese_electra_small](text/language_model/chinese_electra_small)|||| +|[chinese_electra_base](text/language_model/chinese_electra_base)|||| +|[roberta-wwm-ext-large](text/language_model/roberta-wwm-ext-large)|roberta-wwm-ext-large|百度自建数据集|| +|[chinese-bert-wwm-ext](text/language_model/chinese_bert_wwm_ext)|chinese-bert-wwm-ext|百度自建数据集|| +|[lda_webpage](text/language_model/lda_webpage)|LDA|百度自建网页领域数据集|| +|[lda_novel](text/language_model/lda_novel)|||| +|[bert-base-multilingual-uncased](text/language_model/bert-base-multilingual-uncased)|||| +|[rbt3](text/language_model/rbt3)|||| +|[ernie_v2_eng_base](text/language_model/ernie_v2_eng_base)|ernie_v2_eng_base|百度自建数据集|| +|[bert-base-multilingual-cased](text/language_model/bert-base-multilingual-cased)|||| +|[rbtl3](text/language_model/rbtl3)|||| +|[chinese-bert-wwm](text/language_model/chinese_bert_wwm)|chinese-bert-wwm|百度自建数据集|| +|[bert-large-uncased](text/language_model/bert-large-uncased)|||| +|[slda_novel](text/language_model/slda_novel)|||| +|[slda_news](text/language_model/slda_news)|||| +|[electra_small](text/language_model/electra_small)|||| +|[slda_webpage](text/language_model/slda_webpage)|||| +|[bert-base-cased](text/language_model/bert-base-cased)|||| +|[slda_weibo](text/language_model/slda_weibo)|||| +|[roberta-wwm-ext](text/language_model/roberta-wwm-ext)|roberta-wwm-ext|百度自建数据集|| +|[bert-base-uncased](text/language_model/bert-base-uncased)|||| +|[electra_large](text/language_model/electra_large)|||| +|[ernie](text/language_model/ernie)|ernie-1.0|百度自建数据集|| +|[simnet_bow](text/language_model/simnet_bow)|BOW|百度自建数据集|| +|[ernie_tiny](text/language_model/ernie_tiny)|ernie_tiny|百度自建数据集|| +|[bert-base-chinese](text/language_model/bert-base-chinese)|bert-base-chinese|百度自建数据集|| +|[lda_news](text/language_model/lda_news)|LDA|百度自建新闻领域数据集|| +|[electra_base](text/language_model/electra_base)|||| +|[ernie_v2_eng_large](text/language_model/ernie_v2_eng_large)|ernie_v2_eng_large|百度自建数据集|| +|[bert-large-cased](text/language_model/bert-large-cased)|||| + +
+ + + - ### Sentiment Analysis + +|module|Network|Dataset|Introduction| +|--|--|--|--| +|[ernie_skep_sentiment_analysis](text/sentiment_analysis/ernie_skep_sentiment_analysis)|SKEP|百度自建数据集|句子级情感分析| +|[emotion_detection_textcnn](text/sentiment_analysis/emotion_detection_textcnn)|TextCNN|百度自建数据集|对话情绪识别| +|[senta_bilstm](text/sentiment_analysis/senta_bilstm)|BiLSTM|百度自建数据集|中文情感倾向分析| +|[senta_bow](text/sentiment_analysis/senta_bow)|BOW|百度自建数据集|中文情感倾向分析| +|[senta_gru](text/sentiment_analysis/senta_gru)|GRU|百度自建数据集|中文情感倾向分析| +|[senta_lstm](text/sentiment_analysis/senta_lstm)|LSTM|百度自建数据集|中文情感倾向分析| +|[senta_cnn](text/sentiment_analysis/senta_cnn)|CNN|百度自建数据集|中文情感倾向分析| + + - ### Syntactic Analysis + +|module|Network|Dataset|Introduction| +|--|--|--|--| +|[DDParser](text/syntactic_analysis/DDParser)|Deep Biaffine Attention|搜索query、网页文本、语音输入等数据|句法分析| + + - ### Simultaneous Translation + +|module|Network|Dataset|Introduction| +|--|--|--|--| +|[transformer_nist_wait_1](text/simultaneous_translation/stacl/transformer_nist_wait_1)|transformer|NIST 2008-中英翻译数据集|中译英-wait-1策略| +|[transformer_nist_wait_3](text/simultaneous_translation/stacl/transformer_nist_wait_3)|transformer|NIST 2008-中英翻译数据集|中译英-wait-3策略| +|[transformer_nist_wait_5](text/simultaneous_translation/stacl/transformer_nist_wait_5)|transformer|NIST 2008-中英翻译数据集|中译英-wait-5策略| +|[transformer_nist_wait_7](text/simultaneous_translation/stacl/transformer_nist_wait_7)|transformer|NIST 2008-中英翻译数据集|中译英-wait-7策略| +|[transformer_nist_wait_all](text/simultaneous_translation/stacl/transformer_nist_wait_all)|transformer|NIST 2008-中英翻译数据集|中译英-waitk=-1策略| + + + - ### Lexical Analysis + +|module|Network|Dataset|Introduction| +|--|--|--|--| +|[jieba_paddle](text/lexical_analysis/jieba_paddle)|BiGRU+CRF|百度自建数据集|jieba使用Paddle搭建的切词网络(双向GRU)。同时支持jieba的传统切词方法,如精确模式、全模式、搜索引擎模式等切词模式。| +|[lac](text/lexical_analysis/lac)|BiGRU+CRF|百度自建数据集|百度自研联合的词法分析模型,能整体性地完成中文分词、词性标注、专名识别任务。在百度自建数据集上评测,LAC效果:Precision=88.0%,Recall=88.7%,F1-Score=88.4%。| + + - ### Punctuation Restoration + +|module|Network|Dataset|Introduction| +|--|--|--|--| +|[auto_punc](text/punctuation_restoration/auto_punc)|Ernie-1.0|WuDaoCorpora 2.0|自动添加7种标点符号| + + - ### Text Review + +|module|Network|Dataset|Introduction| +|--|--|--|--| +|[porn_detection_cnn](text/text_review/porn_detection_cnn)|CNN|百度自建数据集|色情检测,自动判别文本是否涉黄并给出相应的置信度,对文本中的色情描述、低俗交友、污秽文案进行识别| +|[porn_detection_gru](text/text_review/porn_detection_gru)|GRU|百度自建数据集|色情检测,自动判别文本是否涉黄并给出相应的置信度,对文本中的色情描述、低俗交友、污秽文案进行识别| +|[porn_detection_lstm](text/text_review/porn_detection_lstm)|LSTM|百度自建数据集|色情检测,自动判别文本是否涉黄并给出相应的置信度,对文本中的色情描述、低俗交友、污秽文案进行识别| + +## Audio + + - ### Voice cloning + +|module|Network|Dataset|Introduction| +|--|--|--|--| +|[ge2e_fastspeech2_pwgan](audio/voice_cloning/ge2e_fastspeech2_pwgan)|FastSpeech2|AISHELL-3|中文语音克隆| +|[lstm_tacotron2](audio/voice_cloning/lstm_tacotron2)|LSTM、Tacotron2、WaveFlow|AISHELL-3|中文语音克隆| + + - ### Text to Speech + +|module|Network|Dataset|Introduction| +|--|--|--|--| +|[transformer_tts_ljspeech](audio/tts/transformer_tts_ljspeech)|Transformer|LJSpeech-1.1|英文语音合成| +|[fastspeech_ljspeech](audio/tts/fastspeech_ljspeech)|FastSpeech|LJSpeech-1.1|英文语音合成| +|[fastspeech2_baker](audio/tts/fastspeech2_baker)|FastSpeech2|Chinese Standard Mandarin Speech Copus|中文语音合成| +|[fastspeech2_ljspeech](audio/tts/fastspeech2_ljspeech)|FastSpeech2|LJSpeech-1.1|英文语音合成| +|[deepvoice3_ljspeech](audio/tts/deepvoice3_ljspeech)|DeepVoice3|LJSpeech-1.1|英文语音合成| + + - ### Automatic Speech Recognition + +|module|Network|Dataset|Introduction| 
+|--|--|--|--| +|[deepspeech2_aishell](audio/asr/deepspeech2_aishell)|DeepSpeech2|AISHELL-1|中文语音识别| +|[deepspeech2_librispeech](audio/asr/deepspeech2_librispeech)|DeepSpeech2|LibriSpeech|英文语音识别| +|[u2_conformer_aishell](audio/asr/u2_conformer_aishell)|Conformer|AISHELL-1|中文语音识别| +|[u2_conformer_wenetspeech](audio/asr/u2_conformer_wenetspeech)|Conformer|WenetSpeech|中文语音识别| +|[u2_conformer_librispeech](audio/asr/u2_conformer_librispeech)|Conformer|LibriSpeech|英文语音识别| + + + - ### Audio Classification + +|module|Network|Dataset|Introduction| +|--|--|--|--| +|[panns_cnn6](audio/audio_classification/PANNs/cnn6)|PANNs|Google Audioset|主要包含4个卷积层和2个全连接层,模型参数为4.5M。经过预训练后,可以用于提取音频的embbedding,维度是512| +|[panns_cnn14](audio/audio_classification/PANNs/cnn14)|PANNs|Google Audioset|主要包含12个卷积层和2个全连接层,模型参数为79.6M。经过预训练后,可以用于提取音频的embbedding,维度是2048| +|[panns_cnn10](audio/audio_classification/PANNs/cnn10)|PANNs|Google Audioset|主要包含8个卷积层和2个全连接层,模型参数为4.9M。经过预训练后,可以用于提取音频的embbedding,维度是512| + +## Video + - ### Video Classification + +|module|Network|Dataset|Introduction| +|--|--|--|--| +|[videotag_tsn_lstm](video/classification/videotag_tsn_lstm)|TSN + AttentionLSTM|百度自建数据集|大规模短视频分类打标签| +|[tsn_kinetics400](video/classification/tsn_kinetics400)|TSN|Kinetics-400|视频分类| +|[tsm_kinetics400](video/classification/tsm_kinetics400)|TSM|Kinetics-400|视频分类| +|[stnet_kinetics400](video/classification/stnet_kinetics400)|StNet|Kinetics-400|视频分类| +|[nonlocal_kinetics400](video/classification/nonlocal_kinetics400)|Non-local|Kinetics-400|视频分类| + + + - ### Video Editing + +|module|Network|Dataset|Introduction| +|--|--|--|--| +|[SkyAR](video/Video_editing/SkyAR)|UNet|UNet|视频换天| + + - ### Multiple Object tracking + +|module|Network|Dataset|Introduction| +|--|--|--|--| +|[fairmot_dla34](video/multiple_object_tracking/fairmot_dla34)|CenterNet|Caltech Pedestrian+CityPersons+CUHK-SYSU+PRW+ETHZ+MOT17|实时多目标跟踪| +|[jde_darknet53](video/multiple_object_tracking/jde_darknet53)|YOLOv3|Caltech Pedestrian+CityPersons+CUHK-SYSU+PRW+ETHZ+MOT17|多目标跟踪-兼顾精度和速度| + +## Industrial Application + + - ### Meter Detection + +|module|Network|Dataset|Introduction| +|--|--|--|--| +|[WatermeterSegmentation](image/semantic_segmentation/WatermeterSegmentation)|DeepLabV3|水表的数字表盘分割数据集|水表的数字表盘分割| diff --git a/modules/README_ch.md b/modules/README_ch.md new file mode 100644 index 0000000000000000000000000000000000000000..d3389e3c3307494357437e2e65d933a9e40c6663 --- /dev/null +++ b/modules/README_ch.md @@ -0,0 +1,546 @@ +简体中文 | [English](README.md) + +# 目录 +|[图像](#图像) (212个)|[文本](#文本) (130个)|[语音](#语音) (15个)|[视频](#视频) (8个)|[工业应用](#工业应用) (1个)| +|--|--|--|--|--| +|[图像分类](#图像分类) (108)|[文本生成](#文本生成) (17)| [声音克隆](#声音克隆) (2)|[视频分类](#视频分类) (5)| [表针识别](#表针识别) (1)| +|[图像生成](#图像生成) (26)|[词向量](#词向量) (62)|[语音合成](#语音合成) (5)|[视频修复](#视频修复) (1)|-| +|[关键点检测](#关键点检测) (5)|[机器翻译](#机器翻译) (2)|[语音识别](#语音识别) (5)|[多目标追踪](#多目标追踪) (2)|-| +|[图像分割](#图像分割) (25)|[语义模型](#语义模型) (30)|[声音分类](#声音分类) (3)| -|-| +|[人脸检测](#人脸检测) (7)|[情感分析](#情感分析) (7)|-|-|-| +|[文字识别](#文字识别) (17)|[句法分析](#句法分析) (1)|-|-|-| +|[图像编辑](#图像编辑) (8)|[同声传译](#同声传译) (5)|-|-|-| +|[实例分割](#实例分割) (1)|[词法分析](#词法分析) (2)|-|-|-| +|[目标检测](#目标检测) (13)|[标点恢复](#标点恢复) (1)|-|-|-| +|[深度估计](#深度估计) (2)|[文本审核](#文本审核) (3)|-|-|-| + +## 图像 + - ### 图像分类 + +
+ +|module|网络|数据集|简介| +|--|--|--|--| +|[DriverStatusRecognition](image/classification/DriverStatusRecognition)|MobileNetV3_small_ssld|分心司机检测数据集|| +|[mobilenet_v2_animals](image/classification/mobilenet_v2_animals)|MobileNet_v2|百度自建动物数据集|| +|[repvgg_a1_imagenet](image/classification/repvgg_a1_imagenet)|RepVGG|ImageNet-2012|| +|[repvgg_a0_imagenet](image/classification/repvgg_a0_imagenet)|RepVGG|ImageNet-2012|| +|[resnext152_32x4d_imagenet](image/classification/resnext152_32x4d_imagenet)|ResNeXt|ImageNet-2012|| +|[resnet_v2_152_imagenet](image/classification/resnet_v2_152_imagenet)|ResNet V2|ImageNet-2012|| +|[resnet50_vd_animals](image/classification/resnet50_vd_animals)|ResNet50_vd|百度自建动物数据集|| +|[food_classification](image/classification/food_classification)|ResNet50_vd_ssld|美食数据集|| +|[mobilenet_v3_large_imagenet_ssld](image/classification/mobilenet_v3_large_imagenet_ssld)|Mobilenet_v3_large|ImageNet-2012|| +|[resnext152_vd_32x4d_imagenet](image/classification/resnext152_vd_32x4d_imagenet)|||| +|[ghostnet_x1_3_imagenet_ssld](image/classification/ghostnet_x1_3_imagenet_ssld)|GhostNet|ImageNet-2012|| +|[rexnet_1_5_imagenet](image/classification/rexnet_1_5_imagenet)|ReXNet|ImageNet-2012|| +|[resnext50_64x4d_imagenet](image/classification/resnext50_64x4d_imagenet)|ResNeXt|ImageNet-2012|| +|[resnext101_64x4d_imagenet](image/classification/resnext101_64x4d_imagenet)|ResNeXt|ImageNet-2012|| +|[efficientnetb0_imagenet](image/classification/efficientnetb0_imagenet)|EfficientNet|ImageNet-2012|| +|[efficientnetb1_imagenet](image/classification/efficientnetb1_imagenet)|EfficientNet|ImageNet-2012|| +|[mobilenet_v2_imagenet_ssld](image/classification/mobilenet_v2_imagenet_ssld)|Mobilenet_v2|ImageNet-2012|| +|[resnet50_vd_dishes](image/classification/resnet50_vd_dishes)|ResNet50_vd|百度自建菜品数据集|| +|[pnasnet_imagenet](image/classification/pnasnet_imagenet)|PNASNet|ImageNet-2012|| +|[rexnet_2_0_imagenet](image/classification/rexnet_2_0_imagenet)|ReXNet|ImageNet-2012|| +|[SnakeIdentification](image/classification/SnakeIdentification)|ResNet50_vd_ssld|蛇种数据集|| +|[hrnet40_imagenet](image/classification/hrnet40_imagenet)|HRNet|ImageNet-2012|| +|[resnet_v2_34_imagenet](image/classification/resnet_v2_34_imagenet)|ResNet V2|ImageNet-2012|| +|[mobilenet_v2_dishes](image/classification/mobilenet_v2_dishes)|MobileNet_v2|百度自建菜品数据集|| +|[resnext101_vd_32x4d_imagenet](image/classification/resnext101_vd_32x4d_imagenet)|ResNeXt|ImageNet-2012|| +|[repvgg_b2g4_imagenet](image/classification/repvgg_b2g4_imagenet)|RepVGG|ImageNet-2012|| +|[fix_resnext101_32x48d_wsl_imagenet](image/classification/fix_resnext101_32x48d_wsl_imagenet)|ResNeXt|ImageNet-2012|| +|[vgg13_imagenet](image/classification/vgg13_imagenet)|VGG|ImageNet-2012|| +|[se_resnext101_32x4d_imagenet](image/classification/se_resnext101_32x4d_imagenet)|SE_ResNeXt|ImageNet-2012|| +|[hrnet30_imagenet](image/classification/hrnet30_imagenet)|HRNet|ImageNet-2012|| +|[ghostnet_x1_3_imagenet](image/classification/ghostnet_x1_3_imagenet)|GhostNet|ImageNet-2012|| +|[dpn107_imagenet](image/classification/dpn107_imagenet)|DPN|ImageNet-2012|| +|[densenet161_imagenet](image/classification/densenet161_imagenet)|DenseNet|ImageNet-2012|| +|[vgg19_imagenet](image/classification/vgg19_imagenet)|vgg19_imagenet|ImageNet-2012|| +|[mobilenet_v2_imagenet](image/classification/mobilenet_v2_imagenet)|Mobilenet_v2|ImageNet-2012|| +|[resnet50_vd_10w](image/classification/resnet50_vd_10w)|ResNet_vd|百度自建数据集|| +|[resnet_v2_101_imagenet](image/classification/resnet_v2_101_imagenet)|ResNet V2 
101|ImageNet-2012|| +|[darknet53_imagenet](image/classification/darknet53_imagenet)|DarkNet|ImageNet-2012|| +|[se_resnext50_32x4d_imagenet](image/classification/se_resnext50_32x4d_imagenet)|SE_ResNeXt|ImageNet-2012|| +|[se_hrnet64_imagenet_ssld](image/classification/se_hrnet64_imagenet_ssld)|HRNet|ImageNet-2012|| +|[resnext101_32x16d_wsl](image/classification/resnext101_32x16d_wsl)|ResNeXt_wsl|ImageNet-2012|| +|[hrnet18_imagenet](image/classification/hrnet18_imagenet)|HRNet|ImageNet-2012|| +|[spinalnet_res101_gemstone](image/classification/spinalnet_res101_gemstone)|resnet101|gemstone|| +|[densenet264_imagenet](image/classification/densenet264_imagenet)|DenseNet|ImageNet-2012|| +|[resnext50_vd_32x4d_imagenet](image/classification/resnext50_vd_32x4d_imagenet)|ResNeXt_vd|ImageNet-2012|| +|[SpinalNet_Gemstones](image/classification/SpinalNet_Gemstones)|||| +|[spinalnet_vgg16_gemstone](image/classification/spinalnet_vgg16_gemstone)|vgg16|gemstone|| +|[xception71_imagenet](image/classification/xception71_imagenet)|Xception|ImageNet-2012|| +|[repvgg_b2_imagenet](image/classification/repvgg_b2_imagenet)|RepVGG|ImageNet-2012|| +|[dpn68_imagenet](image/classification/dpn68_imagenet)|DPN|ImageNet-2012|| +|[alexnet_imagenet](image/classification/alexnet_imagenet)|AlexNet|ImageNet-2012|| +|[rexnet_1_3_imagenet](image/classification/rexnet_1_3_imagenet)|ReXNet|ImageNet-2012|| +|[hrnet64_imagenet](image/classification/hrnet64_imagenet)|HRNet|ImageNet-2012|| +|[efficientnetb7_imagenet](image/classification/efficientnetb7_imagenet)|EfficientNet|ImageNet-2012|| +|[efficientnetb0_small_imagenet](image/classification/efficientnetb0_small_imagenet)|EfficientNet|ImageNet-2012|| +|[efficientnetb6_imagenet](image/classification/efficientnetb6_imagenet)|EfficientNet|ImageNet-2012|| +|[hrnet48_imagenet](image/classification/hrnet48_imagenet)|HRNet|ImageNet-2012|| +|[rexnet_3_0_imagenet](image/classification/rexnet_3_0_imagenet)|ReXNet|ImageNet-2012|| +|[shufflenet_v2_imagenet](image/classification/shufflenet_v2_imagenet)|ShuffleNet V2|ImageNet-2012|| +|[ghostnet_x0_5_imagenet](image/classification/ghostnet_x0_5_imagenet)|GhostNet|ImageNet-2012|| +|[inception_v4_imagenet](image/classification/inception_v4_imagenet)|Inception_V4|ImageNet-2012|| +|[resnext101_vd_64x4d_imagenet](image/classification/resnext101_vd_64x4d_imagenet)|ResNeXt_vd|ImageNet-2012|| +|[densenet201_imagenet](image/classification/densenet201_imagenet)|DenseNet|ImageNet-2012|| +|[vgg16_imagenet](image/classification/vgg16_imagenet)|VGG|ImageNet-2012|| +|[mobilenet_v3_small_imagenet_ssld](image/classification/mobilenet_v3_small_imagenet_ssld)|Mobilenet_v3_Small|ImageNet-2012|| +|[hrnet18_imagenet_ssld](image/classification/hrnet18_imagenet_ssld)|HRNet|ImageNet-2012|| +|[resnext152_64x4d_imagenet](image/classification/resnext152_64x4d_imagenet)|ResNeXt|ImageNet-2012|| +|[efficientnetb3_imagenet](image/classification/efficientnetb3_imagenet)|EfficientNet|ImageNet-2012|| +|[efficientnetb2_imagenet](image/classification/efficientnetb2_imagenet)|EfficientNet|ImageNet-2012|| +|[repvgg_b1g4_imagenet](image/classification/repvgg_b1g4_imagenet)|RepVGG|ImageNet-2012|| +|[resnext101_32x4d_imagenet](image/classification/resnext101_32x4d_imagenet)|ResNeXt|ImageNet-2012|| +|[resnext50_32x4d_imagenet](image/classification/resnext50_32x4d_imagenet)|ResNeXt|ImageNet-2012|| +|[repvgg_a2_imagenet](image/classification/repvgg_a2_imagenet)|RepVGG|ImageNet-2012|| 
+|[resnext152_vd_64x4d_imagenet](image/classification/resnext152_vd_64x4d_imagenet)|ResNeXt_vd|ImageNet-2012|| +|[xception41_imagenet](image/classification/xception41_imagenet)|Xception|ImageNet-2012|| +|[googlenet_imagenet](image/classification/googlenet_imagenet)|GoogleNet|ImageNet-2012|| +|[resnet50_vd_imagenet_ssld](image/classification/resnet50_vd_imagenet_ssld)|ResNet_vd|ImageNet-2012|| +|[repvgg_b1_imagenet](image/classification/repvgg_b1_imagenet)|RepVGG|ImageNet-2012|| +|[repvgg_b0_imagenet](image/classification/repvgg_b0_imagenet)|RepVGG|ImageNet-2012|| +|[resnet_v2_50_imagenet](image/classification/resnet_v2_50_imagenet)|ResNet V2|ImageNet-2012|| +|[rexnet_1_0_imagenet](image/classification/rexnet_1_0_imagenet)|ReXNet|ImageNet-2012|| +|[resnet_v2_18_imagenet](image/classification/resnet_v2_18_imagenet)|ResNet V2|ImageNet-2012|| +|[resnext101_32x8d_wsl](image/classification/resnext101_32x8d_wsl)|ResNeXt_wsl|ImageNet-2012|| +|[efficientnetb4_imagenet](image/classification/efficientnetb4_imagenet)|EfficientNet|ImageNet-2012|| +|[efficientnetb5_imagenet](image/classification/efficientnetb5_imagenet)|EfficientNet|ImageNet-2012|| +|[repvgg_b1g2_imagenet](image/classification/repvgg_b1g2_imagenet)|RepVGG|ImageNet-2012|| +|[resnext101_32x48d_wsl](image/classification/resnext101_32x48d_wsl)|ResNeXt_wsl|ImageNet-2012|| +|[resnet50_vd_wildanimals](image/classification/resnet50_vd_wildanimals)|ResNet_vd|IFAW 自建野生动物数据集|| +|[nasnet_imagenet](image/classification/nasnet_imagenet)|NASNet|ImageNet-2012|| +|[se_resnet18_vd_imagenet](image/classification/se_resnet18_vd_imagenet)|||| +|[spinalnet_res50_gemstone](image/classification/spinalnet_res50_gemstone)|resnet50|gemstone|| +|[resnext50_vd_64x4d_imagenet](image/classification/resnext50_vd_64x4d_imagenet)|ResNeXt_vd|ImageNet-2012|| +|[resnext101_32x32d_wsl](image/classification/resnext101_32x32d_wsl)|ResNeXt_wsl|ImageNet-2012|| +|[dpn131_imagenet](image/classification/dpn131_imagenet)|DPN|ImageNet-2012|| +|[xception65_imagenet](image/classification/xception65_imagenet)|Xception|ImageNet-2012|| +|[repvgg_b3g4_imagenet](image/classification/repvgg_b3g4_imagenet)|RepVGG|ImageNet-2012|| +|[marine_biometrics](image/classification/marine_biometrics)|ResNet50_vd_ssld|Fish4Knowledge|| +|[res2net101_vd_26w_4s_imagenet](image/classification/res2net101_vd_26w_4s_imagenet)|Res2Net|ImageNet-2012|| +|[dpn98_imagenet](image/classification/dpn98_imagenet)|DPN|ImageNet-2012|| +|[resnet18_vd_imagenet](image/classification/resnet18_vd_imagenet)|ResNet_vd|ImageNet-2012|| +|[densenet121_imagenet](image/classification/densenet121_imagenet)|DenseNet|ImageNet-2012|| +|[vgg11_imagenet](image/classification/vgg11_imagenet)|VGG|ImageNet-2012|| +|[hrnet44_imagenet](image/classification/hrnet44_imagenet)|HRNet|ImageNet-2012|| +|[densenet169_imagenet](image/classification/densenet169_imagenet)|DenseNet|ImageNet-2012|| +|[hrnet32_imagenet](image/classification/hrnet32_imagenet)|HRNet|ImageNet-2012|| +|[dpn92_imagenet](image/classification/dpn92_imagenet)|DPN|ImageNet-2012|| +|[ghostnet_x1_0_imagenet](image/classification/ghostnet_x1_0_imagenet)|GhostNet|ImageNet-2012|| +|[hrnet48_imagenet_ssld](image/classification/hrnet48_imagenet_ssld)|HRNet|ImageNet-2012|| + +
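+上表中的分类module共用同一套PaddleHub加载流程。A minimal sketch is shown below, using `resnet50_vd_imagenet_ssld` from the table as an example; the `classification` call reflects the common interface of these classification modules but is an assumption here, so check the chosen module's own README for the exact signature.
+
+```python
+import cv2
+import paddlehub as hub
+
+# Load one of the image classification modules listed above.
+classifier = hub.Module(name="resnet50_vd_imagenet_ssld")
+
+# Predict on a locally decoded image (BGR numpy array from cv2.imread);
+# `classification` is the assumed prediction entry point for these modules.
+results = classifier.classification(images=[cv2.imread("/PATH/TO/IMAGE")])
+print(results)
+```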
+ + + - ### 图像生成 + +|module|网络|数据集|简介| +|--|--|--|--| +|[pixel2style2pixel](image/Image_gan/gan/pixel2style2pixel/)|Pixel2Style2Pixel|-|人脸转正| +|[stgan_bald](image/Image_gan/gan/stgan_bald/)|STGAN|CelebA|秃头生成器| +|[styleganv2_editing](image/Image_gan/gan/styleganv2_editing)|StyleGAN V2|-|人脸编辑| +|[wav2lip](image/Image_gan/gan/wav2lip)|wav2lip|LRS2|唇形生成| +|[attgan_celeba](image/Image_gan/attgan_celeba/)|AttGAN|Celeba|人脸编辑| +|[cyclegan_cityscapes](image/Image_gan/cyclegan_cityscapes)|CycleGAN|Cityscapes|实景图和语义分割结果互相转换| +|[stargan_celeba](image/Image_gan/stargan_celeba)|StarGAN|Celeba|人脸编辑| +|[stgan_celeba](image/Image_gan/stgan_celeba/)|STGAN|Celeba|人脸编辑| +|[ID_Photo_GEN](image/Image_gan/style_transfer/ID_Photo_GEN)|HRNet_W18|-|证件照生成| +|[Photo2Cartoon](image/Image_gan/style_transfer/Photo2Cartoon)|U-GAT-IT|cartoon_data|人脸卡通化| +|[U2Net_Portrait](image/Image_gan/style_transfer/U2Net_Portrait)|U^2Net|-|人脸素描化| +|[UGATIT_100w](image/Image_gan/style_transfer/UGATIT_100w)|U-GAT-IT|selfie2anime|人脸动漫化| +|[UGATIT_83w](image/Image_gan/style_transfer/UGATIT_83w)|U-GAT-IT|selfie2anime|人脸动漫化| +|[UGATIT_92w](image/Image_gan/style_transfer/UGATIT_92w)| U-GAT-IT|selfie2anime|人脸动漫化| +|[animegan_v1_hayao_60](image/Image_gan/style_transfer/animegan_v1_hayao_60)|AnimeGAN|The Wind Rises|图像风格迁移-宫崎骏| +|[animegan_v2_hayao_64](image/Image_gan/style_transfer/animegan_v2_hayao_64)|AnimeGAN|The Wind Rises|图像风格迁移-宫崎骏| +|[animegan_v2_hayao_99](image/Image_gan/style_transfer/animegan_v2_hayao_99)|AnimeGAN|The Wind Rises|图像风格迁移-宫崎骏| +|[animegan_v2_paprika_54](image/Image_gan/style_transfer/animegan_v2_paprika_54)|AnimeGAN|Paprika|图像风格迁移-今敏| +|[animegan_v2_paprika_74](image/Image_gan/style_transfer/animegan_v2_paprika_74)|AnimeGAN|Paprika|图像风格迁移-今敏| +|[animegan_v2_paprika_97](image/Image_gan/style_transfer/animegan_v2_paprika_97)|AnimeGAN|Paprika|图像风格迁移-今敏| +|[animegan_v2_paprika_98](image/Image_gan/style_transfer/animegan_v2_paprika_98)|AnimeGAN|Paprika|图像风格迁移-今敏| +|[animegan_v2_shinkai_33](image/Image_gan/style_transfer/animegan_v2_shinkai_33)|AnimeGAN|Your Name, Weathering with you|图像风格迁移-新海诚| +|[animegan_v2_shinkai_53](image/Image_gan/style_transfer/animegan_v2_shinkai_53)|AnimeGAN|Your Name, Weathering with you|图像风格迁移-新海诚| +|[msgnet](image/Image_gan/style_transfer/msgnet)|msgnet|COCO2014| +|[stylepro_artistic](image/Image_gan/style_transfer/stylepro_artistic)|StyleProNet|MS-COCO + WikiArt|艺术风格迁移| +|stylegan_ffhq|StyleGAN|FFHQ|图像风格迁移| + + - ### 关键点检测 + +|module|网络|数据集|简介| +|--|--|--|--| +|[face_landmark_localization](image/keypoint_detection/face_landmark_localization)|Face_Landmark|AFW/AFLW|人脸关键点检测| +|[hand_pose_localization](image/keypoint_detection/hand_pose_localization)|-|MPII, NZSL|手部关键点检测| +|[openpose_body_estimation](image/keypoint_detection/openpose_body_estimation)|two-branch multi-stage CNN|MPII, COCO 2016|肢体关键点检测| +|[human_pose_estimation_resnet50_mpii](image/keypoint_detection/human_pose_estimation_resnet50_mpii)|Pose_Resnet50|MPII|人体骨骼关键点检测 +|[openpose_hands_estimation](image/keypoint_detection/openpose_hands_estimation)|-|MPII, NZSL|手部关键点检测| + + - ### 图像分割 + +|module|网络|数据集|简介| +|--|--|--|--| +|[deeplabv3p_xception65_humanseg](image/semantic_segmentation/deeplabv3p_xception65_humanseg)|deeplabv3p|百度自建数据集|人像分割| +|[humanseg_server](image/semantic_segmentation/humanseg_server)|deeplabv3p|百度自建数据集|人像分割| +|[humanseg_mobile](image/semantic_segmentation/humanseg_mobile)|hrnet|百度自建数据集|人像分割-移动端前置摄像头| +|[humanseg_lite](image/semantic_segmentation/umanseg_lite)|shufflenet|百度自建数据集|轻量级人像分割-移动端实时| 
+|[ExtremeC3_Portrait_Segmentation](image/semantic_segmentation/ExtremeC3_Portrait_Segmentation)|ExtremeC3|EG1800, Baidu fashion dataset|轻量化人像分割|
+|[SINet_Portrait_Segmentation](image/semantic_segmentation/SINet_Portrait_Segmentation)|SINet|EG1800, Baidu fashion dataset|轻量化人像分割|
+|[FCN_HRNet_W18_Face_Seg](image/semantic_segmentation/FCN_HRNet_W18_Face_Seg)|FCN_HRNet_W18|-|人像分割|
+|[ace2p](image/semantic_segmentation/ace2p)|ACE2P|LIP|人体解析|
+|[Pneumonia_CT_LKM_PP](image/semantic_segmentation/Pneumonia_CT_LKM_PP)|U-NET+|连心医疗授权脱敏数据集|肺炎CT影像分析|
+|[Pneumonia_CT_LKM_PP_lung](image/semantic_segmentation/Pneumonia_CT_LKM_PP_lung)|U-NET+|连心医疗授权脱敏数据集|肺炎CT影像分析|
+|[ocrnet_hrnetw18_voc](image/semantic_segmentation/ocrnet_hrnetw18_voc)|ocrnet, hrnet|PascalVoc2012|
+|[U2Net](image/semantic_segmentation/U2Net)|U^2Net|-|图像前景背景分割|
+|[U2Netp](image/semantic_segmentation/U2Netp)|U^2Net|-|图像前景背景分割|
+|[Extract_Line_Draft](image/semantic_segmentation/Extract_Line_Draft)|UNet|Pixiv|线稿提取|
+|[unet_cityscapes](image/semantic_segmentation/unet_cityscapes)|UNet|cityscapes|
+|[ocrnet_hrnetw18_cityscapes](image/semantic_segmentation/ocrnet_hrnetw18_cityscapes)|ocrnet_hrnetw18|cityscapes|
+|[hardnet_cityscapes](image/semantic_segmentation/hardnet_cityscapes)|hardnet|cityscapes|
+|[fcn_hrnetw48_voc](image/semantic_segmentation/fcn_hrnetw48_voc)|fcn_hrnetw48|PascalVoc2012|
+|[fcn_hrnetw48_cityscapes](image/semantic_segmentation/fcn_hrnetw48_cityscapes)|fcn_hrnetw48|cityscapes|
+|[fcn_hrnetw18_voc](image/semantic_segmentation/fcn_hrnetw18_voc)|fcn_hrnetw18|PascalVoc2012|
+|[fcn_hrnetw18_cityscapes](image/semantic_segmentation/fcn_hrnetw18_cityscapes)|fcn_hrnetw18|cityscapes|
+|[fastscnn_cityscapes](image/semantic_segmentation/fastscnn_cityscapes)|fastscnn|cityscapes|
+|[deeplabv3p_resnet50_voc](image/semantic_segmentation/deeplabv3p_resnet50_voc)|deeplabv3p, resnet50|PascalVoc2012|
+|[deeplabv3p_resnet50_cityscapes](image/semantic_segmentation/deeplabv3p_resnet50_cityscapes)|deeplabv3p, resnet50|cityscapes|
+|[bisenetv2_cityscapes](image/semantic_segmentation/bisenetv2_cityscapes)|bisenetv2|cityscapes|
+
+
+
+ - ### 人脸检测
+
+|module|网络|数据集|简介|
+|--|--|--|--|
+|[pyramidbox_lite_mobile](image/face_detection/pyramidbox_lite_mobile)|PyramidBox|WIDER FACE数据集 + 百度自采人脸数据集|轻量级人脸检测-移动端|
+|[pyramidbox_lite_mobile_mask](image/face_detection/pyramidbox_lite_mobile_mask)|PyramidBox|WIDER FACE数据集 + 百度自采人脸数据集|轻量级人脸口罩检测-移动端|
+|[pyramidbox_lite_server_mask](image/face_detection/pyramidbox_lite_server_mask)|PyramidBox|WIDER FACE数据集 + 百度自采人脸数据集|轻量级人脸口罩检测|
+|[ultra_light_fast_generic_face_detector_1mb_640](image/face_detection/ultra_light_fast_generic_face_detector_1mb_640)|Ultra-Light-Fast-Generic-Face-Detector-1MB|WIDER FACE数据集|轻量级通用人脸检测-低算力设备|
+|[ultra_light_fast_generic_face_detector_1mb_320](image/face_detection/ultra_light_fast_generic_face_detector_1mb_320)|Ultra-Light-Fast-Generic-Face-Detector-1MB|WIDER FACE数据集|轻量级通用人脸检测-低算力设备|
+|[pyramidbox_lite_server](image/face_detection/pyramidbox_lite_server)|PyramidBox|WIDER FACE数据集 + 百度自采人脸数据集|轻量级人脸检测|
+|[pyramidbox_face_detection](image/face_detection/pyramidbox_face_detection)|PyramidBox|WIDER FACE数据集|人脸检测|
+
+ - ### 文字识别
+
+|module|网络|数据集|简介|
+|--|--|--|--|
+|[chinese_ocr_db_crnn_mobile](image/text_recognition/chinese_ocr_db_crnn_mobile)|Differentiable Binarization+RCNN|icdar2015数据集|中文文字识别|
+|[chinese_text_detection_db_mobile](image/text_recognition/chinese_text_detection_db_mobile)|Differentiable Binarization|icdar2015数据集|中文文本检测|
+|[chinese_text_detection_db_server](image/text_recognition/chinese_text_detection_db_server)|Differentiable Binarization|icdar2015数据集|中文文本检测| +|[chinese_ocr_db_crnn_server](image/text_recognition/chinese_ocr_db_crnn_server)|Differentiable Binarization+RCNN|icdar2015数据集|中文文字识别| +|[Vehicle_License_Plate_Recognition](image/text_recognition/Vehicle_License_Plate_Recognition)|-|CCPD|车牌识别| +|[chinese_cht_ocr_db_crnn_mobile](image/text_recognition/chinese_cht_ocr_db_crnn_mobile)|Differentiable Binarization+CRNN|icdar2015数据集|繁体中文文字识别| +|[japan_ocr_db_crnn_mobile](image/text_recognition/japan_ocr_db_crnn_mobile)|Differentiable Binarization+CRNN|icdar2015数据集|日文文字识别| +|[korean_ocr_db_crnn_mobile](image/text_recognition/korean_ocr_db_crnn_mobile)|Differentiable Binarization+CRNN|icdar2015数据集|韩文文字识别| +|[german_ocr_db_crnn_mobile](image/text_recognition/german_ocr_db_crnn_mobile)|Differentiable Binarization+CRNN|icdar2015数据集|德文文字识别| +|[french_ocr_db_crnn_mobile](image/text_recognition/french_ocr_db_crnn_mobile)|Differentiable Binarization+CRNN|icdar2015数据集|法文文字识别| +|[latin_ocr_db_crnn_mobile](image/text_recognition/latin_ocr_db_crnn_mobile)|Differentiable Binarization+CRNN|icdar2015数据集|拉丁文文字识别| +|[cyrillic_ocr_db_crnn_mobile](image/text_recognition/cyrillic_ocr_db_crnn_mobile)|Differentiable Binarization+CRNN|icdar2015数据集|斯拉夫文文字识别| +|[multi_languages_ocr_db_crnn](image/text_recognition/multi_languages_ocr_db_crnn)|Differentiable Binarization+RCNN|icdar2015数据集|多语言文字识别| +|[kannada_ocr_db_crnn_mobile](image/text_recognition/kannada_ocr_db_crnn_mobile)|Differentiable Binarization+CRNN|icdar2015数据集|卡纳达文文字识别| +|[arabic_ocr_db_crnn_mobile](image/text_recognition/arabic_ocr_db_crnn_mobile)|Differentiable Binarization+CRNN|icdar2015数据集|阿拉伯文文字识别| +|[telugu_ocr_db_crnn_mobile](image/text_recognition/telugu_ocr_db_crnn_mobile)|Differentiable Binarization+CRNN|icdar2015数据集|泰卢固文文字识别| +|[devanagari_ocr_db_crnn_mobile](image/text_recognition/devanagari_ocr_db_crnn_mobile)|Differentiable Binarization+CRNN|icdar2015数据集|梵文文字识别| +|[tamil_ocr_db_crnn_mobile](image/text_recognition/tamil_ocr_db_crnn_mobile)|Differentiable Binarization+CRNN|icdar2015数据集|泰米尔文文字识别| + + + - ### 图像编辑 + +|module|网络|数据集|简介| +|--|--|--|--| +|[realsr](image/Image_editing/super_resolution/realsr)|LP-KPN|RealSR dataset|图像/视频超分-4倍| +|[deoldify](image/Image_editing/colorization/deoldify)|GAN|ILSVRC 2012|黑白照片/视频着色| +|[photo_restoration](image/Image_editing/colorization/photo_restoration)|基于deoldify和realsr模型|-|老照片修复| +|[user_guided_colorization](image/Image_editing/colorization/user_guided_colorization)|siggraph|ILSVRC 2012|图像着色| +|[falsr_c](image/Image_editing/super_resolution/falsr_c)|falsr_c| DIV2k|轻量化超分-2倍| +|[dcscn](image/Image_editing/super_resolution/dcscn)|dcscn| DIV2k|轻量化超分-2倍| +|[falsr_a](image/Image_editing/super_resolution/falsr_a)|falsr_a| DIV2k|轻量化超分-2倍| +|[falsr_b](image/Image_editing/super_resolution/falsr_b)|falsr_b|DIV2k|轻量化超分-2倍| + + - ### 实例分割 + +|module|网络|数据集|简介| +|--|--|--|--| +|[solov2](image/instance_segmentation/solov2)|-|COCO2014|实例分割| + + - ### 目标检测 + +|module|网络|数据集|简介| +|--|--|--|--| +|[faster_rcnn_resnet50_coco2017](image/object_detection/faster_rcnn_resnet50_coco2017)|faster_rcnn|COCO2017|| +|[ssd_vgg16_512_coco2017](image/object_detection/ssd_vgg16_512_coco2017)|SSD|COCO2017|| +|[faster_rcnn_resnet50_fpn_venus](image/object_detection/faster_rcnn_resnet50_fpn_venus)|faster_rcnn|百度自建数据集|大规模通用目标检测| +|[ssd_vgg16_300_coco2017](image/object_detection/ssd_vgg16_300_coco2017)|||| 
+|[yolov3_resnet34_coco2017](image/object_detection/yolov3_resnet34_coco2017)|YOLOv3|COCO2017|| +|[yolov3_darknet53_pedestrian](image/object_detection/yolov3_darknet53_pedestrian)|YOLOv3|百度自建大规模行人数据集|行人检测| +|[yolov3_mobilenet_v1_coco2017](image/object_detection/yolov3_mobilenet_v1_coco2017)|YOLOv3|COCO2017|| +|[ssd_mobilenet_v1_pascal](image/object_detection/ssd_mobilenet_v1_pascal)|SSD|PASCAL VOC|| +|[faster_rcnn_resnet50_fpn_coco2017](image/object_detection/faster_rcnn_resnet50_fpn_coco2017)|faster_rcnn|COCO2017|| +|[yolov3_darknet53_coco2017](image/object_detection/yolov3_darknet53_coco2017)|YOLOv3|COCO2017|| +|[yolov3_darknet53_vehicles](image/object_detection/yolov3_darknet53_vehicles)|YOLOv3|百度自建大规模车辆数据集|车辆检测| +|[yolov3_darknet53_venus](image/object_detection/yolov3_darknet53_venus)|YOLOv3|百度自建数据集|大规模通用检测| +|[yolov3_resnet50_vd_coco2017](image/object_detection/yolov3_resnet50_vd_coco2017)|YOLOv3|COCO2017|| + + - ### 深度估计 + +|module|网络|数据集|简介| +|--|--|--|--| +|[MiDaS_Large](image/depth_estimation/MiDaS_Large)|-|3D Movies, WSVD, ReDWeb, MegaDepth|| +|[MiDaS_Small](image/depth_estimation/MiDaS_Small)|-|3D Movies, WSVD, ReDWeb, MegaDepth, etc.|| + +## 文本 + - ### 文本生成 + +|module|网络|数据集|简介| +|--|--|--|--| +|[ernie_gen](text/text_generation/ernie_gen)|ERNIE-GEN|-|面向生成任务的预训练-微调框架| +|[ernie_gen_poetry](text/text_generation/ernie_gen_poetry)|ERNIE-GEN|开源诗歌数据集|诗歌生成| +|[ernie_gen_couplet](text/text_generation/ernie_gen_couplet)|ERNIE-GEN|开源对联数据集|对联生成| +|[ernie_gen_lover_words](text/text_generation/ernie_gen_lover_words)|ERNIE-GEN|网络情诗、情话数据|情话生成| +|[ernie_tiny_couplet](text/text_generation/ernie_tiny_couplet)|Eernie_tiny|开源对联数据集|对联生成| +|[ernie_gen_acrostic_poetry](text/text_generation/ernie_gen_acrostic_poetry)|ERNIE-GEN|开源诗歌数据集|藏头诗生成| +|[Rumor_prediction](text/text_generation/Rumor_prediction)|-|新浪微博中文谣言数据|谣言预测| +|[plato-mini](text/text_generation/plato-mini)|Unified Transformer|十亿级别的中文对话数据|中文对话| +|[plato2_en_large](text/text_generation/plato2_en_large)|plato2|开放域多轮数据集|超大规模生成式对话| +|[plato2_en_base](text/text_generation/plato2_en_base)|plato2|开放域多轮数据集|超大规模生成式对话| +|[CPM_LM](text/text_generation/CPM_LM)|GPT-2|自建数据集|中文文本生成| +|[unified_transformer-12L-cn](text/text_generation/unified_transformer-12L-cn)|Unified Transformer|千万级别中文会话数据|人机多轮对话| +|[unified_transformer-12L-cn-luge](text/text_generation/unified_transformer-12L-cn-luge)|Unified Transformer|千言对话数据集|人机多轮对话| +|[reading_pictures_writing_poems](text/text_generation/reading_pictures_writing_poems)|多网络级联|-|看图写诗| +|[GPT2_CPM_LM](text/text_generation/GPT2_CPM_LM)|||问答类文本生成| +|[GPT2_Base_CN](text/text_generation/GPT2_Base_CN)|||问答类文本生成| + + - ### 词向量 + +
+ +|module|网络|数据集|简介| +|--|--|--|--| +|[w2v_weibo_target_word-bigram_dim300](text/embedding/w2v_weibo_target_word-bigram_dim300)|w2v|weibo|| +|[w2v_baidu_encyclopedia_target_word-ngram_1-2_dim300](text/embedding/w2v_baidu_encyclopedia_target_word-ngram_1-2_dim300)|w2v|baidu_encyclopedia|| +|[w2v_literature_target_word-word_dim300](text/embedding/w2v_literature_target_word-word_dim300)|w2v|literature|| +|[word2vec_skipgram](text/embedding/word2vec_skipgram)|skip-gram|百度自建数据集|| +|[w2v_sogou_target_word-char_dim300](text/embedding/w2v_sogou_target_word-char_dim300)|w2v|sogou|| +|[w2v_weibo_target_bigram-char_dim300](text/embedding/w2v_weibo_target_bigram-char_dim300)|w2v|weibo|| +|[w2v_zhihu_target_word-bigram_dim300](text/embedding/w2v_zhihu_target_word-bigram_dim300)|w2v|zhihu|| +|[w2v_financial_target_word-word_dim300](text/embedding/w2v_financial_target_word-word_dim300)|w2v|financial|| +|[w2v_wiki_target_word-word_dim300](text/embedding/w2v_wiki_target_word-word_dim300)|w2v|wiki|| +|[w2v_baidu_encyclopedia_context_word-word_dim300](text/embedding/w2v_baidu_encyclopedia_context_word-word_dim300)|w2v|baidu_encyclopedia|| +|[w2v_weibo_target_word-word_dim300](text/embedding/w2v_weibo_target_word-word_dim300)|w2v|weibo|| +|[w2v_zhihu_target_bigram-char_dim300](text/embedding/w2v_zhihu_target_bigram-char_dim300)|w2v|zhihu|| +|[w2v_zhihu_target_word-word_dim300](text/embedding/w2v_zhihu_target_word-word_dim300)|w2v|zhihu|| +|[w2v_people_daily_target_word-char_dim300](text/embedding/w2v_people_daily_target_word-char_dim300)|w2v|people_daily|| +|[w2v_sikuquanshu_target_word-word_dim300](text/embedding/w2v_sikuquanshu_target_word-word_dim300)|w2v|sikuquanshu|| +|[glove_twitter_target_word-word_dim200_en](text/embedding/glove_twitter_target_word-word_dim200_en)|fasttext|twitter|| +|[fasttext_crawl_target_word-word_dim300_en](text/embedding/fasttext_crawl_target_word-word_dim300_en)|fasttext|crawl|| +|[w2v_wiki_target_word-bigram_dim300](text/embedding/w2v_wiki_target_word-bigram_dim300)|w2v|wiki|| +|[w2v_baidu_encyclopedia_context_word-character_char1-1_dim300](text/embedding/w2v_baidu_encyclopedia_context_word-character_char1-1_dim300)|w2v|baidu_encyclopedia|| +|[glove_wiki2014-gigaword_target_word-word_dim300_en](text/embedding/glove_wiki2014-gigaword_target_word-word_dim300_en)|glove|wiki2014-gigaword|| +|[glove_wiki2014-gigaword_target_word-word_dim50_en](text/embedding/glove_wiki2014-gigaword_target_word-word_dim50_en)|glove|wiki2014-gigaword|| +|[w2v_baidu_encyclopedia_context_word-ngram_2-2_dim300](text/embedding/w2v_baidu_encyclopedia_context_word-ngram_2-2_dim300)|w2v|baidu_encyclopedia|| +|[w2v_wiki_target_bigram-char_dim300](text/embedding/w2v_wiki_target_bigram-char_dim300)|w2v|wiki|| +|[w2v_baidu_encyclopedia_target_word-character_char1-1_dim300](text/embedding/w2v_baidu_encyclopedia_target_word-character_char1-1_dim300)|w2v|baidu_encyclopedia|| +|[w2v_financial_target_bigram-char_dim300](text/embedding/w2v_financial_target_bigram-char_dim300)|w2v|financial|| +|[glove_wiki2014-gigaword_target_word-word_dim200_en](text/embedding/glove_wiki2014-gigaword_target_word-word_dim200_en)|glove|wiki2014-gigaword|| +|[w2v_financial_target_word-bigram_dim300](text/embedding/w2v_financial_target_word-bigram_dim300)|w2v|financial|| +|[w2v_mixed-large_target_word-char_dim300](text/embedding/w2v_mixed-large_target_word-char_dim300)|w2v|mixed|| +|[w2v_baidu_encyclopedia_target_word-wordPosition_dim300](text/embedding/w2v_baidu_encyclopedia_target_word-wordPosition_dim300)|w2v|baidu_encyclopedia|| 
+|[w2v_baidu_encyclopedia_context_word-ngram_1-3_dim300](text/embedding/w2v_baidu_encyclopedia_context_word-ngram_1-3_dim300)|w2v|baidu_encyclopedia|| +|[w2v_baidu_encyclopedia_target_word-wordLR_dim300](text/embedding/w2v_baidu_encyclopedia_target_word-wordLR_dim300)|w2v|baidu_encyclopedia|| +|[w2v_sogou_target_bigram-char_dim300](text/embedding/w2v_sogou_target_bigram-char_dim300)|w2v|sogou|| +|[w2v_weibo_target_word-char_dim300](text/embedding/w2v_weibo_target_word-char_dim300)|w2v|weibo|| +|[w2v_people_daily_target_word-word_dim300](text/embedding/w2v_people_daily_target_word-word_dim300)|w2v|people_daily|| +|[w2v_zhihu_target_word-char_dim300](text/embedding/w2v_zhihu_target_word-char_dim300)|w2v|zhihu|| +|[w2v_wiki_target_word-char_dim300](text/embedding/w2v_wiki_target_word-char_dim300)|w2v|wiki|| +|[w2v_sogou_target_word-bigram_dim300](text/embedding/w2v_sogou_target_word-bigram_dim300)|w2v|sogou|| +|[w2v_financial_target_word-char_dim300](text/embedding/w2v_financial_target_word-char_dim300)|w2v|financial|| +|[w2v_baidu_encyclopedia_target_word-ngram_1-3_dim300](text/embedding/w2v_baidu_encyclopedia_target_word-ngram_1-3_dim300)|w2v|baidu_encyclopedia|| +|[glove_wiki2014-gigaword_target_word-word_dim100_en](text/embedding/glove_wiki2014-gigaword_target_word-word_dim100_en)|glove|wiki2014-gigaword|| +|[w2v_baidu_encyclopedia_target_word-character_char1-4_dim300](text/embedding/w2v_baidu_encyclopedia_target_word-character_char1-4_dim300)|w2v|baidu_encyclopedia|| +|[w2v_sogou_target_word-word_dim300](text/embedding/w2v_sogou_target_word-word_dim300)|w2v|sogou|| +|[w2v_literature_target_word-char_dim300](text/embedding/w2v_literature_target_word-char_dim300)|w2v|literature|| +|[w2v_baidu_encyclopedia_target_bigram-char_dim300](text/embedding/w2v_baidu_encyclopedia_target_bigram-char_dim300)|w2v|baidu_encyclopedia|| +|[w2v_baidu_encyclopedia_target_word-word_dim300](text/embedding/w2v_baidu_encyclopedia_target_word-word_dim300)|w2v|baidu_encyclopedia|| +|[glove_twitter_target_word-word_dim100_en](text/embedding/glove_twitter_target_word-word_dim100_en)|glove|crawl|| +|[w2v_baidu_encyclopedia_target_word-ngram_2-2_dim300](text/embedding/w2v_baidu_encyclopedia_target_word-ngram_2-2_dim300)|w2v|baidu_encyclopedia|| +|[w2v_baidu_encyclopedia_context_word-character_char1-4_dim300](text/embedding/w2v_baidu_encyclopedia_context_word-character_char1-4_dim300)|w2v|baidu_encyclopedia|| +|[w2v_literature_target_bigram-char_dim300](text/embedding/w2v_literature_target_bigram-char_dim300)|w2v|literature|| +|[fasttext_wiki-news_target_word-word_dim300_en](text/embedding/fasttext_wiki-news_target_word-word_dim300_en)|fasttext|wiki-news|| +|[w2v_people_daily_target_word-bigram_dim300](text/embedding/w2v_people_daily_target_word-bigram_dim300)|w2v|people_daily|| +|[w2v_mixed-large_target_word-word_dim300](text/embedding/w2v_mixed-large_target_word-word_dim300)|w2v|mixed|| +|[w2v_people_daily_target_bigram-char_dim300](text/embedding/w2v_people_daily_target_bigram-char_dim300)|w2v|people_daily|| +|[w2v_literature_target_word-bigram_dim300](text/embedding/w2v_literature_target_word-bigram_dim300)|w2v|literature|| +|[glove_twitter_target_word-word_dim25_en](text/embedding/glove_twitter_target_word-word_dim25_en)|glove|twitter|| +|[w2v_baidu_encyclopedia_context_word-ngram_1-2_dim300](text/embedding/w2v_baidu_encyclopedia_context_word-ngram_1-2_dim300)|w2v|baidu_encyclopedia|| +|[w2v_sikuquanshu_target_word-bigram_dim300](text/embedding/w2v_sikuquanshu_target_word-bigram_dim300)|w2v|sikuquanshu|| 
+|[w2v_baidu_encyclopedia_context_word-character_char1-2_dim300](text/embedding/w2v_baidu_encyclopedia_context_word-character_char1-2_dim300)|w2v|baidu_encyclopedia|| +|[glove_twitter_target_word-word_dim50_en](text/embedding/glove_twitter_target_word-word_dim50_en)|glove|twitter|| +|[w2v_baidu_encyclopedia_context_word-wordLR_dim300](text/embedding/w2v_baidu_encyclopedia_context_word-wordLR_dim300)|w2v|baidu_encyclopedia|| +|[w2v_baidu_encyclopedia_target_word-character_char1-2_dim300](text/embedding/w2v_baidu_encyclopedia_target_word-character_char1-2_dim300)|w2v|baidu_encyclopedia|| +|[w2v_baidu_encyclopedia_context_word-wordPosition_dim300](text/embedding/w2v_baidu_encyclopedia_context_word-wordPosition_dim300)|w2v|baidu_encyclopedia|| + +
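+上表中的词向量module可通过同一入口加载。The sketch below assumes the token-embedding style `search`/`cosine_sim` calls commonly documented for these modules; verify against the specific module's README before use.
+
+```python
+import paddlehub as hub
+
+# Load one of the word embedding modules listed above.
+embedding = hub.Module(name="w2v_baidu_encyclopedia_target_word-word_dim300")
+
+# Look up the vector for a word (assumed `search` interface).
+vec = embedding.search("中国")
+
+# Cosine similarity between two words (assumed `cosine_sim` interface).
+print(embedding.cosine_sim("中国", "美国"))
+```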
+ + - ### 机器翻译 + +|module|网络|数据集|简介| +|--|--|--|--| +|[transformer_zh-en](text/machine_translation/transformer/transformer_zh-en)|Transformer|CWMT2021|中文译英文| +|[transformer_en-de](text/machine_translation/transformer/transformer_en-de)|Transformer|WMT14 EN-DE|英文译德文| + + - ### 语义模型 + +
+ +|module|网络|数据集|简介| +|--|--|--|--| +|[chinese_electra_small](text/language_model/chinese_electra_small)|||| +|[chinese_electra_base](text/language_model/chinese_electra_base)|||| +|[roberta-wwm-ext-large](text/language_model/roberta-wwm-ext-large)|roberta-wwm-ext-large|百度自建数据集|| +|[chinese-bert-wwm-ext](text/language_model/chinese_bert_wwm_ext)|chinese-bert-wwm-ext|百度自建数据集|| +|[lda_webpage](text/language_model/lda_webpage)|LDA|百度自建网页领域数据集|| +|[lda_novel](text/language_model/lda_novel)|||| +|[bert-base-multilingual-uncased](text/language_model/bert-base-multilingual-uncased)|||| +|[rbt3](text/language_model/rbt3)|||| +|[ernie_v2_eng_base](text/language_model/ernie_v2_eng_base)|ernie_v2_eng_base|百度自建数据集|| +|[bert-base-multilingual-cased](text/language_model/bert-base-multilingual-cased)|||| +|[rbtl3](text/language_model/rbtl3)|||| +|[chinese-bert-wwm](text/language_model/chinese_bert_wwm)|chinese-bert-wwm|百度自建数据集|| +|[bert-large-uncased](text/language_model/bert-large-uncased)|||| +|[slda_novel](text/language_model/slda_novel)|||| +|[slda_news](text/language_model/slda_news)|||| +|[electra_small](text/language_model/electra_small)|||| +|[slda_webpage](text/language_model/slda_webpage)|||| +|[bert-base-cased](text/language_model/bert-base-cased)|||| +|[slda_weibo](text/language_model/slda_weibo)|||| +|[roberta-wwm-ext](text/language_model/roberta-wwm-ext)|roberta-wwm-ext|百度自建数据集|| +|[bert-base-uncased](text/language_model/bert-base-uncased)|||| +|[electra_large](text/language_model/electra_large)|||| +|[ernie](text/language_model/ernie)|ernie-1.0|百度自建数据集|| +|[simnet_bow](text/language_model/simnet_bow)|BOW|百度自建数据集|| +|[ernie_tiny](text/language_model/ernie_tiny)|ernie_tiny|百度自建数据集|| +|[bert-base-chinese](text/language_model/bert-base-chinese)|bert-base-chinese|百度自建数据集|| +|[lda_news](text/language_model/lda_news)|LDA|百度自建新闻领域数据集|| +|[electra_base](text/language_model/electra_base)|||| +|[ernie_v2_eng_large](text/language_model/ernie_v2_eng_large)|ernie_v2_eng_large|百度自建数据集|| +|[bert-large-cased](text/language_model/bert-large-cased)|||| + +
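+上表中的多数语义模型也可作为下游任务的预训练骨干。A rough sketch follows, assuming the PaddleHub 2.x `task='seq-cls'` loading interface (arguments vary by module; see each module's README):
+
+```python
+import paddlehub as hub
+
+# Load ernie_tiny (listed above) as a sequence-classification model with 2 labels.
+# The task/num_classes arguments follow the assumed PaddleHub 2.x fine-tuning interface.
+model = hub.Module(name="ernie_tiny", task="seq-cls", num_classes=2)
+
+# The resulting model can then be fine-tuned on a labeled dataset,
+# e.g. with a PaddleHub Trainer, before being used for prediction.
+```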
+
+
+ - ### 情感分析
+
+|module|网络|数据集|简介|
+|--|--|--|--|
+|[ernie_skep_sentiment_analysis](text/sentiment_analysis/ernie_skep_sentiment_analysis)|SKEP|百度自建数据集|句子级情感分析|
+|[emotion_detection_textcnn](text/sentiment_analysis/emotion_detection_textcnn)|TextCNN|百度自建数据集|对话情绪识别|
+|[senta_bilstm](text/sentiment_analysis/senta_bilstm)|BiLSTM|百度自建数据集|中文情感倾向分析|
+|[senta_bow](text/sentiment_analysis/senta_bow)|BOW|百度自建数据集|中文情感倾向分析|
+|[senta_gru](text/sentiment_analysis/senta_gru)|GRU|百度自建数据集|中文情感倾向分析|
+|[senta_lstm](text/sentiment_analysis/senta_lstm)|LSTM|百度自建数据集|中文情感倾向分析|
+|[senta_cnn](text/sentiment_analysis/senta_cnn)|CNN|百度自建数据集|中文情感倾向分析|
+
+ - ### 句法分析
+
+|module|网络|数据集|简介|
+|--|--|--|--|
+|[DDParser](text/syntactic_analysis/DDParser)|Deep Biaffine Attention|搜索query、网页文本、语音输入等数据|句法分析|
+
+ - ### 同声传译
+
+|module|网络|数据集|简介|
+|--|--|--|--|
+|[transformer_nist_wait_1](text/simultaneous_translation/stacl/transformer_nist_wait_1)|transformer|NIST 2008-中英翻译数据集|中译英-wait-1策略|
+|[transformer_nist_wait_3](text/simultaneous_translation/stacl/transformer_nist_wait_3)|transformer|NIST 2008-中英翻译数据集|中译英-wait-3策略|
+|[transformer_nist_wait_5](text/simultaneous_translation/stacl/transformer_nist_wait_5)|transformer|NIST 2008-中英翻译数据集|中译英-wait-5策略|
+|[transformer_nist_wait_7](text/simultaneous_translation/stacl/transformer_nist_wait_7)|transformer|NIST 2008-中英翻译数据集|中译英-wait-7策略|
+|[transformer_nist_wait_all](text/simultaneous_translation/stacl/transformer_nist_wait_all)|transformer|NIST 2008-中英翻译数据集|中译英-waitk=-1策略|
+
+
+ - ### 词法分析
+
+|module|网络|数据集|简介|
+|--|--|--|--|
+|[jieba_paddle](text/lexical_analysis/jieba_paddle)|BiGRU+CRF|百度自建数据集|jieba使用Paddle搭建的切词网络(双向GRU)。同时支持jieba的传统切词方法,如精确模式、全模式、搜索引擎模式等切词模式。|
+|[lac](text/lexical_analysis/lac)|BiGRU+CRF|百度自建数据集|百度自研联合的词法分析模型,能整体性地完成中文分词、词性标注、专名识别任务。在百度自建数据集上评测,LAC效果:Precision=88.0%,Recall=88.7%,F1-Score=88.4%。|
+
+ - ### 标点恢复
+
+|module|网络|数据集|简介|
+|--|--|--|--|
+|[auto_punc](text/punctuation_restoration/auto_punc)|Ernie-1.0|WuDaoCorpora 2.0|自动添加7种标点符号|
+
+ - ### 文本审核
+
+|module|网络|数据集|简介|
+|--|--|--|--|
+|[porn_detection_cnn](text/text_review/porn_detection_cnn)|CNN|百度自建数据集|色情检测,自动判别文本是否涉黄并给出相应的置信度,对文本中的色情描述、低俗交友、污秽文案进行识别|
+|[porn_detection_gru](text/text_review/porn_detection_gru)|GRU|百度自建数据集|色情检测,自动判别文本是否涉黄并给出相应的置信度,对文本中的色情描述、低俗交友、污秽文案进行识别|
+|[porn_detection_lstm](text/text_review/porn_detection_lstm)|LSTM|百度自建数据集|色情检测,自动判别文本是否涉黄并给出相应的置信度,对文本中的色情描述、低俗交友、污秽文案进行识别|
+
+## 语音
+ - ### 声音克隆
+
+|module|网络|数据集|简介|
+|--|--|--|--|
+|[ge2e_fastspeech2_pwgan](audio/voice_cloning/ge2e_fastspeech2_pwgan)|FastSpeech2|AISHELL-3|中文语音克隆|
+|[lstm_tacotron2](audio/voice_cloning/lstm_tacotron2)|LSTM、Tacotron2、WaveFlow|AISHELL-3|中文语音克隆|
+
+ - ### 语音合成
+
+|module|网络|数据集|简介|
+|--|--|--|--|
+|[transformer_tts_ljspeech](audio/tts/transformer_tts_ljspeech)|Transformer|LJSpeech-1.1|英文语音合成|
+|[fastspeech_ljspeech](audio/tts/fastspeech_ljspeech)|FastSpeech|LJSpeech-1.1|英文语音合成|
+|[fastspeech2_baker](audio/tts/fastspeech2_baker)|FastSpeech2|Chinese Standard Mandarin Speech Corpus|中文语音合成|
+|[fastspeech2_ljspeech](audio/tts/fastspeech2_ljspeech)|FastSpeech2|LJSpeech-1.1|英文语音合成|
+|[deepvoice3_ljspeech](audio/tts/deepvoice3_ljspeech)|DeepVoice3|LJSpeech-1.1|英文语音合成|
+
+ - ### 语音识别
+
+|module|网络|数据集|简介|
+|--|--|--|--|
+|[deepspeech2_aishell](audio/asr/deepspeech2_aishell)|DeepSpeech2|AISHELL-1|中文语音识别|
+|[deepspeech2_librispeech](audio/asr/deepspeech2_librispeech)|DeepSpeech2|LibriSpeech|英文语音识别|
+|[u2_conformer_aishell](audio/asr/u2_conformer_aishell)|Conformer|AISHELL-1|中文语音识别|
+|[u2_conformer_wenetspeech](audio/asr/u2_conformer_wenetspeech)|Conformer|WenetSpeech|中文语音识别| +|[u2_conformer_librispeech](audio/asr/u2_conformer_librispeech)|Conformer|LibriSpeech|英文语音识别| + + + - ### 声音分类 + +|module|网络|数据集|简介| +|--|--|--|--| +|[panns_cnn6](audio/audio_classification/PANNs/cnn6)|PANNs|Google Audioset|主要包含4个卷积层和2个全连接层,模型参数为4.5M。经过预训练后,可以用于提取音频的embbedding,维度是512| +|[panns_cnn14](audio/audio_classification/PANNs/cnn14)|PANNs|Google Audioset|主要包含12个卷积层和2个全连接层,模型参数为79.6M。经过预训练后,可以用于提取音频的embbedding,维度是2048| +|[panns_cnn10](audio/audio_classification/PANNs/cnn10)|PANNs|Google Audioset|主要包含8个卷积层和2个全连接层,模型参数为4.9M。经过预训练后,可以用于提取音频的embbedding,维度是512| + +## 视频 + - ### 视频分类 + +|module|网络|数据集|简介| +|--|--|--|--| +|[videotag_tsn_lstm](video/classification/videotag_tsn_lstm)|TSN + AttentionLSTM|百度自建数据集|大规模短视频分类打标签| +|[tsn_kinetics400](video/classification/tsn_kinetics400)|TSN|Kinetics-400|视频分类| +|[tsm_kinetics400](video/classification/tsm_kinetics400)|TSM|Kinetics-400|视频分类| +|[stnet_kinetics400](video/classification/stnet_kinetics400)|StNet|Kinetics-400|视频分类| +|[nonlocal_kinetics400](video/classification/nonlocal_kinetics400)|Non-local|Kinetics-400|视频分类| + + + - ### 视频修复 + +|module|网络|数据集|简介| +|--|--|--|--| +|[SkyAR](video/Video_editing/SkyAR)|UNet|UNet|视频换天| + + - ### 多目标追踪 + +|module|网络|数据集|简介| +|--|--|--|--| +|[fairmot_dla34](video/multiple_object_tracking/fairmot_dla34)|CenterNet|Caltech Pedestrian+CityPersons+CUHK-SYSU+PRW+ETHZ+MOT17|实时多目标跟踪| +|[jde_darknet53](video/multiple_object_tracking/jde_darknet53)|YOLOv3|Caltech Pedestrian+CityPersons+CUHK-SYSU+PRW+ETHZ+MOT17|多目标跟踪-兼顾精度和速度| + +## 工业应用 + + - ### 表针识别 + +|module|网络|数据集|简介| +|--|--|--|--| +|[WatermeterSegmentation](image/semantic_segmentation/WatermeterSegmentation)|DeepLabV3|水表的数字表盘分割数据集|水表的数字表盘分割| diff --git a/modules/audio/asr/deepspeech2_aishell/README.md b/modules/audio/asr/deepspeech2_aishell/README.md new file mode 100644 index 0000000000000000000000000000000000000000..a75ba672279a75e60d7465989c6452dcb65817fa --- /dev/null +++ b/modules/audio/asr/deepspeech2_aishell/README.md @@ -0,0 +1,153 @@ +# deepspeech2_aishell + +|模型名称|deepspeech2_aishell| +| :--- | :---: | +|类别|语音-语音识别| +|网络|DeepSpeech2| +|数据集|AISHELL-1| +|是否支持Fine-tuning|否| +|模型大小|306MB| +|最新更新日期|2021-10-20| +|数据指标|中文CER 0.065| + +## 一、模型基本信息 + +### 模型介绍 + +DeepSpeech2是百度于2015年提出的适用于英文和中文的end-to-end语音识别模型。deepspeech2_aishell使用了DeepSpeech2离线模型的结构,模型主要由2层卷积网络和3层GRU组成,并在中文普通话开源语音数据集[AISHELL-1](http://www.aishelltech.com/kysjcp)进行了预训练,该模型在其测试集上的CER指标是0.065。 + + +
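+The CER figure quoted above is the character error rate: the character-level edit distance between the recognized text and the reference transcript, divided by the reference length. The snippet below is only a plain-Python illustration of that definition and is not part of the module itself:
+
+```python
+def cer(ref: str, hyp: str) -> float:
+    """Character error rate: edit_distance(ref, hyp) / len(ref)."""
+    # Standard dynamic-programming Levenshtein distance over characters.
+    d = [[0] * (len(hyp) + 1) for _ in range(len(ref) + 1)]
+    for i in range(len(ref) + 1):
+        d[i][0] = i
+    for j in range(len(hyp) + 1):
+        d[0][j] = j
+    for i in range(1, len(ref) + 1):
+        for j in range(1, len(hyp) + 1):
+            cost = 0 if ref[i - 1] == hyp[j - 1] else 1
+            d[i][j] = min(d[i - 1][j] + 1,          # deletion
+                          d[i][j - 1] + 1,          # insertion
+                          d[i - 1][j - 1] + cost)   # substitution
+    return d[len(ref)][len(hyp)] / max(len(ref), 1)
+
+# One substituted character in a 6-character reference gives CER 1/6 ≈ 0.167.
+print(cer("今天天气真好", "今天天汽真好"))
+```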

+ +更多详情请参考[Deep Speech 2: End-to-End Speech Recognition in English and Mandarin](https://arxiv.org/abs/1512.02595) + +## 二、安装 + +- ### 1、系统依赖 + + - libsndfile, swig >= 3.0 + - Linux + ```shell + $ sudo apt-get install libsndfile swig + or + $ sudo yum install libsndfile swig + ``` + - MacOs + ``` + $ brew install libsndfile swig + ``` + +- ### 2、环境依赖 + - swig_decoder: + ``` + git clone https://github.com/PaddlePaddle/DeepSpeech.git && cd DeepSpeech && git reset --hard b53171694e7b87abe7ea96870b2f4d8e0e2b1485 && cd deepspeech/decoders/ctcdecoder/swig && sh setup.sh + ``` + + - paddlepaddle >= 2.1.0 + + - paddlehub >= 2.1.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) + +- ### 3、安装 + + - ```shell + $ hub install deepspeech2_aishell + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + + +## 三、模型API预测 + +- ### 1、预测代码示例 + + ```python + import paddlehub as hub + + # 采样率为16k,格式为wav的中文语音音频 + wav_file = '/PATH/TO/AUDIO' + + model = hub.Module( + name='deepspeech2_aishell', + version='1.0.0') + text = model.speech_recognize(wav_file) + + print(text) + ``` + +- ### 2、API + - ```python + def check_audio(audio_file) + ``` + - 检查输入音频格式和采样率是否满足为16000 + + - **参数** + + - `audio_file`:本地音频文件(*.wav)的路径,如`/path/to/input.wav` + + - ```python + def speech_recognize( + audio_file, + device='cpu', + ) + ``` + - 将输入的音频识别成文字 + + - **参数** + + - `audio_file`:本地音频文件(*.wav)的路径,如`/path/to/input.wav` + - `device`:预测时使用的设备,默认为`cpu`,如需使用gpu预测,请设置为`gpu`。 + + - **返回** + + - `text`:str类型,返回输入音频的识别文字结果。 + + +## 四、服务部署 + +- PaddleHub Serving可以部署一个在线的语音识别服务。 + +- ### 第一步:启动PaddleHub Serving + + - ```shell + $ hub serving start -m deepspeech2_aishell + ``` + + - 这样就完成了一个语音识别服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 需要识别的音频的存放路径,确保部署服务的机器可访问 + file = '/path/to/input.wav' + + # 以key的方式指定text传入预测方法的时的参数,此例中为"audio_file" + data = {"audio_file": file} + + # 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip + url = "http://127.0.0.1:8866/predict/deepspeech2_aishell" + + # 指定post请求的headers为application/json方式 + headers = {"Content-Type": "application/json"} + + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + ``` + +## 五、更新历史 + +* 1.0.0 + + 初始发布 + + ```shell + $ hub install deepspeech2_aishell + ``` diff --git a/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/__init__.py b/modules/audio/asr/deepspeech2_aishell/__init__.py similarity index 100% rename from modules/image/object_detection/retinanet_resnet50_fpn_coco2017/__init__.py rename to modules/audio/asr/deepspeech2_aishell/__init__.py diff --git a/modules/audio/asr/deepspeech2_aishell/assets/conf/augmentation.json b/modules/audio/asr/deepspeech2_aishell/assets/conf/augmentation.json new file mode 100644 index 0000000000000000000000000000000000000000..0967ef424bce6791893e9a57bb952f80fd536e93 --- /dev/null +++ b/modules/audio/asr/deepspeech2_aishell/assets/conf/augmentation.json @@ -0,0 +1 @@ +{} diff --git a/modules/audio/asr/deepspeech2_aishell/assets/conf/deepspeech2.yaml b/modules/audio/asr/deepspeech2_aishell/assets/conf/deepspeech2.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ecbe912386c1968d2f399dfca9769080d7537dfc --- 
/dev/null +++ b/modules/audio/asr/deepspeech2_aishell/assets/conf/deepspeech2.yaml @@ -0,0 +1,68 @@ +# https://yaml.org/type/float.html +data: + train_manifest: data/manifest.train + dev_manifest: data/manifest.dev + test_manifest: data/manifest.test + min_input_len: 0.0 + max_input_len: 27.0 # second + min_output_len: 0.0 + max_output_len: .inf + min_output_input_ratio: 0.00 + max_output_input_ratio: .inf + +collator: + batch_size: 64 # one gpu + mean_std_filepath: data/mean_std.json + unit_type: char + vocab_filepath: data/vocab.txt + augmentation_config: conf/augmentation.json + random_seed: 0 + spm_model_prefix: + spectrum_type: linear + feat_dim: + delta_delta: False + stride_ms: 10.0 + window_ms: 20.0 + n_fft: None + max_freq: None + target_sample_rate: 16000 + use_dB_normalization: True + target_dB: -20 + dither: 1.0 + keep_transcription_text: False + sortagrad: True + shuffle_method: batch_shuffle + num_workers: 2 + +model: + num_conv_layers: 2 + num_rnn_layers: 3 + rnn_layer_size: 1024 + use_gru: True + share_rnn_weights: False + blank_id: 0 + ctc_grad_norm_type: instance + +training: + n_epoch: 80 + accum_grad: 1 + lr: 2e-3 + lr_decay: 0.83 + weight_decay: 1e-06 + global_grad_clip: 3.0 + log_interval: 100 + checkpoint: + kbest_n: 50 + latest_n: 5 + +decoding: + batch_size: 128 + error_rate_type: cer + decoding_method: ctc_beam_search + lang_model_path: data/lm/zh_giga.no_cna_cmn.prune01244.klm + alpha: 1.9 + beta: 5.0 + beam_size: 300 + cutoff_prob: 0.99 + cutoff_top_n: 40 + num_proc_bsearch: 10 diff --git a/modules/audio/asr/deepspeech2_aishell/assets/data/mean_std.json b/modules/audio/asr/deepspeech2_aishell/assets/data/mean_std.json new file mode 100644 index 0000000000000000000000000000000000000000..6770184f3522056a533ead7a68537686f799ecc5 --- /dev/null +++ b/modules/audio/asr/deepspeech2_aishell/assets/data/mean_std.json @@ -0,0 +1 @@ +{"mean_stat": [-13505966.65209869, -12778154.889588555, -13487728.30750011, -12897344.94123812, -12472281.490772562, -12631566.475106332, -13391790.349327326, -14045382.570026815, -14159320.465516506, -14273422.438486755, -14639805.161347123, -15145380.07768254, -15612893.133258691, -15938542.05012206, -16115293.502621327, -16188225.698757892, -16317206.280373082, -16500598.476283036, -16671564.297937019, -16804599.860397574, -16916423.142814968, -17011785.59439087, -17075067.62262626, -17154580.16740178, -17257812.961825978, -17355683.228599995, -17441455.258318607, -17473199.925130684, -17488835.5763828, -17491232.15414511, -17485000.29006962, -17499471.646940477, -17551398.97122984, -17641732.10682403, -17757209.077974595, -17843801.500521667, -17935647.58641936, -18020362.347413756, -18117633.806080323, -18232427.58935143, -18316024.35215119, -18378789.145393644, -18421147.25807373, -18445805.18294822, -18460946.27810118, -18467914.04034822, -18469404.319909714, -18469606.974339806, -18470754.294192698, -18458320.91921723, -18441354.111811973, -18428332.216321833, -18422281.413955193, -18433421.585668042, -18460521.025954794, -18494800.856363494, -18539532.288011573, -18583823.79899225, -18614474.56256926, -18646872.180154275, -18661137.85367877, -18673590.719379324, -18702967.62040798, -18736434.748098046, -18777912.13098326, -18794675.486509323, -18837225.856196072, -18874872.796128694, -18927340.44407057, -18994929.076545004, -19060701.164406348, -19118006.18996682, -19175792.05766062, -19230755.996405277, -19270174.594219487, -19334788.35904946, -19401456.988906194, -19484580.095938426, -19582040.4715673, -19696598.86662636, 
-19810401.513227757, -19931755.37941177, -20021867.47620737, -20082298.984455004, -20114708.336475413, -20143802.72793865, -20146821.988139726, -20165613.317683898, -20189938.602584295, -20220059.08673595, -20242848.528134122, -20250859.979931064, -20267382.93048284, -20267964.544716164, -20261372.89563879, -20252878.74023849, -20247550.771284755, -20231778.31093504, -20231376.103159923, -20236926.52293088, -20248068.41488535, -20255076.901920393, -20262924.167151034, -20263926.583205637, -20263790.273742784, -20268560.080967404, -20268997.150654405, -20269810.816284582, -20267771.864327505, -20256472.703380838, -20241790.559690386, -20241865.794732895, -20244924.716114976, -20249736.631184842, -20257257.816903576, -20268027.212145977, -20277399.95533857, -20281840.8112546, -20270512.52002465, -20255938.63066214, -20242421.685443826, -20241986.654626504, -20237836.034444932, -20231458.31132546, -20218092.819713395, -20204994.19634715, -20198880.142133974, -20197376.49014031, -20198117.60450857, -20197443.473929476, -20191142.03632657, -20174428.452719454, -20159204.32090646, -20137981.294740904, -20124944.79897834, -20112774.604521394, -20109389.248600915, -20115248.61302806, -20117743.853294585, -20123076.93515528, -20132224.95454374, -20147099.26793121, -20169581.367630124, -20190957.518733896, -20215197.057997894, -20242033.589256056, -20282032.217160087, -20316778.653784916, -20360354.215504933, -20425089.908502825, -20534553.0465662, -20737928.349233944, -21091705.14104186, -21646013.197923105, -22403182.076235127, -23313516.63322832, -24244679.879594248, -25027534.00417361, -25502455.708560493, -25665136.744125813, -26602318.88405537], "var_stat": [209924783.1093623, 185218712.4577822, 209991180.89829063, 196198511.40798286, 186098265.7827955, 191905798.58923203, 214281935.29191792, 235042114.51049897, 240179456.24597096, 244657890.3963041, 256099586.32657292, 271849135.9872555, 287174069.13527167, 298171137.28863454, 304112589.91933817, 306553976.2206335, 310813670.30674237, 316958840.3099824, 322651440.3639528, 327213725.196089, 331252123.26114285, 334856188.3081607, 337217897.6545214, 340385427.82557064, 344400488.5633641, 348086880.08086526, 351349070.53148264, 352648076.18415344, 353409462.33704513, 353598061.4967693, 353405322.74993587, 353917215.6834277, 355784796.898883, 359222461.3224974, 363671441.7428676, 366908651.69908494, 370304677.0615045, 373477194.79721, 377174088.9808273, 381531608.6574547, 384703574.426059, 387104126.9474883, 388723211.11308575, 389687817.27351815, 390351031.4418706, 390659006.3690262, 390704649.89417714, 390702370.1919126, 390731862.59274197, 390216004.4126628, 389516083.054853, 389017745.636457, 388788872.1127645, 389269311.2239042, 390401819.5968815, 391842612.97859454, 393708801.05223197, 395569598.4694, 396868892.67152405, 398210915.02133286, 398743299.4753882, 399330344.88417244, 400565940.1325846, 401901693.4656316, 403513855.43933284, 404103248.96526104, 405986814.274556, 407507145.4104169, 409598353.6517908, 412453848.0248063, 415138273.0558441, 417479272.96907294, 419785633.3276395, 422003065.1681787, 423610264.8868346, 426260552.96545905, 428973536.3620236, 432368654.40899384, 436359561.5468266, 441119512.777527, 445884989.25794005, 451037422.65838546, 454872292.24179226, 457497136.8780015, 458904066.0675219, 460155836.4432799, 460272943.80738074, 461087498.6828549, 462144907.7850926, 463483598.81228757, 464530694.44478536, 464971538.85301507, 465771535.6019992, 465936698.93801653, 465741012.7287712, 465448625.0011534, 465296363.8603534, 
464718299.2207512, 464720391.25778216, 465016640.5248736, 465564374.0248998, 465982788.8695927, 466425068.01245564, 466595649.90489674, 466707658.8296169, 467015570.78026086, 467099213.08769494, 467201640.15951264, 467163862.3709329, 466727597.56313753, 466174871.71213347, 466255498.45248336, 466439062.65458614, 466693130.99620277, 467068587.1422199, 467536070.1402474, 467955819.1549621, 468187227.1069643, 467742976.2778335, 467159585.250493, 466592359.52916145, 466583195.8099961, 466424348.9572719, 466155323.6074322, 465569620.1801811, 465021642.5158305, 464757658.6383867, 464713882.60103834, 464724239.2941314, 464679163.728191, 464407007.8705965, 463660736.0136739, 463001339.2385198, 462077058.47595775, 461505071.67199403, 460946277.95973784, 460816158.9197017, 461123589.268546, 461232998.1572812, 461445601.0442877, 461803238.28569543, 462436966.22005004, 463391404.7434971, 464299608.85523456, 465319405.3931429, 466432961.70208246, 468168080.3331244, 469640808.6809098, 471501539.22440934, 474301795.1694898, 479155711.93441755, 488314271.10405815, 504537056.23994666, 530509400.5201074, 566892036.4437443, 611792826.0442055, 658913502.9004005, 699716882.9169292, 725237302.8248898, 734259159.9571886, 789267050.8287783], "frame_num": 899422} diff --git a/modules/audio/asr/deepspeech2_aishell/assets/data/vocab.txt b/modules/audio/asr/deepspeech2_aishell/assets/data/vocab.txt new file mode 100644 index 0000000000000000000000000000000000000000..e272b5760cafeeec94bbbb7161e9c23f1358af3b --- /dev/null +++ b/modules/audio/asr/deepspeech2_aishell/assets/data/vocab.txt @@ -0,0 +1,4301 @@ + + +一 +丁 +七 +万 +丈 +三 +上 +下 +不 +与 +丐 +丑 +专 +且 +世 +丘 +丙 +业 +丛 +东 +丝 +丞 +丢 +两 +严 +丧 +个 +丫 +中 +丰 +串 +临 +丸 +丹 +为 +主 +丽 +举 +乃 +久 +么 +义 +之 +乌 +乍 +乎 +乏 +乐 +乒 +乓 +乔 +乖 +乘 +乙 +九 +乞 +也 +习 +乡 +书 +买 +乱 +乳 +乾 +了 +予 +争 +事 +二 +于 +亏 +云 +互 +五 +井 +亚 +些 +亟 +亡 +亢 +交 +亥 +亦 +产 +亨 +亩 +享 +京 +亭 +亮 +亲 +亳 +亵 +人 +亿 +什 +仁 +仄 +仅 +仇 +今 +介 +仍 +从 +仑 +仓 +仔 +仕 +他 +仗 +付 +仙 +仡 +代 +令 +以 +仨 +仪 +们 +仰 +仲 +件 +价 +任 +份 +仿 +企 +伉 +伊 +伍 +伎 +伏 +伐 +休 +众 +优 +伙 +会 +伞 +伟 +传 +伢 +伤 +伦 +伪 +伯 +估 +伴 +伶 +伸 +伺 +似 +伽 +佃 +但 +位 +低 +住 +佐 +佑 +体 +何 +佘 +余 +佛 +作 +佟 +你 +佣 +佩 +佬 +佳 +佶 +佼 +使 +侃 +侄 +侈 +例 +侍 +侑 +侗 +供 +依 +侠 +侣 +侥 +侦 +侧 +侨 +侬 +侮 +侯 +侵 +便 +促 +俄 +俊 +俏 +俐 +俗 +俘 +俚 +保 +俞 +信 +俨 +俩 +俪 +俭 +修 +俯 +俱 +俸 +俺 +俾 +倍 +倒 +倘 +候 +倚 +倜 +借 +倡 +倦 +倩 +倪 +债 +值 +倾 +假 +偏 +做 +停 +健 +偶 +偷 +偿 +傅 +傍 +傥 +储 +催 +傲 +傻 +像 +僚 +僧 +僮 +僵 +僻 +儒 +儿 +兀 +允 +元 +兄 +充 +兆 +先 +光 +克 +免 +兑 +兔 +兖 +党 +兜 +兢 +入 +全 +八 +公 +六 +兰 +共 +关 +兴 +兵 +其 +具 +典 +兹 +养 +兼 +兽 +冀 +内 +冈 +冉 +册 +再 +冒 +冕 +冗 +写 +军 +农 +冠 +冤 +冥 +冬 +冯 +冰 +冲 +决 +况 +冶 +冷 +冻 +净 +凄 +准 +凇 +凉 +凋 +凌 +减 +凑 +凝 +几 +凡 +凤 +凭 +凯 +凰 +凳 +凶 +凸 +凹 +出 +击 +函 +凿 +刀 +刁 +刃 +分 +切 +刊 +刑 +划 +列 +刘 +则 +刚 +创 +初 +删 +判 +刨 +利 +别 +刮 +到 +制 +刷 +券 +刹 +刺 +刻 +剁 +剂 +剃 +削 +前 +剐 +剑 +剔 +剖 +剥 +剧 +剩 +剪 +副 +割 +剽 +剿 +劈 +力 +劝 +办 +功 +加 +务 +劣 +动 +助 +努 +劫 +励 +劲 +劳 +劵 +势 +勃 +勇 +勉 +勋 +勒 +勘 +募 +勤 +勺 +勾 +勿 +匀 +包 +匆 +匈 +匕 +化 +北 +匙 +匝 +匠 +匡 +匣 +匪 +匮 +匹 +区 +医 +匾 +匿 +十 +千 +升 +午 +卉 +半 +华 +协 +卑 +卒 +卓 +单 +卖 +南 +博 +卜 +卞 +占 +卡 +卢 +卤 +卦 +卧 +卫 +卯 +印 +危 +卲 +即 +却 +卵 +卷 +卸 +卿 +厂 +厄 +厅 +历 +厉 +压 +厌 +厕 +厘 +厚 +原 +厢 +厥 +厦 +厨 +厩 +厮 +去 +县 +参 +又 +叉 +及 +友 +双 +反 +发 +叔 +取 +受 +变 +叙 +叛 +叠 +口 +古 +句 +另 +叨 +叩 +只 +叫 +召 +叭 +叮 +可 +台 +叱 +史 +右 +叵 +叶 +号 +司 +叹 +叼 +吁 +吃 +各 +吆 +合 +吉 +吊 +同 +名 +后 +吏 +吐 +向 +吓 +吕 +吗 +君 +吝 +吞 +吟 +否 +吧 +吨 +吩 +含 +听 +吭 +启 +吴 +吵 +吸 +吹 +吻 +吼 +吾 +吿 +呀 +呃 +呆 +呈 +告 +呐 +呕 +呗 +员 +呛 +呜 +呢 +呦 +周 +呲 +味 +呵 +呼 +命 +咀 +咄 +咋 +和 +咎 +咏 +咐 +咒 +咔 +咕 +咖 +咙 +咚 +咣 +咤 +咧 +咨 +咪 +咫 +咬 +咯 +咱 +咳 +咸 +咽 +哀 +品 +哄 +哆 +哇 +哈 +哉 +响 +哎 +哑 +哒 +哗 +哟 +哥 +哦 +哨 +哪 +哭 +哲 +哺 +哼 +哽 +唁 +唇 +唉 +唏 +唐 +唠 +唤 +唬 +售 +唯 +唱 +唾 +啃 +商 +啊 +啕 +啡 +啤 +啥 +啦 +啧 +啪 +啬 +啰 +啵 +啶 +啸 +啼 +喀 +喂 +善 +喆 
+喇 +喉 +喊 +喔 +喘 +喜 +喝 +喧 +喱 +喵 +喷 +喻 +喽 +嗅 +嗑 +嗒 +嗓 +嗡 +嗣 +嗤 +嗦 +嗨 +嗬 +嗯 +嗲 +嗷 +嗽 +嘀 +嘈 +嘉 +嘎 +嘘 +嘛 +嘟 +嘭 +嘱 +嘲 +嘴 +嘶 +嘻 +噎 +噘 +器 +噩 +噪 +噬 +噱 +噼 +嚎 +嚏 +嚓 +嚣 +嚷 +嚼 +囊 +囚 +四 +回 +因 +团 +囤 +囧 +园 +困 +围 +固 +国 +图 +圃 +圆 +圈 +土 +圣 +在 +圩 +圪 +圭 +地 +圳 +场 +圾 +址 +坂 +均 +坊 +坍 +坎 +坏 +坐 +坑 +块 +坚 +坛 +坝 +坞 +坟 +坠 +坡 +坤 +坦 +坪 +坯 +坷 +垂 +垃 +垄 +垅 +型 +垌 +垒 +垛 +垡 +垢 +垣 +垤 +垦 +垫 +垮 +埃 +埋 +城 +埔 +埜 +域 +埠 +培 +基 +堂 +堆 +堕 +堡 +堤 +堪 +堰 +堵 +塌 +塍 +塑 +塔 +塘 +塞 +填 +塬 +塾 +境 +墅 +墓 +墙 +增 +墟 +墨 +墩 +壁 +壑 +壕 +壤 +士 +壮 +声 +壳 +壶 +壹 +处 +备 +复 +夏 +夕 +外 +夙 +多 +夜 +够 +大 +天 +太 +夫 +夭 +央 +夯 +失 +头 +夷 +夸 +夹 +夺 +奂 +奇 +奈 +奉 +奋 +奎 +奏 +契 +奔 +奕 +奖 +套 +奘 +奚 +奠 +奢 +奥 +女 +奴 +奶 +奸 +她 +好 +如 +妃 +妄 +妆 +妇 +妈 +妊 +妍 +妒 +妖 +妙 +妞 +妤 +妥 +妧 +妨 +妩 +妮 +妯 +妹 +妻 +姆 +姊 +始 +姐 +姑 +姓 +委 +姗 +姚 +姜 +姝 +姣 +姥 +姨 +姬 +姻 +姿 +威 +娃 +娄 +娅 +娇 +娌 +娘 +娜 +娟 +娠 +娥 +娩 +娱 +娴 +娶 +娼 +婀 +婆 +婉 +婕 +婚 +婧 +婪 +婴 +婵 +婶 +婷 +婿 +媒 +媚 +媛 +媞 +媲 +媳 +嫁 +嫂 +嫉 +嫌 +嫔 +嫖 +嫚 +嫡 +嫣 +嫦 +嫩 +嬉 +嬛 +嬷 +孀 +子 +孔 +孕 +字 +存 +孙 +孚 +孜 +孝 +孟 +孢 +季 +孤 +学 +孩 +孪 +孰 +孱 +孵 +孺 +宁 +它 +宅 +宇 +守 +安 +宋 +完 +宏 +宓 +宕 +宗 +官 +宙 +定 +宛 +宜 +宝 +实 +宠 +审 +客 +宣 +室 +宦 +宪 +宫 +宰 +害 +宴 +宵 +家 +宸 +容 +宽 +宾 +宿 +寂 +寄 +寅 +密 +寇 +富 +寐 +寒 +寓 +寝 +寞 +察 +寡 +寥 +寨 +寮 +寰 +寸 +对 +寺 +寻 +导 +寿 +封 +射 +将 +尊 +小 +少 +尔 +尖 +尘 +尚 +尝 +尤 +尧 +尬 +就 +尴 +尸 +尹 +尺 +尼 +尽 +尾 +尿 +局 +屁 +层 +居 +屈 +届 +屋 +屌 +屎 +屏 +屑 +展 +属 +屠 +屡 +履 +屯 +山 +屹 +屿 +岁 +岂 +岌 +岐 +岔 +岖 +岗 +岚 +岛 +岩 +岬 +岭 +岱 +岳 +岷 +岸 +峁 +峙 +峡 +峥 +峨 +峪 +峭 +峰 +峻 +崂 +崃 +崇 +崎 +崔 +崖 +崛 +崧 +崩 +崭 +崴 +嵋 +嵌 +嵘 +嵛 +嵩 +嶝 +巅 +巍 +川 +州 +巡 +巢 +工 +左 +巧 +巨 +巩 +巫 +差 +己 +已 +巴 +巷 +巾 +巿 +币 +市 +布 +帅 +帆 +师 +希 +帐 +帕 +帖 +帘 +帚 +帜 +帝 +带 +席 +帮 +帷 +常 +帼 +帽 +幂 +幄 +幅 +幌 +幕 +幢 +干 +平 +年 +并 +幸 +幺 +幻 +幼 +幽 +广 +庄 +庆 +庇 +床 +序 +庐 +库 +应 +底 +店 +庙 +庚 +府 +庞 +废 +度 +座 +庭 +庵 +庶 +康 +庸 +庾 +廉 +廊 +廓 +廖 +延 +廷 +建 +开 +异 +弃 +弄 +弈 +弊 +式 +弑 +弓 +引 +弗 +弘 +弛 +弟 +张 +弥 +弦 +弧 +弩 +弯 +弱 +弹 +强 +归 +当 +录 +彝 +形 +彤 +彦 +彩 +彪 +彬 +彭 +彰 +影 +彷 +役 +彻 +彼 +彿 +往 +征 +径 +待 +徇 +很 +徉 +徊 +律 +徐 +徒 +得 +徘 +徙 +御 +循 +微 +德 +徽 +心 +必 +忆 +忌 +忍 +忐 +忑 +志 +忘 +忙 +忠 +忧 +忪 +快 +忱 +念 +忻 +忽 +怀 +态 +怂 +怅 +怎 +怒 +怕 +怖 +怜 +思 +怠 +怡 +急 +怦 +性 +怨 +怪 +怯 +怵 +总 +恋 +恍 +恐 +恒 +恙 +恢 +恣 +恤 +恨 +恩 +恪 +恬 +恭 +息 +恰 +恳 +恶 +恸 +恺 +恼 +恿 +悄 +悉 +悌 +悍 +悔 +悖 +悚 +悟 +悠 +患 +悦 +您 +悬 +悯 +悲 +悴 +悸 +悼 +情 +惆 +惊 +惋 +惑 +惕 +惚 +惜 +惟 +惠 +惦 +惧 +惨 +惩 +惫 +惬 +惮 +惯 +惰 +想 +惶 +惹 +惺 +愁 +愈 +愉 +意 +愕 +愚 +感 +愤 +愧 +愿 +慈 +慌 +慎 +慑 +慕 +慢 +慧 +慨 +慰 +慷 +憋 +憔 +憧 +憨 +憩 +憬 +憷 +憾 +懂 +懈 +懊 +懋 +懒 +懵 +懿 +戈 +戎 +戏 +成 +我 +戒 +或 +战 +戚 +戛 +戟 +截 +戬 +戮 +戳 +戴 +户 +房 +所 +扁 +扇 +扉 +手 +才 +扎 +扑 +扒 +打 +扔 +托 +扛 +扣 +执 +扩 +扫 +扬 +扭 +扮 +扯 +扰 +扳 +扶 +批 +扼 +找 +承 +技 +抄 +抉 +把 +抑 +抒 +抓 +投 +抖 +抗 +折 +抚 +抛 +抠 +抡 +抢 +护 +报 +抨 +披 +抬 +抱 +抵 +抹 +押 +抽 +抿 +拄 +担 +拆 +拇 +拈 +拉 +拌 +拍 +拎 +拐 +拒 +拓 +拔 +拖 +拗 +拘 +拙 +招 +拜 +拟 +拢 +拣 +拥 +拦 +拧 +拨 +择 +括 +拭 +拮 +拯 +拱 +拳 +拴 +拷 +拼 +拽 +拾 +拿 +持 +挂 +指 +按 +挎 +挑 +挖 +挚 +挛 +挝 +挟 +挠 +挡 +挣 +挤 +挥 +挨 +挪 +挫 +振 +挺 +挽 +捂 +捅 +捆 +捉 +捍 +捎 +捏 +捐 +捕 +捞 +损 +捡 +换 +捣 +捧 +据 +捷 +捺 +捻 +掀 +掂 +授 +掉 +掌 +掏 +掐 +排 +掖 +掘 +掠 +探 +掣 +接 +控 +推 +掩 +措 +掬 +掮 +掰 +掳 +掴 +掷 +掺 +揄 +揉 +揍 +描 +提 +插 +握 +揣 +揩 +揪 +揭 +援 +揶 +揽 +搀 +搁 +搂 +搅 +搏 +搜 +搞 +搡 +搪 +搬 +搭 +携 +搽 +摁 +摄 +摆 +摇 +摊 +摒 +摔 +摘 +摧 +摩 +摸 +摹 +撂 +撇 +撑 +撒 +撕 +撞 +撤 +撩 +撬 +播 +撮 +撰 +撵 +撸 +撼 +擂 +擅 +操 +擎 +擒 +擘 +擞 +擦 +攀 +攒 +攥 +支 +收 +改 +攻 +放 +政 +故 +效 +敌 +敏 +救 +敖 +教 +敛 +敝 +敞 +敢 +散 +敦 +敬 +数 +敲 +整 +敷 +文 +斋 +斌 +斐 +斑 +斓 +斗 +料 +斛 +斜 +斟 +斡 +斤 +斥 +斧 +斩 +断 +斯 +新 +方 +施 +旁 +旅 +旋 +族 +旗 +无 +既 +日 +旦 +旧 +旨 +早 +旬 +旭 +旱 +时 +旷 +旺 +昀 +昂 +昆 +昊 +昌 +明 +昏 +易 +昔 +昕 +昙 +星 +映 +春 +昧 +昨 +昭 +是 +昱 +昵 +昼 +显 +晃 +晋 +晏 +晒 +晓 +晔 +晕 +晖 +晗 +晚 +晟 +晤 +晦 +晨 +普 +景 +晰 +晴 +晶 +智 +晾 +暂 +暄 +暇 +暑 +暖 +暗 +暧 +暨 +暮 +暴 +曙 +曝 +曦 +曰 +曲 +更 +曹 +曼 +曾 +替 +最 +月 +有 +朋 +服 +朐 +朔 +朗 +望 +朝 +期 +朦 +木 +未 +末 +本 +札 +术 +朱 +朴 +朵 +机 +朽 +杀 +杂 +权 +杆 +杉 +李 +杏 +材 +村 +杖 +杜 +杞 +束 +杠 +条 +来 +杨 +杭 +杯 +杰 +杳 +松 +板 +极 +构 +枉 +析 +枕 +林 +枚 +果 +枝 +枞 +枢 +枣 +枪 +枫 +枭 +枯 +架 +枷 +柄 +柏 +某 +染 +柔 +柚 +柜 +柞 +柠 +查 +柬 
+柯 +柱 +柳 +柴 +柿 +栅 +标 +栈 +栋 +栏 +树 +栓 +栖 +栗 +校 +株 +样 +核 +根 +格 +栽 +栾 +桂 +桃 +框 +案 +桉 +桌 +桎 +桐 +桑 +桓 +桔 +档 +桥 +桦 +桩 +桶 +梁 +梅 +梓 +梗 +梦 +梧 +梨 +梭 +梯 +械 +梳 +梵 +检 +棉 +棋 +棍 +棒 +棕 +棘 +棚 +棠 +森 +棱 +棵 +棺 +椅 +椋 +植 +椎 +椒 +椰 +椿 +楂 +楔 +楚 +楞 +楠 +楣 +楷 +楼 +概 +榄 +榆 +榈 +榉 +榔 +榕 +榜 +榨 +榭 +榴 +榷 +榻 +槌 +槎 +槐 +槛 +槟 +槽 +槿 +樊 +樟 +模 +横 +樱 +橄 +橘 +橙 +橡 +橱 +檀 +檐 +檬 +欠 +次 +欢 +欣 +欧 +欲 +欺 +款 +歆 +歇 +歉 +歌 +止 +正 +此 +步 +武 +歧 +歪 +歹 +死 +殃 +殆 +殉 +殊 +残 +殒 +殓 +殖 +殚 +殡 +殭 +殴 +段 +殷 +殿 +毁 +毂 +毅 +毋 +母 +每 +毒 +毓 +比 +毕 +毗 +毙 +毛 +毫 +毯 +毽 +氏 +民 +氓 +气 +氛 +氟 +氢 +氦 +氧 +氨 +氪 +氮 +氯 +氰 +水 +永 +氾 +汀 +汁 +求 +汇 +汉 +汕 +汗 +汛 +汝 +汞 +江 +池 +污 +汤 +汪 +汰 +汲 +汴 +汶 +汹 +汽 +汾 +沁 +沂 +沃 +沅 +沈 +沉 +沏 +沐 +沓 +沙 +沛 +沟 +没 +沣 +沥 +沦 +沧 +沪 +沫 +沮 +沱 +河 +沸 +油 +治 +沼 +沽 +沾 +沿 +泄 +泉 +泊 +泌 +泓 +泔 +法 +泗 +泛 +泞 +泠 +泡 +波 +泣 +泥 +注 +泪 +泯 +泰 +泱 +泳 +泵 +泷 +泸 +泻 +泼 +泽 +泾 +洁 +洋 +洒 +洗 +洙 +洛 +洞 +津 +洪 +洱 +洲 +洵 +活 +洼 +洽 +派 +流 +浅 +浆 +浇 +浈 +浊 +测 +济 +浏 +浑 +浓 +浙 +浚 +浦 +浩 +浪 +浮 +浴 +海 +浸 +涂 +涅 +消 +涉 +涌 +涎 +涓 +涕 +涛 +涝 +涞 +涟 +涠 +涡 +涤 +润 +涧 +涨 +涩 +涮 +涯 +液 +涵 +涿 +淀 +淄 +淆 +淇 +淋 +淌 +淑 +淖 +淘 +淝 +淞 +淡 +淤 +淫 +淮 +深 +淳 +混 +淹 +添 +淼 +渀 +清 +渊 +渍 +渎 +渐 +渔 +渗 +渚 +渝 +渠 +渡 +渣 +渤 +渥 +温 +渭 +港 +渲 +渴 +游 +渺 +湃 +湄 +湍 +湖 +湘 +湛 +湾 +湿 +溃 +溅 +溉 +源 +溜 +溢 +溥 +溧 +溪 +溯 +溶 +溺 +滁 +滇 +滋 +滑 +滔 +滕 +滚 +滞 +满 +滢 +滤 +滥 +滨 +滩 +滴 +漂 +漆 +漏 +漓 +演 +漕 +漠 +漩 +漫 +漭 +漯 +漱 +漳 +漾 +潇 +潘 +潜 +潞 +潢 +潦 +潭 +潮 +潼 +澄 +澈 +澎 +澜 +澡 +澳 +激 +濑 +濒 +濠 +濡 +濮 +瀑 +瀚 +瀛 +灌 +灞 +火 +灭 +灯 +灰 +灵 +灶 +灸 +灼 +灾 +灿 +炅 +炉 +炊 +炎 +炒 +炕 +炖 +炙 +炜 +炫 +炬 +炭 +炮 +炯 +炳 +炷 +炸 +点 +炼 +炽 +烁 +烂 +烃 +烈 +烊 +烘 +烙 +烛 +烟 +烤 +烦 +烧 +烨 +烫 +热 +烯 +烷 +烹 +烽 +焉 +焊 +焕 +焖 +焘 +焚 +焦 +焯 +焰 +焱 +然 +煊 +煌 +煎 +煜 +煞 +煤 +煦 +照 +煮 +煲 +熄 +熊 +熏 +熔 +熙 +熟 +熠 +熨 +熬 +熹 +燃 +燊 +燎 +燕 +燥 +爆 +爪 +爬 +爱 +爵 +父 +爷 +爸 +爹 +爽 +片 +版 +牌 +牙 +牛 +牟 +牡 +牢 +牧 +物 +牲 +牵 +特 +牺 +牾 +犀 +犁 +犄 +犊 +犒 +犬 +犯 +状 +犷 +犹 +狂 +狄 +狈 +狐 +狒 +狗 +狙 +狞 +狠 +狡 +狩 +独 +狭 +狮 +狰 +狱 +狸 +狼 +猎 +猖 +猛 +猜 +猝 +猥 +猩 +猪 +猫 +猬 +献 +猴 +猾 +猿 +獒 +獗 +獾 +玄 +率 +玉 +王 +玖 +玛 +玟 +玥 +玩 +玫 +玮 +环 +现 +玲 +玳 +玺 +玻 +珀 +珉 +珊 +珍 +珏 +珑 +珜 +珠 +班 +珮 +珲 +珺 +球 +琅 +理 +琉 +琊 +琏 +琐 +琛 +琢 +琥 +琦 +琨 +琪 +琬 +琰 +琳 +琴 +琵 +琶 +琼 +瑁 +瑄 +瑕 +瑙 +瑚 +瑛 +瑜 +瑞 +瑟 +瑰 +瑶 +瑾 +璀 +璃 +璇 +璋 +璐 +璞 +璧 +璨 +瓜 +瓢 +瓣 +瓦 +瓮 +瓯 +瓶 +瓷 +甄 +甘 +甚 +甜 +生 +甥 +用 +甩 +甫 +甬 +甯 +田 +由 +甲 +申 +电 +男 +甸 +町 +画 +畅 +畊 +界 +畏 +畔 +留 +畜 +略 +番 +畴 +畸 +畿 +疃 +疆 +疏 +疑 +疗 +疚 +疝 +疤 +疫 +疯 +疲 +疵 +疹 +疼 +疾 +病 +症 +痉 +痊 +痒 +痕 +痘 +痛 +痣 +痪 +痫 +痰 +痱 +痴 +痹 +痼 +瘀 +瘁 +瘟 +瘠 +瘤 +瘦 +瘩 +瘪 +瘫 +瘸 +瘾 +癌 +癖 +癣 +癫 +登 +白 +百 +皂 +的 +皆 +皇 +皋 +皎 +皓 +皖 +皙 +皮 +皱 +盆 +盈 +益 +盎 +盐 +监 +盒 +盔 +盖 +盗 +盘 +盛 +盟 +目 +盯 +盲 +直 +相 +盹 +盼 +盾 +省 +眈 +眉 +看 +真 +眠 +眨 +眬 +眯 +眶 +眷 +眺 +眼 +着 +睁 +睐 +睛 +睡 +督 +睦 +睫 +睬 +睹 +睾 +睿 +瞄 +瞅 +瞌 +瞎 +瞒 +瞟 +瞧 +瞩 +瞪 +瞬 +瞰 +瞳 +瞻 +瞿 +矗 +矛 +矜 +矢 +矣 +知 +矩 +矫 +短 +矮 +石 +矶 +矸 +矿 +码 +砂 +砌 +砍 +砒 +研 +砖 +砚 +砝 +砥 +砰 +砲 +破 +砷 +砸 +砺 +砾 +础 +硅 +硕 +硚 +硝 +硫 +硬 +确 +碉 +碌 +碍 +碎 +碑 +碗 +碘 +碚 +碟 +碧 +碰 +碱 +碳 +碴 +碾 +磁 +磅 +磊 +磋 +磐 +磕 +磡 +磨 +磴 +磷 +磺 +礁 +示 +礼 +社 +祁 +祈 +祉 +祖 +祛 +祝 +神 +祠 +祢 +祥 +票 +祭 +祯 +祷 +祸 +祺 +禀 +禁 +禄 +禅 +福 +禧 +禹 +禺 +离 +禽 +禾 +秀 +私 +秃 +秆 +秉 +秋 +种 +科 +秒 +秘 +租 +秣 +秤 +秦 +秧 +秩 +积 +称 +秸 +移 +秽 +稀 +程 +稍 +税 +稚 +稠 +稣 +稳 +稻 +稼 +稽 +稿 +穆 +穗 +穴 +究 +穷 +空 +穿 +突 +窃 +窄 +窈 +窍 +窑 +窒 +窕 +窖 +窗 +窘 +窜 +窝 +窟 +窥 +窦 +窨 +窿 +立 +竖 +站 +竞 +竟 +章 +竣 +童 +竭 +端 +竲 +竹 +竺 +竽 +竿 +笃 +笈 +笋 +笑 +笔 +笙 +笛 +符 +笨 +第 +笼 +等 +筋 +筏 +筐 +筑 +筒 +答 +策 +筛 +筱 +筵 +筷 +筹 +签 +简 +箍 +箔 +箕 +算 +管 +箫 +箭 +箱 +篇 +篡 +篪 +篮 +篷 +簇 +簧 +簸 +簿 +籁 +籍 +米 +类 +籽 +粉 +粒 +粕 +粗 +粘 +粟 +粤 +粥 +粪 +粮 +粱 +粹 +粽 +精 +糊 +糕 +糖 +糗 +糙 +糟 +糯 +系 +紊 +素 +索 +紧 +紫 +累 +絮 +綦 +繁 +纠 +红 +纣 +纤 +约 +级 +纪 +纬 +纯 +纰 +纱 +纲 +纳 +纵 +纶 +纷 +纸 +纹 +纺 +纽 +线 +练 +组 +绅 +细 +织 +终 +绊 +绌 +绍 +绎 +经 +绑 +绒 +结 +绕 +绘 +给 +绚 +络 +绝 +绞 +统 +绢 +绣 +继 +绩 +绪 +续 +绮 +绯 +绰 +绳 +维 +绵 +绷 +绸 +综 +绽 +绿 +缀 +缄 +缅 +缆 +缇 +缉 +缓 +缔 +缕 +编 +缘 +缙 +缚 +缜 +缝 +缠 +缤 +缨 +缩 +缪 +缭 +缮 +缰 +缴 +缸 +缺 +罂 +罄 +罐 +网 +罕 +罗 +罚 +罡 +罢 +罩 +罪 +置 +署 +罹 +羁 +羊 +美 +羔 +羚 +羞 +羡 +羣 +群 +羲 +羹 +羽 +羿 +翁 +翅 +翌 
+翔 +翘 +翟 +翠 +翡 +翩 +翰 +翱 +翻 +翼 +耀 +老 +考 +耄 +者 +耋 +而 +耍 +耐 +耒 +耕 +耗 +耘 +耳 +耶 +耷 +耸 +耻 +耽 +耿 +聂 +聆 +聊 +聋 +职 +联 +聘 +聚 +聪 +肃 +肆 +肇 +肉 +肋 +肌 +肖 +肘 +肚 +肛 +肝 +肠 +股 +肢 +肤 +肥 +肩 +肪 +肮 +肯 +育 +肴 +肺 +肾 +肿 +胀 +胁 +胃 +胆 +背 +胎 +胖 +胚 +胛 +胜 +胞 +胡 +胤 +胧 +胫 +胯 +胰 +胱 +胳 +胶 +胸 +胺 +能 +脂 +脆 +脉 +脊 +脍 +脏 +脐 +脑 +脖 +脚 +脯 +脱 +脸 +脾 +腆 +腊 +腋 +腌 +腐 +腑 +腓 +腔 +腕 +腥 +腩 +腮 +腰 +腱 +腹 +腺 +腻 +腼 +腾 +腿 +膀 +膊 +膏 +膑 +膛 +膜 +膝 +膨 +膳 +膺 +臀 +臂 +臃 +臆 +臣 +自 +臭 +至 +致 +臻 +舀 +舅 +舆 +舌 +舍 +舒 +舛 +舜 +舞 +舟 +航 +般 +舰 +舱 +舵 +舶 +舸 +船 +艇 +艋 +艘 +良 +艰 +色 +艳 +艺 +艾 +节 +芊 +芋 +芒 +芙 +芜 +芝 +芦 +芪 +芬 +芭 +芮 +芯 +花 +芳 +芷 +芸 +芽 +苇 +苍 +苏 +苑 +苗 +苛 +苟 +苡 +苣 +若 +苦 +苯 +英 +苹 +茁 +茂 +范 +茄 +茅 +茆 +茎 +茗 +茜 +茨 +茫 +茬 +茵 +茶 +茸 +茹 +荃 +荆 +荇 +草 +荐 +荒 +荔 +荚 +荞 +荟 +荡 +荣 +荤 +荧 +荫 +药 +荷 +荼 +莅 +莆 +莉 +莎 +莓 +莘 +莞 +莠 +莫 +莱 +莲 +莴 +获 +莹 +莺 +莽 +菁 +菇 +菊 +菌 +菜 +菠 +菡 +菩 +菱 +菲 +萃 +萄 +萋 +萌 +萍 +萎 +萝 +萤 +营 +萦 +萧 +萨 +萱 +落 +葆 +著 +葛 +葡 +董 +葩 +葫 +葬 +葱 +葵 +蒂 +蒋 +蒙 +蒜 +蒲 +蒸 +蒿 +蓁 +蓄 +蓉 +蓝 +蓟 +蓬 +蔑 +蔓 +蔗 +蔚 +蔡 +蔫 +蔬 +蔷 +蔺 +蔽 +蕉 +蕊 +蕙 +蕲 +蕴 +蕾 +薄 +薇 +薙 +薛 +薪 +薯 +薰 +藏 +藜 +藤 +藩 +藻 +蘑 +虎 +虏 +虐 +虑 +虚 +虞 +虫 +虱 +虹 +虽 +虾 +蚀 +蚁 +蚂 +蚊 +蚌 +蚓 +蚕 +蚝 +蚣 +蚯 +蛀 +蛆 +蛇 +蛋 +蛐 +蛙 +蛛 +蛟 +蛮 +蛰 +蜀 +蜂 +蜇 +蜈 +蜊 +蜒 +蜓 +蜕 +蜗 +蜘 +蜚 +蜜 +蜡 +蜥 +蜴 +蜷 +蜻 +蜿 +蝇 +蝉 +蝎 +蝗 +蝙 +蝠 +蝴 +蝶 +螂 +螃 +融 +螳 +螺 +蟀 +蟋 +蟑 +蟒 +蟹 +蠕 +蠢 +血 +衅 +行 +衍 +衔 +街 +衙 +衡 +衣 +补 +表 +衫 +衬 +衰 +衷 +袁 +袂 +袄 +袆 +袈 +袋 +袍 +袒 +袖 +袜 +被 +袭 +袱 +裁 +裂 +装 +裆 +裔 +裕 +裙 +裟 +裤 +裳 +裴 +裸 +裹 +褂 +褒 +褓 +褚 +褛 +褪 +褴 +褶 +襁 +襄 +襟 +西 +要 +覃 +覆 +见 +观 +规 +觅 +视 +览 +觉 +觊 +觎 +觐 +觑 +角 +解 +觥 +触 +言 +詹 +誉 +誓 +警 +譬 +计 +订 +认 +讧 +讨 +让 +讪 +训 +议 +讯 +记 +讲 +讳 +讶 +许 +讹 +论 +讼 +讽 +设 +访 +诀 +证 +评 +诅 +识 +诈 +诉 +诊 +词 +译 +诓 +试 +诗 +诙 +诚 +话 +诞 +诟 +诠 +诡 +询 +该 +详 +诧 +诩 +诫 +诬 +语 +误 +诱 +诲 +说 +诵 +诶 +请 +诸 +诺 +读 +诽 +课 +诿 +谀 +谁 +调 +谅 +谈 +谊 +谋 +谌 +谍 +谎 +谐 +谑 +谓 +谕 +谙 +谚 +谜 +谢 +谣 +谤 +谦 +谨 +谩 +谬 +谭 +谱 +谴 +谷 +豁 +豆 +豚 +象 +豪 +豫 +豹 +貅 +貉 +貌 +貔 +贝 +贞 +负 +贡 +财 +责 +贤 +败 +账 +货 +质 +贩 +贪 +贫 +贬 +购 +贮 +贯 +贱 +贴 +贵 +贷 +贸 +费 +贺 +贼 +贾 +贿 +赁 +赂 +赃 +资 +赋 +赌 +赎 +赏 +赐 +赔 +赖 +赘 +赚 +赛 +赝 +赞 +赠 +赡 +赢 +赣 +赤 +赦 +赫 +走 +赴 +赵 +赶 +起 +趁 +超 +越 +趋 +趟 +趣 +足 +趴 +趸 +趾 +跃 +跄 +跆 +跌 +跑 +跛 +距 +跟 +跤 +跨 +跪 +路 +跳 +践 +跷 +跺 +跻 +踉 +踊 +踏 +踝 +踞 +踢 +踩 +踪 +踵 +踹 +蹂 +蹄 +蹈 +蹊 +蹚 +蹦 +蹬 +蹭 +蹲 +蹴 +蹶 +蹼 +蹿 +躁 +躏 +身 +躬 +躯 +躲 +躺 +车 +轧 +轨 +轩 +转 +轮 +软 +轰 +轴 +轶 +轻 +载 +轿 +较 +辄 +辅 +辆 +辈 +辉 +辍 +辐 +辑 +输 +辖 +辗 +辘 +辙 +辛 +辜 +辞 +辟 +辣 +辨 +辩 +辫 +辰 +辱 +边 +辽 +达 +迁 +迂 +迄 +迅 +过 +迈 +迎 +运 +近 +返 +还 +这 +进 +远 +违 +连 +迟 +迢 +迥 +迪 +迫 +迭 +述 +迷 +迸 +迹 +追 +退 +送 +适 +逃 +逅 +逆 +选 +逊 +逍 +透 +逐 +递 +途 +逗 +通 +逛 +逝 +逞 +速 +造 +逡 +逢 +逮 +逵 +逸 +逻 +逼 +逾 +遁 +遂 +遇 +遍 +遏 +遐 +道 +遗 +遛 +遢 +遣 +遥 +遨 +遭 +遮 +遴 +遵 +避 +邀 +邂 +邃 +邋 +邑 +邓 +邛 +邝 +邢 +那 +邦 +邪 +邬 +邮 +邯 +邱 +邵 +邹 +邺 +邻 +郁 +郊 +郎 +郑 +郜 +郝 +郡 +部 +郫 +郭 +郸 +都 +鄂 +鄙 +鄞 +鄢 +酋 +酌 +配 +酒 +酗 +酝 +酣 +酪 +酬 +酯 +酱 +酵 +酶 +酷 +酸 +酿 +醇 +醉 +醋 +醍 +醐 +醒 +醛 +采 +釉 +释 +里 +重 +野 +量 +金 +釜 +鉴 +鏖 +鑫 +针 +钉 +钊 +钒 +钓 +钛 +钜 +钝 +钞 +钟 +钠 +钢 +钥 +钦 +钧 +钩 +钮 +钰 +钱 +钴 +钵 +钻 +钾 +铀 +铁 +铂 +铃 +铅 +铆 +铉 +铎 +铐 +铜 +铝 +铠 +铡 +铣 +铨 +铬 +铭 +铮 +铰 +铲 +银 +铸 +铺 +链 +铿 +销 +锁 +锂 +锄 +锅 +锆 +锈 +锋 +锌 +锏 +锐 +错 +锚 +锜 +锟 +锡 +锢 +锣 +锤 +锥 +锦 +锭 +键 +锯 +锰 +锵 +锷 +锹 +锻 +镀 +镁 +镇 +镉 +镊 +镍 +镐 +镑 +镖 +镜 +镯 +镳 +镶 +长 +门 +闪 +闫 +闭 +问 +闯 +闰 +闲 +闳 +间 +闵 +闷 +闸 +闹 +闺 +闻 +闽 +阀 +阁 +阂 +阅 +阎 +阐 +阔 +阙 +阚 +阜 +队 +阮 +阱 +防 +阳 +阴 +阵 +阶 +阻 +阿 +陀 +陂 +附 +际 +陆 +陇 +陈 +陋 +陌 +降 +限 +陕 +陡 +院 +除 +陨 +险 +陪 +陬 +陵 +陶 +陷 +隅 +隆 +隋 +隍 +随 +隐 +隔 +隘 +隙 +障 +隧 +隶 +隼 +隽 +难 +雀 +雁 +雄 +雅 +集 +雇 +雌 +雍 +雏 +雕 +雨 +雪 +雯 +雳 +零 +雷 +雾 +需 +霁 +霄 +霆 +震 +霈 +霉 +霍 +霎 +霏 +霖 +霜 +霞 +露 +霸 +霹 +霾 +靑 +青 +靓 +靖 +静 +靛 +非 +靠 +靡 +面 +革 +靳 +靴 +靶 +鞋 +鞍 +鞘 +鞠 +鞭 +韦 +韧 +韩 +韬 +音 +韵 +韶 +页 +顶 +顷 +项 +顺 +须 +顽 +顾 +顿 +颁 +颂 +预 +颅 +领 +颇 +颈 +颊 +颍 +颐 +频 +颓 +颖 +颗 +题 +颚 +颜 +额 +颠 +颤 +风 +飒 +飓 +飘 +飙 +飚 +飞 +食 +餐 +餮 +饕 +饥 +饪 +饭 +饮 +饰 +饱 +饲 +饵 +饶 +饺 +饼 +饽 +饿 +馀 +馅 +馆 +馈 +馊 +馋 +馑 +馒 +首 +馗 +香 +馥 +馨 +马 +驭 +驯 +驰 +驱 +驳 +驴 +驶 +驻 +驼 +驾 +驿 +骁 +骂 +骄 +骅 +骆 +骇 +骊 +骋 +验 +骏 +骐 +骑 
+骗 +骚 +骜 +骤 +骥 +骨 +骷 +骸 +骼 +髅 +髋 +髌 +髓 +高 +髦 +鬼 +魁 +魂 +魄 +魅 +魇 +魏 +魔 +鱼 +鲁 +鲍 +鲜 +鲟 +鲤 +鲨 +鲶 +鲷 +鲸 +鳄 +鳅 +鳌 +鳖 +鳝 +鳞 +鸟 +鸠 +鸡 +鸣 +鸥 +鸦 +鸭 +鸯 +鸳 +鸵 +鸽 +鸾 +鸿 +鹃 +鹅 +鹊 +鹏 +鹜 +鹞 +鹤 +鹭 +鹰 +鹿 +麋 +麒 +麓 +麟 +麦 +麻 +麾 +黄 +黍 +黎 +黏 +黑 +黔 +默 +黛 +黝 +黯 +鼎 +鼓 +鼠 +鼻 +鼾 +齐 +齿 +龄 +龙 +龚 +龟 +a +c +k +t + diff --git a/modules/audio/asr/deepspeech2_aishell/deepspeech_tester.py b/modules/audio/asr/deepspeech2_aishell/deepspeech_tester.py new file mode 100644 index 0000000000000000000000000000000000000000..6b1f89759aa4e156e4e8c9f99d22a5c5b738139f --- /dev/null +++ b/modules/audio/asr/deepspeech2_aishell/deepspeech_tester.py @@ -0,0 +1,81 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Evaluation for DeepSpeech2 model.""" +import os +import sys +from pathlib import Path + +import paddle + +from deepspeech.frontend.featurizer.text_featurizer import TextFeaturizer +from deepspeech.io.collator import SpeechCollator +from deepspeech.models.ds2 import DeepSpeech2Model +from deepspeech.utils import mp_tools +from deepspeech.utils.utility import UpdateConfig + + +class DeepSpeech2Tester: + def __init__(self, config): + self.config = config + self.collate_fn_test = SpeechCollator.from_config(config) + self._text_featurizer = TextFeaturizer(unit_type=config.collator.unit_type, vocab_filepath=None) + + def compute_result_transcripts(self, audio, audio_len, vocab_list, cfg): + result_transcripts = self.model.decode( + audio, + audio_len, + vocab_list, + decoding_method=cfg.decoding_method, + lang_model_path=cfg.lang_model_path, + beam_alpha=cfg.alpha, + beam_beta=cfg.beta, + beam_size=cfg.beam_size, + cutoff_prob=cfg.cutoff_prob, + cutoff_top_n=cfg.cutoff_top_n, + num_processes=cfg.num_proc_bsearch) + #replace the '' with ' ' + result_transcripts = [self._text_featurizer.detokenize(sentence) for sentence in result_transcripts] + + return result_transcripts + + @mp_tools.rank_zero_only + @paddle.no_grad() + def test(self, audio_file): + self.model.eval() + cfg = self.config + collate_fn_test = self.collate_fn_test + audio, _ = collate_fn_test.process_utterance(audio_file=audio_file, transcript=" ") + audio_len = audio.shape[0] + audio = paddle.to_tensor(audio, dtype='float32') + audio_len = paddle.to_tensor(audio_len) + audio = paddle.unsqueeze(audio, axis=0) + vocab_list = collate_fn_test.vocab_list + result_transcripts = self.compute_result_transcripts(audio, audio_len, vocab_list, cfg.decoding) + return result_transcripts + + def setup_model(self): + config = self.config.clone() + with UpdateConfig(config): + config.model.feat_size = self.collate_fn_test.feature_size + config.model.dict_size = self.collate_fn_test.vocab_size + + model = DeepSpeech2Model.from_config(config.model) + self.model = model + + def resume(self, checkpoint): + """Resume from the checkpoint at checkpoints in the output + directory or load a specified checkpoint. 
+ """ + model_dict = paddle.load(checkpoint) + self.model.set_state_dict(model_dict) diff --git a/modules/audio/asr/deepspeech2_aishell/module.py b/modules/audio/asr/deepspeech2_aishell/module.py new file mode 100644 index 0000000000000000000000000000000000000000..3e18e4b000fe90270158b9d0295f770409359497 --- /dev/null +++ b/modules/audio/asr/deepspeech2_aishell/module.py @@ -0,0 +1,92 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +from pathlib import Path +import sys + +import numpy as np +from paddlehub.env import MODULE_HOME +from paddlehub.module.module import moduleinfo, serving +from paddlehub.utils.log import logger +from paddle.utils.download import get_path_from_url + +try: + import swig_decoders +except ModuleNotFoundError as e: + logger.error(e) + logger.info('The module requires additional dependencies: swig_decoders. ' + 'please install via:\n\'git clone https://github.com/PaddlePaddle/DeepSpeech.git ' + '&& cd DeepSpeech && git reset --hard b53171694e7b87abe7ea96870b2f4d8e0e2b1485 ' + '&& cd deepspeech/decoders/ctcdecoder/swig && sh setup.sh\'') + sys.exit(1) + +import paddle +import soundfile as sf + +# TODO: Remove system path when deepspeech can be installed via pip. +sys.path.append(os.path.join(MODULE_HOME, 'deepspeech2_aishell')) +from deepspeech.exps.deepspeech2.config import get_cfg_defaults +from deepspeech.utils.utility import UpdateConfig +from .deepspeech_tester import DeepSpeech2Tester + +LM_URL = 'https://deepspeech.bj.bcebos.com/zh_lm/zh_giga.no_cna_cmn.prune01244.klm' +LM_MD5 = '29e02312deb2e59b3c8686c7966d4fe3' + + +@moduleinfo(name="deepspeech2_aishell", version="1.0.0", summary="", author="Baidu", author_email="", type="audio/asr") +class DeepSpeech2(paddle.nn.Layer): + def __init__(self): + super(DeepSpeech2, self).__init__() + + # resource + res_dir = os.path.join(MODULE_HOME, 'deepspeech2_aishell', 'assets') + conf_file = os.path.join(res_dir, 'conf/deepspeech2.yaml') + checkpoint = os.path.join(res_dir, 'checkpoints/avg_1.pdparams') + # Download LM manually cause its large size. + lm_path = os.path.join(res_dir, 'data', 'lm') + lm_file = os.path.join(lm_path, LM_URL.split('/')[-1]) + if not os.path.isfile(lm_file): + logger.info(f'Downloading lm from {LM_URL}.') + get_path_from_url(url=LM_URL, root_dir=lm_path, md5sum=LM_MD5) + + # config + self.model_type = 'offline' + self.config = get_cfg_defaults(self.model_type) + self.config.merge_from_file(conf_file) + + # TODO: Remove path updating snippet. 
+ with UpdateConfig(self.config): + self.config.collator.mean_std_filepath = os.path.join(res_dir, self.config.collator.mean_std_filepath) + self.config.collator.vocab_filepath = os.path.join(res_dir, self.config.collator.vocab_filepath) + self.config.collator.augmentation_config = os.path.join(res_dir, self.config.collator.augmentation_config) + self.config.decoding.lang_model_path = os.path.join(res_dir, self.config.decoding.lang_model_path) + + # model + self.tester = DeepSpeech2Tester(self.config) + self.tester.setup_model() + self.tester.resume(checkpoint) + + @staticmethod + def check_audio(audio_file): + sig, sample_rate = sf.read(audio_file) + assert sample_rate == 16000, 'Excepting sample rate of input audio is 16000, but got {}'.format(sample_rate) + + @serving + def speech_recognize(self, audio_file, device='cpu'): + assert os.path.isfile(audio_file), 'File not exists: {}'.format(audio_file) + self.check_audio(audio_file) + + paddle.set_device(device) + return self.tester.test(audio_file)[0] diff --git a/modules/audio/asr/deepspeech2_aishell/requirements.txt b/modules/audio/asr/deepspeech2_aishell/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..e6f929d0109a8669c9e67c13eae20e029303f2b6 --- /dev/null +++ b/modules/audio/asr/deepspeech2_aishell/requirements.txt @@ -0,0 +1,12 @@ +# system level: libsnd swig +loguru +yacs +jsonlines +scipy==1.2.1 +sentencepiece +resampy==0.2.2 +SoundFile==0.9.0.post1 +soxbindings +kaldiio +typeguard +editdistance diff --git a/modules/audio/asr/deepspeech2_librispeech/README.md b/modules/audio/asr/deepspeech2_librispeech/README.md new file mode 100644 index 0000000000000000000000000000000000000000..a7d4aee0dcf6b0ac4754e6ba9db580c0a79daeed --- /dev/null +++ b/modules/audio/asr/deepspeech2_librispeech/README.md @@ -0,0 +1,153 @@ +# deepspeech2_librispeech + +|模型名称|deepspeech2_librispeech| +| :--- | :---: | +|类别|语音-语音识别| +|网络|DeepSpeech2| +|数据集|LibriSpeech| +|是否支持Fine-tuning|否| +|模型大小|518MB| +|最新更新日期|2021-10-20| +|数据指标|英文WER 0.072| + +## 一、模型基本信息 + +### 模型介绍 + +DeepSpeech2是百度于2015年提出的适用于英文和中文的end-to-end语音识别模型。deepspeech2_librispeech使用了DeepSpeech2离线模型的结构,模型主要由2层卷积网络和3层GRU组成,并在英文开源语音数据集[LibriSpeech ASR corpus](http://www.openslr.org/12/)进行了预训练,该模型在其测试集上的WER指标是0.072。 + + +
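+
+考虑到下文 API 部分要求输入为采样率 16000 的 wav 音频，这里补充一个与本模块实现无关的预处理小示例（仅用到本模块依赖列表中已有的 soundfile 与 resampy，其中的函数名与文件路径均为假设），演示如何把任意采样率的音频转成 16kHz 单声道 wav：
+
+```python
+# 示例脚本（非本模块代码）：将任意采样率的音频重采样为模型要求的 16kHz 单声道 wav
+import numpy as np
+import resampy
+import soundfile as sf
+
+
+def to_16k_wav(src_path, dst_path, target_sr=16000):
+    data, sr = sf.read(src_path, dtype='float32')
+    if data.ndim > 1:  # 多声道取均值，转为单声道
+        data = np.mean(data, axis=1)
+    if sr != target_sr:  # 重采样到目标采样率
+        data = resampy.resample(data, sr, target_sr)
+    sf.write(dst_path, data, target_sr)
+```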

+ +更多详情请参考[Deep Speech 2: End-to-End Speech Recognition in English and Mandarin](https://arxiv.org/abs/1512.02595) + +## 二、安装 + +- ### 1、系统依赖 + + - libsndfile, swig >= 3.0 + - Linux + ```shell + $ sudo apt-get install libsndfile swig + or + $ sudo yum install libsndfile swig + ``` + - MacOs + ``` + $ brew install libsndfile swig + ``` + +- ### 2、环境依赖 + - swig_decoder: + ``` + git clone https://github.com/paddlepaddle/deepspeech && cd DeepSpeech && git reset --hard b53171694e7b87abe7ea96870b2f4d8e0e2b1485 && cd deepspeech/decoders/ctcdecoder/swig && sh setup.sh + ``` + + - paddlepaddle >= 2.1.0 + + - paddlehub >= 2.1.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) + +- ### 3、安装 + + - ```shell + $ hub install deepspeech2_librispeech + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + + +## 三、模型API预测 + +- ### 1、预测代码示例 + + ```python + import paddlehub as hub + + # 采样率为16k,格式为wav的英文语音音频 + wav_file = '/PATH/TO/AUDIO' + + model = hub.Module( + name='deepspeech2_librispeech', + version='1.0.0') + text = model.speech_recognize(wav_file) + + print(text) + ``` + +- ### 2、API + - ```python + def check_audio(audio_file) + ``` + - 检查输入音频格式和采样率是否满足为16000 + + - **参数** + + - `audio_file`:本地音频文件(*.wav)的路径,如`/path/to/input.wav` + + - ```python + def speech_recognize( + audio_file, + device='cpu', + ) + ``` + - 将输入的音频识别成文字 + + - **参数** + + - `audio_file`:本地音频文件(*.wav)的路径,如`/path/to/input.wav` + - `device`:预测时使用的设备,默认为`cpu`,如需使用gpu预测,请设置为`gpu`。 + + - **返回** + + - `text`:str类型,返回输入音频的识别文字结果。 + + +## 四、服务部署 + +- PaddleHub Serving可以部署一个在线的语音识别服务。 + +- ### 第一步:启动PaddleHub Serving + + - ```shell + $ hub serving start -m deepspeech2_librispeech + ``` + + - 这样就完成了一个语音识别服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 需要识别的音频的存放路径,确保部署服务的机器可访问 + file = '/path/to/input.wav' + + # 以key的方式指定text传入预测方法的时的参数,此例中为"audio_file" + data = {"audio_file": file} + + # 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip + url = "http://127.0.0.1:8866/predict/deepspeech2_librispeech" + + # 指定post请求的headers为application/json方式 + headers = {"Content-Type": "application/json"} + + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + ``` + +## 五、更新历史 + +* 1.0.0 + + 初始发布 + + ```shell + $ hub install deepspeech2_librispeech + ``` diff --git a/modules/thirdparty/image/classification/DriverStatusRecognition/__init__.py b/modules/audio/asr/deepspeech2_librispeech/__init__.py similarity index 100% rename from modules/thirdparty/image/classification/DriverStatusRecognition/__init__.py rename to modules/audio/asr/deepspeech2_librispeech/__init__.py diff --git a/modules/audio/asr/deepspeech2_librispeech/assets/conf/augmentation.json b/modules/audio/asr/deepspeech2_librispeech/assets/conf/augmentation.json new file mode 100644 index 0000000000000000000000000000000000000000..0967ef424bce6791893e9a57bb952f80fd536e93 --- /dev/null +++ b/modules/audio/asr/deepspeech2_librispeech/assets/conf/augmentation.json @@ -0,0 +1 @@ +{} diff --git a/modules/audio/asr/deepspeech2_librispeech/assets/conf/deepspeech2.yaml b/modules/audio/asr/deepspeech2_librispeech/assets/conf/deepspeech2.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..c5c2e4668239c63ff457eb5b75dbeb33039da891 --- /dev/null +++ b/modules/audio/asr/deepspeech2_librispeech/assets/conf/deepspeech2.yaml @@ -0,0 +1,68 @@ +# https://yaml.org/type/float.html +data: + train_manifest: data/manifest.train + dev_manifest: data/manifest.dev-clean + test_manifest: data/manifest.test-clean + min_input_len: 0.0 + max_input_len: 30.0 # second + min_output_len: 0.0 + max_output_len: .inf + min_output_input_ratio: 0.00 + max_output_input_ratio: .inf + +collator: + batch_size: 20 + mean_std_filepath: data/mean_std.json + unit_type: char + vocab_filepath: data/vocab.txt + augmentation_config: conf/augmentation.json + random_seed: 0 + spm_model_prefix: + spectrum_type: linear + target_sample_rate: 16000 + max_freq: None + n_fft: None + stride_ms: 10.0 + window_ms: 20.0 + delta_delta: False + dither: 1.0 + use_dB_normalization: True + target_dB: -20 + random_seed: 0 + keep_transcription_text: False + sortagrad: True + shuffle_method: batch_shuffle + num_workers: 2 + +model: + num_conv_layers: 2 + num_rnn_layers: 3 + rnn_layer_size: 2048 + use_gru: False + share_rnn_weights: True + blank_id: 0 + ctc_grad_norm_type: instance + +training: + n_epoch: 50 + accum_grad: 1 + lr: 1e-3 + lr_decay: 0.83 + weight_decay: 1e-06 + global_grad_clip: 5.0 + log_interval: 100 + checkpoint: + kbest_n: 50 + latest_n: 5 + +decoding: + batch_size: 128 + error_rate_type: wer + decoding_method: ctc_beam_search + lang_model_path: data/lm/common_crawl_00.prune01111.trie.klm + alpha: 1.9 + beta: 0.3 + beam_size: 500 + cutoff_prob: 1.0 + cutoff_top_n: 40 + num_proc_bsearch: 8 diff --git a/modules/audio/asr/deepspeech2_librispeech/deepspeech_tester.py b/modules/audio/asr/deepspeech2_librispeech/deepspeech_tester.py new file mode 100644 index 0000000000000000000000000000000000000000..6b1f89759aa4e156e4e8c9f99d22a5c5b738139f --- /dev/null +++ b/modules/audio/asr/deepspeech2_librispeech/deepspeech_tester.py @@ -0,0 +1,81 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Evaluation for DeepSpeech2 model.""" +import os +import sys +from pathlib import Path + +import paddle + +from deepspeech.frontend.featurizer.text_featurizer import TextFeaturizer +from deepspeech.io.collator import SpeechCollator +from deepspeech.models.ds2 import DeepSpeech2Model +from deepspeech.utils import mp_tools +from deepspeech.utils.utility import UpdateConfig + + +class DeepSpeech2Tester: + def __init__(self, config): + self.config = config + self.collate_fn_test = SpeechCollator.from_config(config) + self._text_featurizer = TextFeaturizer(unit_type=config.collator.unit_type, vocab_filepath=None) + + def compute_result_transcripts(self, audio, audio_len, vocab_list, cfg): + result_transcripts = self.model.decode( + audio, + audio_len, + vocab_list, + decoding_method=cfg.decoding_method, + lang_model_path=cfg.lang_model_path, + beam_alpha=cfg.alpha, + beam_beta=cfg.beta, + beam_size=cfg.beam_size, + cutoff_prob=cfg.cutoff_prob, + cutoff_top_n=cfg.cutoff_top_n, + num_processes=cfg.num_proc_bsearch) + #replace the '' with ' ' + result_transcripts = [self._text_featurizer.detokenize(sentence) for sentence in result_transcripts] + + return result_transcripts + + @mp_tools.rank_zero_only + @paddle.no_grad() + def test(self, audio_file): + self.model.eval() + cfg = self.config + collate_fn_test = self.collate_fn_test + audio, _ = collate_fn_test.process_utterance(audio_file=audio_file, transcript=" ") + audio_len = audio.shape[0] + audio = paddle.to_tensor(audio, dtype='float32') + audio_len = paddle.to_tensor(audio_len) + audio = paddle.unsqueeze(audio, axis=0) + vocab_list = collate_fn_test.vocab_list + result_transcripts = self.compute_result_transcripts(audio, audio_len, vocab_list, cfg.decoding) + return result_transcripts + + def setup_model(self): + config = self.config.clone() + with UpdateConfig(config): + config.model.feat_size = self.collate_fn_test.feature_size + config.model.dict_size = self.collate_fn_test.vocab_size + + model = DeepSpeech2Model.from_config(config.model) + self.model = model + + def resume(self, checkpoint): + """Resume from the checkpoint at checkpoints in the output + directory or load a specified checkpoint. + """ + model_dict = paddle.load(checkpoint) + self.model.set_state_dict(model_dict) diff --git a/modules/audio/asr/deepspeech2_librispeech/module.py b/modules/audio/asr/deepspeech2_librispeech/module.py new file mode 100644 index 0000000000000000000000000000000000000000..c05d484f95002f1daf532cab9128aa0c592e1dce --- /dev/null +++ b/modules/audio/asr/deepspeech2_librispeech/module.py @@ -0,0 +1,93 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +from pathlib import Path +import sys + +import numpy as np +from paddlehub.env import MODULE_HOME +from paddlehub.module.module import moduleinfo, serving +from paddlehub.utils.log import logger +from paddle.utils.download import get_path_from_url + +try: + import swig_decoders +except ModuleNotFoundError as e: + logger.error(e) + logger.info('The module requires additional dependencies: swig_decoders. ' + 'please install via:\n\'git clone https://github.com/PaddlePaddle/DeepSpeech.git ' + '&& cd DeepSpeech && git reset --hard b53171694e7b87abe7ea96870b2f4d8e0e2b1485 ' + '&& cd deepspeech/decoders/ctcdecoder/swig && sh setup.sh\'') + sys.exit(1) + +import paddle +import soundfile as sf + +# TODO: Remove system path when deepspeech can be installed via pip. +sys.path.append(os.path.join(MODULE_HOME, 'deepspeech2_librispeech')) +from deepspeech.exps.deepspeech2.config import get_cfg_defaults +from deepspeech.utils.utility import UpdateConfig +from .deepspeech_tester import DeepSpeech2Tester + +LM_URL = 'https://deepspeech.bj.bcebos.com/en_lm/common_crawl_00.prune01111.trie.klm' +LM_MD5 = '099a601759d467cd0a8523ff939819c5' + + +@moduleinfo( + name="deepspeech2_librispeech", version="1.0.0", summary="", author="Baidu", author_email="", type="audio/asr") +class DeepSpeech2(paddle.nn.Layer): + def __init__(self): + super(DeepSpeech2, self).__init__() + + # resource + res_dir = os.path.join(MODULE_HOME, 'deepspeech2_librispeech', 'assets') + conf_file = os.path.join(res_dir, 'conf/deepspeech2.yaml') + checkpoint = os.path.join(res_dir, 'checkpoints/avg_1.pdparams') + # Download LM manually cause its large size. + lm_path = os.path.join(res_dir, 'data', 'lm') + lm_file = os.path.join(lm_path, LM_URL.split('/')[-1]) + if not os.path.isfile(lm_file): + logger.info(f'Downloading lm from {LM_URL}.') + get_path_from_url(url=LM_URL, root_dir=lm_path, md5sum=LM_MD5) + + # config + self.model_type = 'offline' + self.config = get_cfg_defaults(self.model_type) + self.config.merge_from_file(conf_file) + + # TODO: Remove path updating snippet. 
+ with UpdateConfig(self.config): + self.config.collator.mean_std_filepath = os.path.join(res_dir, self.config.collator.mean_std_filepath) + self.config.collator.vocab_filepath = os.path.join(res_dir, self.config.collator.vocab_filepath) + self.config.collator.augmentation_config = os.path.join(res_dir, self.config.collator.augmentation_config) + self.config.decoding.lang_model_path = os.path.join(res_dir, self.config.decoding.lang_model_path) + + # model + self.tester = DeepSpeech2Tester(self.config) + self.tester.setup_model() + self.tester.resume(checkpoint) + + @staticmethod + def check_audio(audio_file): + sig, sample_rate = sf.read(audio_file) + assert sample_rate == 16000, 'Excepting sample rate of input audio is 16000, but got {}'.format(sample_rate) + + @serving + def speech_recognize(self, audio_file, device='cpu'): + assert os.path.isfile(audio_file), 'File not exists: {}'.format(audio_file) + self.check_audio(audio_file) + + paddle.set_device(device) + return self.tester.test(audio_file)[0] diff --git a/modules/audio/asr/deepspeech2_librispeech/requirements.txt b/modules/audio/asr/deepspeech2_librispeech/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..66d8ba6c0edcbc893f3722aadabc5d7e0fa7d669 --- /dev/null +++ b/modules/audio/asr/deepspeech2_librispeech/requirements.txt @@ -0,0 +1,11 @@ +loguru +yacs +jsonlines +scipy==1.2.1 +sentencepiece +resampy==0.2.2 +SoundFile==0.9.0.post1 +soxbindings +kaldiio +typeguard +editdistance diff --git a/modules/audio/asr/u2_conformer_aishell/README.md b/modules/audio/asr/u2_conformer_aishell/README.md new file mode 100644 index 0000000000000000000000000000000000000000..0f3ef2b17089711b8b107fdb905546900a9c8e3f --- /dev/null +++ b/modules/audio/asr/u2_conformer_aishell/README.md @@ -0,0 +1,156 @@ +# u2_conformer_aishell + +|模型名称|u2_conformer_aishell| +| :--- | :---: | +|类别|语音-语音识别| +|网络|Conformer| +|数据集|AISHELL-1| +|是否支持Fine-tuning|否| +|模型大小|284MB| +|最新更新日期|2021-11-01| +|数据指标|中文CER 0.055| + +## 一、模型基本信息 + +### 模型介绍 + +U2 Conformer模型是一种适用于英文和中文的end-to-end语音识别模型。u2_conformer_aishell采用了conformer的encoder和transformer的decoder的模型结构,并且使用了ctc-prefix beam search的方式进行一遍打分,再利用attention decoder进行二次打分的方式进行解码来得到最终结果。 + +u2_conformer_aishell在中文普通话开源语音数据集[AISHELL-1](http://www.aishelltech.com/kysjcp)进行了预训练,该模型在其测试集上的CER指标是0.055257。 + +
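+
+为便于理解上面提到的 CTC 一遍解码思路，这里给出一个与本模块实现无关的玩具示例，仅演示 CTC 输出"相邻去重、再去 blank"的折叠规则（真实的 ctc-prefix beam search 与 attention 二次打分远比此复杂）：
+
+```python
+# 玩具示例（非本模块代码）：CTC 输出序列的折叠规则
+def ctc_collapse(token_ids, blank_id=0):
+    collapsed, prev = [], None
+    for t in token_ids:
+        if t != prev and t != blank_id:  # 跳过相邻重复与 blank
+            collapsed.append(t)
+        prev = t
+    return collapsed
+
+
+print(ctc_collapse([0, 3, 3, 0, 5, 5, 0, 5]))  # 输出: [3, 5, 5]
+```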

+ +更多详情请参考: +- [Unified Streaming and Non-streaming Two-pass End-to-end Model for Speech Recognition](https://arxiv.org/abs/2012.05481) +- [Conformer: Convolution-augmented Transformer for Speech Recognition](https://arxiv.org/abs/2005.08100) + +## 二、安装 + +- ### 1、系统依赖 + + - libsndfile + - Linux + ```shell + $ sudo apt-get install libsndfile + or + $ sudo yum install libsndfile + ``` + - MacOs + ``` + $ brew install libsndfile + ``` + +- ### 2、环境依赖 + + - paddlepaddle >= 2.1.0 + + - paddlehub >= 2.1.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) + +- ### 3、安装 + + - ```shell + $ hub install u2_conformer_aishell + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + + +## 三、模型API预测 + +- ### 1、预测代码示例 + + ```python + import paddlehub as hub + + # 采样率为16k,格式为wav的中文语音音频 + wav_file = '/PATH/TO/AUDIO' + + model = hub.Module( + name='u2_conformer_aishell', + version='1.0.0') + text = model.speech_recognize(wav_file) + + print(text) + ``` + +- ### 2、API + - ```python + def check_audio(audio_file) + ``` + - 检查输入音频格式和采样率是否满足为16000 + + - **参数** + + - `audio_file`:本地音频文件(*.wav)的路径,如`/path/to/input.wav` + + - ```python + def speech_recognize( + audio_file, + device='cpu', + ) + ``` + - 将输入的音频识别成文字 + + - **参数** + + - `audio_file`:本地音频文件(*.wav)的路径,如`/path/to/input.wav` + - `device`:预测时使用的设备,默认为`cpu`,如需使用gpu预测,请设置为`gpu`。 + + - **返回** + + - `text`:str类型,返回输入音频的识别文字结果。 + + +## 四、服务部署 + +- PaddleHub Serving可以部署一个在线的语音识别服务。 + +- ### 第一步:启动PaddleHub Serving + + - ```shell + $ hub serving start -m u2_conformer_aishell + ``` + + - 这样就完成了一个语音识别服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 需要识别的音频的存放路径,确保部署服务的机器可访问 + file = '/path/to/input.wav' + + # 以key的方式指定text传入预测方法的时的参数,此例中为"audio_file" + data = {"audio_file": file} + + # 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip + url = "http://127.0.0.1:8866/predict/u2_conformer_aishell" + + # 指定post请求的headers为application/json方式 + headers = {"Content-Type": "application/json"} + + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + ``` + +## 五、更新历史 + +* 1.0.0 + + 初始发布 + + ```shell + $ hub install u2_conformer_aishell + ``` diff --git a/modules/thirdparty/image/classification/SnakeIdentification/__init__.py b/modules/audio/asr/u2_conformer_aishell/__init__.py similarity index 100% rename from modules/thirdparty/image/classification/SnakeIdentification/__init__.py rename to modules/audio/asr/u2_conformer_aishell/__init__.py diff --git a/modules/audio/asr/u2_conformer_aishell/assets/conf/augmentation.json b/modules/audio/asr/u2_conformer_aishell/assets/conf/augmentation.json new file mode 100644 index 0000000000000000000000000000000000000000..0967ef424bce6791893e9a57bb952f80fd536e93 --- /dev/null +++ b/modules/audio/asr/u2_conformer_aishell/assets/conf/augmentation.json @@ -0,0 +1 @@ +{} diff --git a/modules/audio/asr/u2_conformer_aishell/assets/conf/conformer.yaml b/modules/audio/asr/u2_conformer_aishell/assets/conf/conformer.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b6925dfcf4ddb1a8f1b3d2dac2367e58ecabaa74 --- /dev/null +++ b/modules/audio/asr/u2_conformer_aishell/assets/conf/conformer.yaml @@ -0,0 +1,102 @@ +data: + train_manifest: 
data/manifest.train + dev_manifest: data/manifest.dev + test_manifest: data/manifest.test + min_input_len: 0.5 + max_input_len: 20.0 # second + min_output_len: 0.0 + max_output_len: 400.0 + min_output_input_ratio: 0.05 + max_output_input_ratio: 10.0 + +collator: + vocab_filepath: data/vocab.txt + unit_type: 'char' + spm_model_prefix: '' + augmentation_config: conf/augmentation.json + batch_size: 64 + raw_wav: True # use raw_wav or kaldi feature + spectrum_type: fbank #linear, mfcc, fbank + feat_dim: 80 + delta_delta: False + dither: 1.0 + target_sample_rate: 16000 + max_freq: None + n_fft: None + stride_ms: 10.0 + window_ms: 25.0 + use_dB_normalization: False + target_dB: -20 + random_seed: 0 + keep_transcription_text: False + sortagrad: True + shuffle_method: batch_shuffle + num_workers: 2 + +decoding: + alpha: 2.5 + batch_size: 128 + beam_size: 10 + beta: 0.3 + ctc_weight: 0.0 + cutoff_prob: 1.0 + cutoff_top_n: 0 + decoding_chunk_size: -1 + decoding_method: attention + error_rate_type: cer + lang_model_path: data/lm/common_crawl_00.prune01111.trie.klm + num_decoding_left_chunks: -1 + num_proc_bsearch: 8 + simulate_streaming: False +model: + cmvn_file: data/mean_std.json + cmvn_file_type: json + decoder: transformer + decoder_conf: + attention_heads: 4 + dropout_rate: 0.1 + linear_units: 2048 + num_blocks: 6 + positional_dropout_rate: 0.1 + self_attention_dropout_rate: 0.0 + src_attention_dropout_rate: 0.0 + encoder: conformer + encoder_conf: + activation_type: swish + attention_dropout_rate: 0.0 + attention_heads: 4 + cnn_module_kernel: 15 + dropout_rate: 0.1 + input_layer: conv2d + linear_units: 2048 + normalize_before: True + num_blocks: 12 + output_size: 256 + pos_enc_layer_type: rel_pos + positional_dropout_rate: 0.1 + selfattention_layer_type: rel_selfattn + use_cnn_module: True + input_dim: 0 + model_conf: + ctc_weight: 0.3 + ctc_dropoutrate: 0.0 + ctc_grad_norm_type: instance + length_normalized_loss: False + lsm_weight: 0.1 + output_dim: 0 +training: + accum_grad: 2 + global_grad_clip: 5.0 + log_interval: 100 + n_epoch: 300 + optim: adam + optim_conf: + lr: 0.002 + weight_decay: 1e-06 + scheduler: warmuplr + scheduler_conf: + lr_decay: 1.0 + warmup_steps: 25000 + checkpoint: + kbest_n: 50 + latest_n: 5 diff --git a/modules/audio/asr/u2_conformer_aishell/assets/data/mean_std.json b/modules/audio/asr/u2_conformer_aishell/assets/data/mean_std.json new file mode 100644 index 0000000000000000000000000000000000000000..fff0005df2937e09e3651089b55decf0f58dc47b --- /dev/null +++ b/modules/audio/asr/u2_conformer_aishell/assets/data/mean_std.json @@ -0,0 +1 @@ +{"mean_stat": [533749178.75492024, 537379151.9412827, 553560684.251823, 587164297.7995199, 631868827.5506272, 662598279.7375823, 684377628.7270963, 695391900.076011, 692470493.5234187, 679434068.1698124, 666124153.9164762, 656323498.7897255, 665750586.0282139, 678693518.7836165, 681921713.5434498, 679622373.0941861, 669891550.4909347, 656595089.7941492, 653838531.0994304, 637678601.7858486, 628412248.7348012, 644835299.462052, 638840698.1892803, 646181879.4332589, 639724189.2981818, 642757470.3933163, 637471382.8647255, 642368839.4687729, 643414999.4559816, 647384269.1630985, 649348352.9727564, 649293860.0141628, 650234047.7200857, 654485430.6703687, 660474314.9996675, 667417041.2224753, 673157601.3226709, 675674470.304284, 675124085.6890339, 668017589.4583111, 670061307.6169846, 662625614.6886193, 663144526.4351237, 662504003.7634674, 666413530.1149732, 672263295.5639057, 678483738.2530766, 685387098.3034457, 692570857.529439, 
699066050.4399202, 700784878.5879861, 701201520.50868, 702666292.305144, 705443439.2278953, 706070270.9023902, 705988909.8337733, 702843339.0362502, 699318566.4701376, 696089900.3030818, 687559674.541517, 675279201.9502573, 663676352.2301354, 662963751.7464145, 664300133.8414352, 666095384.4212626, 671682092.7777623, 676652386.6696675, 680097668.2490273, 683810023.0071762, 688701544.3655603, 692082724.9923568, 695788849.6782106, 701085780.0070009, 706389529.7959046, 711492753.1344281, 717637923.73355, 719691678.2081754, 715810733.4964175, 696362890.4862831, 604649423.9932467], "var_stat": [5413314850.92017, 5559847287.933615, 6150990253.613769, 6921242242.585692, 7999776708.347419, 8789877370.390867, 9405801233.462742, 9768050110.323652, 9759783206.942099, 9430647265.679018, 9090547056.72849, 8873147345.425886, 9155912918.518642, 9542539953.84679, 9653547618.806402, 9593434792.936714, 9316633026.420147, 8959273999.588833, 8863548125.445953, 8450615911.730164, 8211598033.615433, 8587083872.162145, 8432613574.987708, 8583943640.722399, 8401731458.393406, 8439359231.367369, 8293779802.711447, 8401506934.147289, 8427506949.839874, 8525176341.071184, 8577080109.482346, 8575106681.347283, 8594987363.896849, 8701703698.13697, 8854967559.695303, 9029484499.828356, 9168774993.437275, 9221457044.693224, 9194525496.858181, 8997085233.031223, 9024585998.805922, 8819398159.92156, 8807895653.788486, 8777245867.886335, 8869681168.825321, 9017397167.041729, 9173402827.38027, 9345595113.30765, 9530638054.282673, 9701241750.610865, 9749002220.142677, 9762753891.356327, 9802020174.527405, 9874432300.977995, 9883303068.689241, 9873499335.610315, 9780680890.924107, 9672603363.913414, 9569436761.47915, 9321842521.985804, 8968140697.297707, 8646348638.918655, 8616965457.523136, 8648620220.395298, 8702086138.675117, 8859213220.99842, 8999405313.087536, 9105949447.399998, 9220413227.016796, 9358601578.269663, 9451405873.00428, 9552727080.824707, 9695443509.54488, 9836687193.669691, 9970962418.410656, 10135881535.317768, 10189390919.400673, 10070483257.345238, 9532953296.22076, 7261219636.045063], "frame_num": 54068199} diff --git a/modules/audio/asr/u2_conformer_aishell/assets/data/vocab.txt b/modules/audio/asr/u2_conformer_aishell/assets/data/vocab.txt new file mode 100644 index 0000000000000000000000000000000000000000..bf3f823b382998d734b885bb4c9718222b01d3fd --- /dev/null +++ b/modules/audio/asr/u2_conformer_aishell/assets/data/vocab.txt @@ -0,0 +1,4233 @@ + + +一 +丁 +七 +万 +丈 +三 +上 +下 +不 +与 +丐 +丑 +专 +且 +世 +丘 +丙 +业 +丛 +东 +丝 +丞 +丢 +两 +严 +丧 +个 +丫 +中 +丰 +串 +临 +丸 +丹 +为 +主 +丽 +举 +乃 +久 +么 +义 +之 +乌 +乍 +乎 +乏 +乐 +乒 +乓 +乔 +乖 +乘 +乙 +九 +乞 +也 +习 +乡 +书 +买 +乱 +乳 +乾 +了 +予 +争 +事 +二 +于 +亏 +云 +互 +五 +井 +亚 +些 +亟 +亡 +亢 +交 +亥 +亦 +产 +亨 +亩 +享 +京 +亭 +亮 +亲 +亳 +亵 +人 +亿 +什 +仁 +仄 +仅 +仇 +今 +介 +仍 +从 +仑 +仓 +仔 +仕 +他 +仗 +付 +仙 +仡 +代 +令 +以 +仨 +仪 +们 +仰 +仲 +件 +价 +任 +份 +仿 +企 +伉 +伊 +伍 +伎 +伏 +伐 +休 +众 +优 +伙 +会 +伞 +伟 +传 +伢 +伤 +伦 +伪 +伯 +估 +伴 +伶 +伸 +伺 +似 +伽 +佃 +但 +位 +低 +住 +佐 +佑 +体 +何 +佘 +余 +佛 +作 +佟 +你 +佣 +佩 +佬 +佳 +佶 +佼 +使 +侃 +侄 +侈 +例 +侍 +侑 +侗 +供 +依 +侠 +侣 +侥 +侦 +侧 +侨 +侬 +侮 +侯 +侵 +便 +促 +俄 +俊 +俏 +俐 +俗 +俘 +俚 +保 +俞 +信 +俨 +俩 +俪 +俭 +修 +俯 +俱 +俸 +俺 +俾 +倍 +倒 +倘 +候 +倚 +倜 +借 +倡 +倦 +倩 +倪 +债 +值 +倾 +假 +偏 +做 +停 +健 +偶 +偷 +偿 +傅 +傍 +傥 +储 +催 +傲 +傻 +像 +僚 +僧 +僮 +僵 +僻 +儒 +儿 +兀 +允 +元 +兄 +充 +兆 +先 +光 +克 +免 +兑 +兔 +兖 +党 +兜 +兢 +入 +全 +八 +公 +六 +兰 +共 +关 +兴 +兵 +其 +具 +典 +兹 +养 +兼 +兽 +冀 +内 +冈 +冉 +册 +再 +冒 +冕 +写 +军 +农 +冠 +冤 +冥 +冬 +冯 +冰 +冲 +决 +况 +冶 +冷 +冻 +净 +凄 +准 +凇 +凉 +凋 +凌 +减 +凑 +凝 +几 +凡 +凤 +凭 +凯 +凰 +凳 +凶 +凸 +凹 +出 +击 +函 +凿 +刀 +刁 +刃 +分 +切 +刊 +刑 +划 +列 +刘 +则 +刚 +创 +初 +删 +判 +刨 +利 +别 +刮 +到 +制 +刷 +券 
+刹 +刺 +刻 +剁 +剂 +剃 +削 +前 +剐 +剑 +剔 +剖 +剥 +剧 +剩 +剪 +副 +割 +剽 +剿 +劈 +力 +劝 +办 +功 +加 +务 +劣 +动 +助 +努 +劫 +励 +劲 +劳 +劵 +势 +勃 +勇 +勉 +勋 +勒 +勘 +募 +勤 +勺 +勾 +勿 +匀 +包 +匆 +匈 +匕 +化 +北 +匙 +匝 +匠 +匡 +匣 +匪 +匮 +匹 +区 +医 +匾 +匿 +十 +千 +升 +午 +卉 +半 +华 +协 +卑 +卒 +卓 +单 +卖 +南 +博 +卜 +卞 +占 +卡 +卢 +卤 +卦 +卧 +卫 +卯 +印 +危 +卲 +即 +却 +卵 +卷 +卸 +卿 +厂 +厄 +厅 +历 +厉 +压 +厌 +厕 +厘 +厚 +原 +厢 +厥 +厦 +厨 +厩 +厮 +去 +县 +参 +又 +叉 +及 +友 +双 +反 +发 +叔 +取 +受 +变 +叙 +叛 +叠 +口 +古 +句 +另 +叨 +叩 +只 +叫 +召 +叭 +叮 +可 +台 +叱 +史 +右 +叵 +叶 +号 +司 +叹 +叼 +吁 +吃 +各 +吆 +合 +吉 +吊 +同 +名 +后 +吏 +吐 +向 +吓 +吕 +吗 +君 +吝 +吞 +吟 +否 +吧 +吨 +吩 +含 +听 +吭 +启 +吴 +吵 +吸 +吹 +吻 +吼 +吾 +吿 +呀 +呃 +呆 +呈 +告 +呐 +呕 +呗 +员 +呛 +呜 +呢 +呦 +周 +呲 +味 +呵 +呼 +命 +咀 +咄 +咋 +和 +咎 +咏 +咐 +咒 +咔 +咕 +咖 +咚 +咣 +咤 +咧 +咨 +咪 +咫 +咬 +咯 +咱 +咳 +咸 +咽 +哀 +品 +哄 +哆 +哇 +哈 +哉 +响 +哎 +哑 +哒 +哗 +哟 +哥 +哦 +哨 +哪 +哭 +哲 +哺 +哼 +哽 +唁 +唇 +唉 +唏 +唐 +唠 +唤 +唬 +售 +唯 +唱 +唾 +啃 +商 +啊 +啕 +啡 +啤 +啥 +啦 +啧 +啪 +啬 +啰 +啵 +啶 +啸 +啼 +喀 +喂 +善 +喆 +喇 +喉 +喊 +喔 +喘 +喜 +喝 +喧 +喱 +喵 +喷 +喻 +喽 +嗅 +嗑 +嗒 +嗓 +嗡 +嗣 +嗤 +嗦 +嗨 +嗬 +嗯 +嗲 +嗷 +嗽 +嘀 +嘉 +嘎 +嘘 +嘛 +嘟 +嘭 +嘱 +嘲 +嘴 +嘻 +噎 +器 +噩 +噪 +噬 +噱 +噼 +嚎 +嚏 +嚓 +嚣 +嚷 +嚼 +囊 +囚 +四 +回 +因 +团 +囤 +囧 +园 +困 +围 +固 +国 +图 +圆 +圈 +土 +圣 +在 +圩 +圪 +圭 +地 +圳 +场 +圾 +址 +坂 +均 +坊 +坍 +坎 +坏 +坐 +坑 +块 +坚 +坛 +坝 +坞 +坟 +坠 +坡 +坤 +坦 +坪 +坯 +坷 +垂 +垃 +垄 +垅 +型 +垌 +垒 +垛 +垢 +垣 +垤 +垦 +垫 +垮 +埃 +埋 +城 +埔 +埜 +域 +培 +基 +堂 +堆 +堕 +堡 +堤 +堪 +堰 +堵 +塌 +塑 +塔 +塘 +塞 +填 +塬 +塾 +境 +墅 +墓 +墙 +增 +墟 +墨 +墩 +壁 +壑 +壕 +壤 +士 +壮 +声 +壳 +壶 +壹 +处 +备 +复 +夏 +夕 +外 +夙 +多 +夜 +够 +大 +天 +太 +夫 +夭 +央 +夯 +失 +头 +夷 +夸 +夹 +夺 +奂 +奇 +奈 +奉 +奋 +奎 +奏 +契 +奔 +奕 +奖 +套 +奘 +奚 +奠 +奢 +奥 +女 +奴 +奶 +奸 +她 +好 +如 +妃 +妄 +妆 +妇 +妈 +妊 +妍 +妒 +妖 +妙 +妞 +妤 +妥 +妧 +妨 +妩 +妮 +妯 +妹 +妻 +姆 +姊 +始 +姐 +姑 +姓 +委 +姗 +姚 +姜 +姝 +姣 +姥 +姨 +姬 +姻 +姿 +威 +娃 +娄 +娅 +娇 +娌 +娘 +娜 +娟 +娠 +娥 +娩 +娱 +娴 +娶 +娼 +婀 +婆 +婉 +婕 +婚 +婧 +婪 +婴 +婵 +婶 +婷 +婿 +媒 +媚 +媛 +媞 +媲 +媳 +嫁 +嫂 +嫉 +嫌 +嫔 +嫖 +嫚 +嫣 +嫦 +嫩 +嬉 +嬛 +嬷 +孀 +子 +孔 +孕 +字 +存 +孙 +孚 +孜 +孝 +孟 +孢 +季 +孤 +学 +孩 +孪 +孰 +孱 +孵 +孺 +宁 +它 +宅 +宇 +守 +安 +宋 +完 +宏 +宓 +宕 +宗 +官 +宙 +定 +宛 +宜 +宝 +实 +宠 +审 +客 +宣 +室 +宦 +宪 +宫 +宰 +害 +宴 +宵 +家 +宸 +容 +宽 +宾 +宿 +寂 +寄 +寅 +密 +寇 +富 +寐 +寒 +寓 +寝 +寞 +察 +寡 +寥 +寨 +寮 +寰 +寸 +对 +寺 +寻 +导 +寿 +封 +射 +将 +尊 +小 +少 +尔 +尖 +尘 +尚 +尝 +尤 +尧 +尬 +就 +尴 +尸 +尹 +尺 +尼 +尽 +尾 +尿 +局 +屁 +层 +居 +屈 +届 +屋 +屌 +屎 +屏 +屑 +展 +属 +屠 +屡 +履 +屯 +山 +屹 +屿 +岁 +岂 +岌 +岐 +岔 +岖 +岗 +岚 +岛 +岩 +岬 +岭 +岱 +岳 +岷 +岸 +峁 +峙 +峡 +峥 +峨 +峪 +峭 +峰 +峻 +崂 +崃 +崇 +崎 +崔 +崖 +崛 +崧 +崩 +崭 +崴 +嵋 +嵌 +嵘 +嵛 +嵩 +嶝 +巅 +巍 +川 +州 +巡 +巢 +工 +左 +巧 +巨 +巩 +巫 +差 +己 +已 +巴 +巷 +巾 +巿 +币 +市 +布 +帅 +帆 +师 +希 +帐 +帕 +帖 +帘 +帚 +帜 +帝 +带 +席 +帮 +帷 +常 +帼 +帽 +幂 +幄 +幅 +幌 +幕 +幢 +干 +平 +年 +并 +幸 +幺 +幻 +幼 +幽 +广 +庄 +庆 +庇 +床 +序 +庐 +库 +应 +底 +店 +庙 +庚 +府 +庞 +废 +度 +座 +庭 +庵 +康 +庸 +庾 +廉 +廊 +廓 +廖 +延 +廷 +建 +开 +异 +弃 +弄 +弈 +弊 +式 +弓 +引 +弗 +弘 +弛 +弟 +张 +弥 +弦 +弧 +弩 +弯 +弱 +弹 +强 +归 +当 +录 +彝 +形 +彤 +彦 +彩 +彪 +彬 +彭 +彰 +影 +彷 +役 +彻 +彼 +彿 +往 +征 +径 +待 +徇 +很 +徉 +徊 +律 +徐 +徒 +得 +徘 +徙 +御 +循 +微 +德 +徽 +心 +必 +忆 +忌 +忍 +忐 +忑 +志 +忘 +忙 +忠 +忧 +忪 +快 +忱 +念 +忽 +怀 +态 +怂 +怎 +怒 +怕 +怖 +怜 +思 +怠 +怡 +急 +怦 +性 +怨 +怪 +怯 +怵 +总 +恋 +恍 +恐 +恒 +恙 +恢 +恣 +恤 +恨 +恩 +恪 +恬 +恭 +息 +恰 +恳 +恶 +恸 +恺 +恼 +恿 +悄 +悉 +悍 +悔 +悖 +悚 +悟 +悠 +患 +悦 +您 +悬 +悯 +悲 +悴 +悸 +悼 +情 +惊 +惋 +惑 +惕 +惚 +惜 +惟 +惠 +惦 +惧 +惨 +惩 +惫 +惬 +惮 +惯 +惰 +想 +惶 +惹 +惺 +愁 +愈 +愉 +意 +愕 +愚 +感 +愤 +愧 +愿 +慈 +慌 +慎 +慑 +慕 +慢 +慧 +慨 +慰 +慷 +憋 +憔 +憧 +憨 +憩 +憬 +憷 +憾 +懂 +懈 +懊 +懋 +懒 +懵 +懿 +戈 +戎 +戏 +成 +我 +戒 +或 +战 +戚 +戛 +戟 +截 +戬 +戮 +戳 +戴 +户 +房 +所 +扁 +扇 +扉 +手 +才 +扎 +扑 +扒 +打 +扔 +托 +扛 +扣 +执 +扩 +扫 +扬 +扭 +扮 +扯 +扰 +扳 +扶 +批 +扼 +找 +承 +技 +抄 +抉 +把 +抑 +抒 +抓 +投 +抖 +抗 +折 +抚 +抛 +抠 +抡 +抢 +护 +报 +抨 +披 +抬 +抱 +抵 +抹 +押 +抽 +抿 +拄 +担 +拆 +拇 +拈 +拉 +拌 +拍 +拎 +拐 +拒 +拓 +拔 +拖 +拗 +拘 +拙 +招 +拜 +拟 +拢 +拣 +拥 +拦 +拧 +拨 +择 +括 +拭 +拮 +拯 +拱 +拳 +拴 +拷 +拼 +拽 +拾 +拿 +持 +挂 +指 +按 +挎 +挑 +挖 +挚 +挛 +挝 +挟 +挠 +挡 +挣 +挤 +挥 +挨 +挪 +挫 +振 +挺 +挽 +捂 +捅 +捆 +捉 +捍 +捎 +捏 +捐 +捕 +捞 +损 +捡 +换 +捣 +捧 +据 +捷 +捺 +捻 +掀 +掂 +授 +掉 +掌 +掏 +掐 +排 +掖 +掘 +掠 +探 +掣 +接 +控 
+推 +掩 +措 +掬 +掮 +掰 +掴 +掷 +掺 +揉 +揍 +描 +提 +插 +握 +揣 +揩 +揪 +揭 +援 +揽 +搀 +搁 +搂 +搅 +搏 +搜 +搞 +搡 +搪 +搬 +搭 +携 +搽 +摁 +摄 +摆 +摇 +摊 +摒 +摔 +摘 +摧 +摩 +摸 +摹 +撂 +撇 +撑 +撒 +撕 +撞 +撤 +撩 +撬 +播 +撮 +撰 +撵 +撸 +撼 +擂 +擅 +操 +擎 +擒 +擘 +擞 +擦 +攀 +攒 +攥 +支 +收 +改 +攻 +放 +政 +故 +效 +敌 +敏 +救 +敖 +教 +敛 +敝 +敞 +敢 +散 +敦 +敬 +数 +敲 +整 +敷 +文 +斌 +斐 +斑 +斓 +斗 +料 +斛 +斜 +斟 +斤 +斥 +斧 +斩 +断 +斯 +新 +方 +施 +旁 +旅 +旋 +族 +旗 +无 +既 +日 +旦 +旧 +旨 +早 +旬 +旭 +旱 +时 +旷 +旺 +昀 +昂 +昆 +昊 +昌 +明 +昏 +易 +昔 +昕 +昙 +星 +映 +春 +昧 +昨 +昭 +是 +昱 +昵 +昼 +显 +晃 +晋 +晏 +晒 +晓 +晔 +晕 +晖 +晗 +晚 +晟 +晤 +晦 +晨 +普 +景 +晰 +晴 +晶 +智 +晾 +暂 +暄 +暇 +暑 +暖 +暗 +暧 +暨 +暮 +暴 +曙 +曝 +曦 +曰 +曲 +更 +曹 +曼 +曾 +替 +最 +月 +有 +朋 +服 +朐 +朔 +朗 +望 +朝 +期 +朦 +木 +未 +末 +本 +札 +术 +朱 +朴 +朵 +机 +朽 +杀 +杂 +权 +杆 +杉 +李 +杏 +材 +村 +杖 +杜 +杞 +束 +杠 +条 +来 +杨 +杭 +杯 +杰 +杳 +松 +板 +极 +构 +枉 +析 +枕 +林 +枚 +果 +枝 +枞 +枢 +枣 +枪 +枫 +枭 +枯 +架 +枷 +柄 +柏 +某 +染 +柔 +柜 +柞 +柠 +查 +柬 +柯 +柱 +柳 +柴 +柿 +栅 +标 +栈 +栋 +栏 +树 +栓 +栖 +栗 +校 +株 +样 +核 +根 +格 +栽 +栾 +桂 +桃 +框 +案 +桉 +桌 +桎 +桐 +桑 +桓 +桔 +档 +桥 +桦 +桩 +桶 +梁 +梅 +梓 +梗 +梦 +梧 +梨 +梭 +梯 +械 +梳 +梵 +检 +棉 +棋 +棍 +棒 +棕 +棘 +棚 +棠 +森 +棱 +棵 +棺 +椅 +椋 +植 +椎 +椒 +椰 +椿 +楂 +楔 +楚 +楞 +楠 +楣 +楷 +楼 +概 +榄 +榆 +榈 +榉 +榔 +榕 +榜 +榨 +榭 +榴 +榷 +榻 +槌 +槎 +槐 +槛 +槟 +槽 +槿 +樊 +樟 +模 +横 +樱 +橄 +橘 +橙 +橡 +橱 +檀 +檐 +檬 +欠 +次 +欢 +欣 +欧 +欲 +欺 +款 +歆 +歇 +歉 +歌 +止 +正 +此 +步 +武 +歧 +歪 +歹 +死 +殃 +殆 +殉 +殊 +残 +殒 +殓 +殖 +殚 +殡 +殭 +殴 +段 +殷 +殿 +毁 +毂 +毅 +毋 +母 +每 +毒 +毓 +比 +毕 +毗 +毙 +毛 +毫 +毯 +毽 +氏 +民 +氓 +气 +氛 +氟 +氢 +氦 +氧 +氨 +氪 +氮 +氯 +氰 +水 +永 +汀 +汁 +求 +汇 +汉 +汕 +汗 +汛 +汝 +汞 +江 +池 +污 +汤 +汪 +汰 +汲 +汴 +汶 +汹 +汽 +汾 +沁 +沃 +沅 +沈 +沉 +沏 +沐 +沓 +沙 +沛 +沟 +没 +沣 +沥 +沦 +沧 +沪 +沫 +沮 +沱 +河 +沸 +油 +治 +沼 +沽 +沾 +沿 +泄 +泉 +泊 +泌 +泓 +泔 +法 +泗 +泛 +泞 +泠 +泡 +波 +泣 +泥 +注 +泪 +泯 +泰 +泱 +泳 +泵 +泷 +泸 +泻 +泼 +泽 +泾 +洁 +洋 +洒 +洗 +洙 +洛 +洞 +津 +洪 +洱 +洲 +洵 +活 +洼 +洽 +派 +流 +浅 +浆 +浇 +浈 +浊 +测 +济 +浏 +浑 +浓 +浙 +浚 +浦 +浩 +浪 +浮 +浴 +海 +浸 +涂 +涅 +消 +涉 +涌 +涎 +涓 +涕 +涛 +涝 +涞 +涠 +涡 +涤 +润 +涧 +涨 +涩 +涮 +涯 +液 +涵 +涿 +淀 +淄 +淆 +淇 +淋 +淌 +淑 +淖 +淘 +淝 +淞 +淡 +淤 +淫 +淮 +深 +淳 +混 +淹 +添 +淼 +渀 +清 +渊 +渍 +渎 +渐 +渔 +渗 +渚 +渝 +渠 +渡 +渣 +渤 +渥 +温 +渭 +港 +渲 +渴 +游 +渺 +湃 +湍 +湖 +湘 +湛 +湾 +湿 +溃 +溅 +溉 +源 +溜 +溢 +溥 +溧 +溪 +溯 +溶 +溺 +滁 +滇 +滋 +滑 +滔 +滕 +滚 +滞 +满 +滢 +滤 +滥 +滨 +滩 +滴 +漂 +漆 +漏 +漓 +演 +漕 +漠 +漩 +漫 +漭 +漯 +漱 +漳 +漾 +潇 +潘 +潜 +潞 +潢 +潭 +潮 +潼 +澄 +澈 +澎 +澜 +澡 +澳 +激 +濑 +濒 +濠 +濡 +濮 +瀑 +瀚 +瀛 +灌 +灞 +火 +灭 +灯 +灰 +灵 +灶 +灼 +灾 +灿 +炅 +炉 +炊 +炎 +炒 +炕 +炖 +炙 +炜 +炫 +炬 +炭 +炮 +炯 +炳 +炷 +炸 +点 +炼 +炽 +烁 +烂 +烃 +烈 +烊 +烘 +烙 +烟 +烤 +烦 +烧 +烨 +烫 +热 +烯 +烷 +烹 +烽 +焉 +焊 +焕 +焖 +焘 +焚 +焦 +焯 +焰 +焱 +然 +煊 +煌 +煎 +煜 +煞 +煤 +煦 +照 +煮 +煲 +熄 +熊 +熏 +熔 +熙 +熟 +熠 +熨 +熬 +熹 +燃 +燊 +燎 +燕 +燥 +爆 +爪 +爬 +爱 +爵 +父 +爷 +爸 +爹 +爽 +片 +版 +牌 +牙 +牛 +牟 +牡 +牢 +牧 +物 +牲 +牵 +特 +牺 +牾 +犀 +犊 +犒 +犬 +犯 +状 +犷 +犹 +狂 +狄 +狈 +狐 +狗 +狙 +狞 +狠 +狡 +狩 +独 +狭 +狮 +狰 +狱 +狸 +狼 +猎 +猖 +猛 +猜 +猝 +猥 +猩 +猪 +猫 +猬 +献 +猴 +猾 +猿 +獒 +獗 +獾 +玄 +率 +玉 +王 +玖 +玛 +玟 +玥 +玩 +玫 +玮 +环 +现 +玲 +玳 +玺 +玻 +珀 +珉 +珊 +珍 +珏 +珑 +珜 +珠 +班 +珮 +珲 +珺 +球 +琅 +理 +琉 +琊 +琏 +琐 +琛 +琢 +琥 +琦 +琪 +琬 +琰 +琳 +琴 +琵 +琶 +琼 +瑁 +瑄 +瑕 +瑙 +瑚 +瑛 +瑜 +瑞 +瑟 +瑰 +瑶 +瑾 +璀 +璃 +璇 +璋 +璐 +璞 +璧 +璨 +瓜 +瓢 +瓣 +瓦 +瓮 +瓯 +瓶 +瓷 +甄 +甘 +甚 +甜 +生 +甥 +用 +甩 +甫 +甬 +田 +由 +甲 +申 +电 +男 +甸 +町 +画 +畅 +畊 +界 +畏 +畔 +留 +畜 +略 +番 +畴 +畸 +畿 +疃 +疆 +疏 +疑 +疗 +疚 +疝 +疤 +疫 +疯 +疲 +疵 +疹 +疼 +疾 +病 +症 +痉 +痊 +痒 +痕 +痘 +痛 +痣 +痪 +痫 +痰 +痱 +痴 +痹 +痼 +瘀 +瘁 +瘟 +瘠 +瘤 +瘦 +瘩 +瘪 +瘫 +瘸 +瘾 +癌 +癖 +癣 +癫 +登 +白 +百 +皂 +的 +皆 +皇 +皋 +皎 +皓 +皖 +皙 +皮 +皱 +盆 +盈 +益 +盎 +盐 +监 +盒 +盔 +盖 +盗 +盘 +盛 +盟 +目 +盯 +盲 +直 +相 +盹 +盼 +盾 +省 +眈 +眉 +看 +真 +眠 +眨 +眬 +眯 +眶 +眷 +眺 +眼 +着 +睁 +睐 +睛 +睡 +督 +睦 +睫 +睬 +睹 +睿 +瞄 +瞅 +瞌 +瞎 +瞒 +瞟 +瞧 +瞩 +瞪 +瞬 +瞰 +瞳 +瞻 +瞿 +矗 +矛 +矜 +矢 +矣 +知 +矩 +矫 +短 +矮 +石 +矶 +矿 +码 +砂 +砌 +砍 +砒 +研 +砖 +砚 +砝 +砥 +砰 +砲 +破 +砷 +砸 +砺 +砾 +础 +硅 +硕 +硚 +硝 +硫 +硬 +确 +碉 +碌 +碍 +碎 +碑 +碗 +碘 +碚 +碟 +碧 +碰 +碱 +碳 +碴 +碾 +磁 +磅 +磊 +磋 +磐 +磕 +磡 +磨 +磴 +磷 +磺 +礁 +示 +礼 +社 +祁 +祈 +祉 +祖 +祛 +祝 +神 +祠 +祢 +祥 +票 +祭 +祯 +祷 +祸 +祺 +禀 +禁 +禄 +禅 +福 +禧 +禹 +禺 +离 +禽 +禾 +秀 +私 +秃 +秆 +秉 +秋 +种 +科 +秒 +秘 +租 +秣 
+秤 +秦 +秧 +秩 +积 +称 +秸 +移 +秽 +稀 +程 +稍 +税 +稚 +稠 +稣 +稳 +稻 +稼 +稽 +稿 +穆 +穗 +穴 +究 +穷 +空 +穿 +突 +窃 +窄 +窈 +窍 +窑 +窒 +窕 +窖 +窗 +窘 +窜 +窝 +窟 +窥 +窦 +窨 +窿 +立 +竖 +站 +竞 +竟 +章 +竣 +童 +竭 +端 +竲 +竹 +竺 +竽 +竿 +笃 +笈 +笋 +笑 +笔 +笙 +笛 +符 +笨 +第 +笼 +等 +筋 +筐 +筑 +筒 +答 +策 +筛 +筱 +筵 +筷 +筹 +签 +简 +箍 +算 +管 +箫 +箭 +箱 +篇 +篡 +篪 +篮 +篷 +簇 +簧 +簸 +簿 +籁 +籍 +米 +类 +籽 +粉 +粒 +粕 +粗 +粘 +粟 +粤 +粥 +粪 +粮 +粱 +粹 +精 +糊 +糕 +糖 +糗 +糙 +糟 +糯 +系 +紊 +素 +索 +紧 +紫 +累 +絮 +綦 +繁 +纠 +红 +纣 +纤 +约 +级 +纪 +纬 +纯 +纰 +纱 +纲 +纳 +纵 +纶 +纷 +纸 +纹 +纺 +纽 +线 +练 +组 +绅 +细 +织 +终 +绊 +绌 +绍 +绎 +经 +绑 +绒 +结 +绕 +绘 +给 +绚 +络 +绝 +绞 +统 +绣 +继 +绩 +绪 +续 +绮 +绯 +绰 +绳 +维 +绵 +绷 +绸 +综 +绽 +绿 +缀 +缄 +缅 +缆 +缇 +缉 +缓 +缔 +缕 +编 +缘 +缙 +缚 +缜 +缝 +缠 +缤 +缨 +缩 +缪 +缭 +缮 +缰 +缴 +缸 +缺 +罂 +罄 +罐 +网 +罕 +罗 +罚 +罡 +罢 +罩 +罪 +置 +署 +罹 +羁 +羊 +美 +羚 +羞 +羡 +羣 +群 +羲 +羹 +羽 +羿 +翁 +翅 +翌 +翔 +翘 +翟 +翠 +翡 +翩 +翰 +翱 +翻 +翼 +耀 +老 +考 +耄 +者 +耋 +而 +耍 +耐 +耒 +耕 +耗 +耘 +耳 +耶 +耷 +耸 +耻 +耽 +耿 +聂 +聆 +聊 +聋 +职 +联 +聘 +聚 +聪 +肃 +肆 +肇 +肉 +肋 +肌 +肖 +肘 +肚 +肛 +肝 +肠 +股 +肢 +肤 +肥 +肩 +肪 +肮 +肯 +育 +肴 +肺 +肾 +肿 +胀 +胁 +胃 +胆 +背 +胎 +胖 +胚 +胛 +胜 +胞 +胡 +胤 +胧 +胫 +胯 +胰 +胱 +胳 +胶 +胸 +胺 +能 +脂 +脆 +脉 +脊 +脍 +脏 +脐 +脑 +脖 +脚 +脯 +脱 +脸 +脾 +腆 +腊 +腋 +腌 +腐 +腑 +腓 +腔 +腕 +腥 +腩 +腰 +腱 +腹 +腺 +腻 +腼 +腾 +腿 +膀 +膊 +膏 +膑 +膛 +膜 +膝 +膨 +膳 +膺 +臀 +臂 +臃 +臆 +臣 +自 +臭 +至 +致 +臻 +舀 +舅 +舆 +舌 +舍 +舒 +舛 +舜 +舞 +舟 +航 +般 +舰 +舱 +舵 +舶 +舸 +船 +艇 +艋 +艘 +良 +艰 +色 +艳 +艺 +艾 +节 +芊 +芋 +芒 +芙 +芜 +芝 +芦 +芬 +芭 +芮 +芯 +花 +芳 +芷 +芸 +芽 +苇 +苍 +苏 +苑 +苗 +苛 +苟 +苡 +苣 +若 +苦 +苯 +英 +苹 +茁 +茂 +范 +茄 +茅 +茆 +茎 +茗 +茜 +茨 +茫 +茵 +茶 +茸 +茹 +荃 +荆 +草 +荐 +荒 +荔 +荚 +荞 +荟 +荡 +荣 +荤 +荧 +荫 +药 +荷 +荼 +莅 +莆 +莉 +莎 +莓 +莘 +莞 +莠 +莫 +莱 +莲 +莴 +获 +莹 +莺 +莽 +菁 +菇 +菊 +菌 +菜 +菠 +菡 +菩 +菱 +菲 +萃 +萄 +萋 +萌 +萍 +萎 +萝 +萤 +营 +萦 +萧 +萨 +萱 +落 +葆 +著 +葛 +葡 +董 +葩 +葫 +葬 +葱 +葵 +蒂 +蒋 +蒙 +蒜 +蒲 +蒸 +蒿 +蓁 +蓄 +蓉 +蓝 +蓟 +蓬 +蔑 +蔓 +蔗 +蔚 +蔡 +蔫 +蔬 +蔷 +蔺 +蔽 +蕉 +蕊 +蕙 +蕲 +蕴 +蕾 +薄 +薇 +薛 +薪 +薯 +薰 +藏 +藜 +藤 +藩 +藻 +蘑 +虎 +虐 +虑 +虚 +虞 +虫 +虱 +虹 +虽 +虾 +蚀 +蚁 +蚂 +蚊 +蚌 +蚓 +蚕 +蚝 +蚣 +蚯 +蛀 +蛇 +蛋 +蛐 +蛙 +蛛 +蛟 +蛮 +蛰 +蜀 +蜂 +蜇 +蜈 +蜊 +蜒 +蜓 +蜕 +蜘 +蜚 +蜜 +蜡 +蜥 +蜴 +蜷 +蜿 +蝇 +蝉 +蝎 +蝗 +蝙 +蝠 +蝴 +蝶 +螂 +螃 +融 +螳 +螺 +蟑 +蟹 +蠢 +血 +衅 +行 +衍 +衔 +街 +衙 +衡 +衣 +补 +表 +衫 +衬 +衰 +衷 +袁 +袂 +袄 +袆 +袈 +袋 +袍 +袒 +袖 +袜 +被 +袭 +袱 +裁 +裂 +装 +裆 +裔 +裕 +裙 +裟 +裤 +裳 +裴 +裸 +裹 +褂 +褒 +褓 +褚 +褛 +褪 +褴 +褶 +襁 +襄 +襟 +西 +要 +覃 +覆 +见 +观 +规 +觅 +视 +览 +觉 +觊 +觎 +觐 +觑 +角 +解 +觥 +触 +言 +詹 +誉 +誓 +警 +譬 +计 +订 +认 +讧 +讨 +让 +讪 +训 +议 +讯 +记 +讲 +讳 +讶 +许 +讹 +论 +讼 +讽 +设 +访 +诀 +证 +评 +诅 +识 +诈 +诉 +诊 +词 +译 +诓 +试 +诗 +诙 +诚 +话 +诞 +诟 +诠 +诡 +询 +该 +详 +诧 +诩 +诫 +诬 +语 +误 +诱 +诲 +说 +诵 +诶 +请 +诸 +诺 +读 +诽 +课 +诿 +谀 +谁 +调 +谅 +谈 +谊 +谋 +谌 +谍 +谎 +谐 +谑 +谓 +谕 +谙 +谚 +谜 +谢 +谣 +谤 +谦 +谨 +谩 +谬 +谭 +谱 +谴 +谷 +豁 +豆 +豚 +象 +豪 +豫 +豹 +貅 +貉 +貌 +貔 +贝 +贞 +负 +贡 +财 +责 +贤 +败 +账 +货 +质 +贩 +贪 +贫 +贬 +购 +贮 +贯 +贱 +贴 +贵 +贷 +贸 +费 +贺 +贼 +贾 +贿 +赁 +赂 +赃 +资 +赋 +赌 +赎 +赏 +赐 +赔 +赖 +赘 +赚 +赛 +赝 +赞 +赠 +赡 +赢 +赣 +赤 +赦 +赫 +走 +赴 +赵 +赶 +起 +趁 +超 +越 +趋 +趟 +趣 +足 +趴 +趸 +趾 +跃 +跄 +跆 +跌 +跑 +跛 +距 +跟 +跤 +跨 +跪 +路 +跳 +践 +跷 +跺 +跻 +踉 +踊 +踏 +踝 +踞 +踢 +踩 +踪 +踵 +踹 +蹂 +蹄 +蹈 +蹊 +蹚 +蹦 +蹬 +蹭 +蹲 +蹴 +蹶 +蹼 +蹿 +躁 +躏 +身 +躬 +躯 +躲 +躺 +车 +轧 +轨 +轩 +转 +轮 +软 +轰 +轴 +轶 +轻 +载 +轿 +较 +辄 +辅 +辆 +辈 +辉 +辍 +辐 +辑 +输 +辖 +辗 +辘 +辙 +辛 +辜 +辞 +辟 +辣 +辨 +辩 +辫 +辰 +辱 +边 +辽 +达 +迁 +迂 +迄 +迅 +过 +迈 +迎 +运 +近 +返 +还 +这 +进 +远 +违 +连 +迟 +迢 +迥 +迪 +迫 +迭 +述 +迷 +迸 +迹 +追 +退 +送 +适 +逃 +逅 +逆 +选 +逊 +逍 +透 +逐 +递 +途 +逗 +通 +逛 +逝 +逞 +速 +造 +逡 +逢 +逮 +逵 +逸 +逻 +逼 +逾 +遁 +遂 +遇 +遍 +遏 +遐 +道 +遗 +遛 +遢 +遣 +遥 +遨 +遭 +遮 +遴 +遵 +避 +邀 +邂 +邃 +邋 +邑 +邓 +邛 +邝 +邢 +那 +邦 +邪 +邬 +邮 +邯 +邱 +邵 +邹 +邺 +邻 +郁 +郊 +郎 +郑 +郜 +郝 +郡 +部 +郫 +郭 +郸 +都 +鄂 +鄙 +鄞 +鄢 +酋 +酌 +配 +酒 +酗 +酝 +酣 +酪 +酬 +酯 +酱 +酵 +酶 +酷 +酸 +酿 +醇 +醉 +醋 +醍 +醐 +醒 +醛 +采 +釉 +释 +里 +重 +野 +量 +金 +釜 +鉴 +鏖 +鑫 +针 +钉 +钊 +钓 +钛 +钝 +钞 +钟 +钠 +钢 +钥 +钦 +钧 +钩 +钮 +钰 +钱 +钵 +钻 +钾 +铀 +铁 +铂 +铃 +铅 +铆 +铉 +铎 +铐 +铜 +铝 +铠 +铣 +铨 +铬 +铭 +铮 +铰 +铲 +银 +铸 +铺 +链 +铿 +销 +锁 +锂 +锄 +锅 +锆 +锈 +锋 +锌 +锏 +锐 +错 +锜 +锟 +锡 +锢 +锣 +锤 +锥 +锦 +锭 +键 +锯 +锰 +锵 +锷 +锹 +锻 +镀 +镁 +镇 +镉 +镊 +镍 
+镑 +镖 +镜 +镯 +镳 +镶 +长 +门 +闪 +闫 +闭 +问 +闯 +闰 +闲 +闳 +间 +闵 +闷 +闸 +闹 +闺 +闻 +闽 +阀 +阁 +阂 +阅 +阎 +阐 +阔 +阙 +阚 +阜 +队 +阮 +阱 +防 +阳 +阴 +阵 +阶 +阻 +阿 +陀 +陂 +附 +际 +陆 +陈 +陋 +陌 +降 +限 +陕 +陡 +院 +除 +陨 +险 +陪 +陬 +陵 +陶 +陷 +隅 +隆 +隋 +隍 +随 +隐 +隔 +隘 +隙 +障 +隧 +隶 +隼 +隽 +难 +雀 +雁 +雄 +雅 +集 +雇 +雌 +雍 +雏 +雕 +雨 +雪 +雯 +雳 +零 +雷 +雾 +需 +霁 +霄 +霆 +震 +霈 +霉 +霍 +霎 +霏 +霖 +霜 +霞 +露 +霸 +霹 +霾 +靑 +青 +靓 +靖 +静 +靛 +非 +靠 +靡 +面 +革 +靳 +靴 +靶 +鞋 +鞍 +鞘 +鞠 +鞭 +韦 +韧 +韩 +韬 +音 +韵 +韶 +页 +顶 +顷 +项 +顺 +须 +顽 +顾 +顿 +颁 +颂 +预 +颅 +领 +颇 +颈 +颊 +颍 +颐 +频 +颓 +颖 +颗 +题 +颚 +颜 +额 +颠 +颤 +风 +飒 +飓 +飘 +飙 +飚 +飞 +食 +餐 +餮 +饕 +饥 +饪 +饭 +饮 +饰 +饱 +饲 +饵 +饶 +饺 +饼 +饽 +饿 +馀 +馅 +馆 +馈 +馊 +馋 +馑 +馒 +首 +馗 +香 +馥 +馨 +马 +驭 +驯 +驰 +驱 +驳 +驴 +驶 +驻 +驼 +驾 +驿 +骁 +骂 +骄 +骅 +骆 +骇 +骊 +骋 +验 +骏 +骐 +骑 +骗 +骚 +骜 +骤 +骥 +骨 +骷 +骸 +骼 +髅 +髋 +髓 +高 +髦 +鬼 +魁 +魂 +魄 +魅 +魇 +魏 +魔 +鱼 +鲁 +鲍 +鲜 +鲟 +鲨 +鲶 +鲷 +鲸 +鳄 +鳅 +鳌 +鳖 +鳝 +鳞 +鸟 +鸠 +鸡 +鸣 +鸥 +鸦 +鸭 +鸯 +鸳 +鸵 +鸽 +鸾 +鸿 +鹃 +鹅 +鹊 +鹏 +鹜 +鹞 +鹤 +鹭 +鹰 +鹿 +麋 +麒 +麓 +麟 +麦 +麻 +麾 +黄 +黍 +黎 +黏 +黑 +黔 +默 +黛 +黝 +黯 +鼎 +鼓 +鼠 +鼻 +鼾 +齐 +齿 +龄 +龙 +龚 +龟 + diff --git a/modules/audio/asr/u2_conformer_aishell/module.py b/modules/audio/asr/u2_conformer_aishell/module.py new file mode 100644 index 0000000000000000000000000000000000000000..8ce728041a036a66a44014378f965cea1c4b04d6 --- /dev/null +++ b/modules/audio/asr/u2_conformer_aishell/module.py @@ -0,0 +1,73 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +from pathlib import Path +import sys + +import numpy as np +from paddlehub.env import MODULE_HOME +from paddlehub.module.module import moduleinfo, serving +from paddlehub.utils.log import logger + +import paddle +import soundfile as sf + +# TODO: Remove system path when deepspeech can be installed via pip. +sys.path.append(os.path.join(MODULE_HOME, 'u2_conformer_aishell')) +from deepspeech.exps.u2.config import get_cfg_defaults +from deepspeech.utils.utility import UpdateConfig +from .u2_conformer_tester import U2ConformerTester + + +@moduleinfo(name="u2_conformer_aishell", version="1.0.0", summary="", author="Baidu", author_email="", type="audio/asr") +class U2Conformer(paddle.nn.Layer): + def __init__(self): + super(U2Conformer, self).__init__() + + # resource + res_dir = os.path.join(MODULE_HOME, 'u2_conformer_aishell', 'assets') + conf_file = os.path.join(res_dir, 'conf/conformer.yaml') + checkpoint = os.path.join(res_dir, 'checkpoints/avg_20.pdparams') + + # config + self.config = get_cfg_defaults() + self.config.merge_from_file(conf_file) + + # TODO: Remove path updating snippet. 
+ with UpdateConfig(self.config): + self.config.collator.vocab_filepath = os.path.join(res_dir, self.config.collator.vocab_filepath) + # self.config.collator.spm_model_prefix = os.path.join(res_dir, self.config.collator.spm_model_prefix) + self.config.collator.augmentation_config = os.path.join(res_dir, self.config.collator.augmentation_config) + self.config.model.cmvn_file = os.path.join(res_dir, self.config.model.cmvn_file) + self.config.decoding.decoding_method = 'attention_rescoring' + self.config.decoding.batch_size = 1 + + # model + self.tester = U2ConformerTester(self.config) + self.tester.setup_model() + self.tester.resume(checkpoint) + + @staticmethod + def check_audio(audio_file): + sig, sample_rate = sf.read(audio_file) + assert sample_rate == 16000, 'Excepting sample rate of input audio is 16000, but got {}'.format(sample_rate) + + @serving + def speech_recognize(self, audio_file, device='cpu'): + assert os.path.isfile(audio_file), 'File not exists: {}'.format(audio_file) + self.check_audio(audio_file) + + paddle.set_device(device) + return self.tester.test(audio_file)[0][0] diff --git a/modules/audio/asr/u2_conformer_aishell/requirements.txt b/modules/audio/asr/u2_conformer_aishell/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..49fb307f43939536be9ee5661a5a712aeba0792b --- /dev/null +++ b/modules/audio/asr/u2_conformer_aishell/requirements.txt @@ -0,0 +1,12 @@ +loguru +yacs +jsonlines +scipy==1.2.1 +sentencepiece +resampy==0.2.2 +SoundFile==0.9.0.post1 +soxbindings +kaldiio +typeguard +editdistance +textgrid diff --git a/modules/audio/asr/u2_conformer_aishell/u2_conformer_tester.py b/modules/audio/asr/u2_conformer_aishell/u2_conformer_tester.py new file mode 100644 index 0000000000000000000000000000000000000000..c4f8d47055e29d1522c224e15439c9575270cc96 --- /dev/null +++ b/modules/audio/asr/u2_conformer_aishell/u2_conformer_tester.py @@ -0,0 +1,80 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Evaluation for U2 model.""" +import os +import sys + +import paddle + +from deepspeech.frontend.featurizer.text_featurizer import TextFeaturizer +from deepspeech.io.collator import SpeechCollator +from deepspeech.models.u2 import U2Model +from deepspeech.utils import mp_tools +from deepspeech.utils.utility import UpdateConfig + + +class U2ConformerTester: + def __init__(self, config): + self.config = config + self.collate_fn_test = SpeechCollator.from_config(config) + self._text_featurizer = TextFeaturizer( + unit_type=config.collator.unit_type, vocab_filepath=None, spm_model_prefix=config.collator.spm_model_prefix) + + @mp_tools.rank_zero_only + @paddle.no_grad() + def test(self, audio_file): + self.model.eval() + cfg = self.config.decoding + collate_fn_test = self.collate_fn_test + audio, _ = collate_fn_test.process_utterance(audio_file=audio_file, transcript="Hello") + audio_len = audio.shape[0] + audio = paddle.to_tensor(audio, dtype='float32') + audio_len = paddle.to_tensor(audio_len) + audio = paddle.unsqueeze(audio, axis=0) + vocab_list = collate_fn_test.vocab_list + + text_feature = self.collate_fn_test.text_feature + result_transcripts = self.model.decode( + audio, + audio_len, + text_feature=text_feature, + decoding_method=cfg.decoding_method, + lang_model_path=cfg.lang_model_path, + beam_alpha=cfg.alpha, + beam_beta=cfg.beta, + beam_size=cfg.beam_size, + cutoff_prob=cfg.cutoff_prob, + cutoff_top_n=cfg.cutoff_top_n, + num_processes=cfg.num_proc_bsearch, + ctc_weight=cfg.ctc_weight, + decoding_chunk_size=cfg.decoding_chunk_size, + num_decoding_left_chunks=cfg.num_decoding_left_chunks, + simulate_streaming=cfg.simulate_streaming) + + return result_transcripts + + def setup_model(self): + config = self.config.clone() + with UpdateConfig(config): + config.model.input_dim = self.collate_fn_test.feature_size + config.model.output_dim = self.collate_fn_test.vocab_size + + self.model = U2Model.from_config(config.model) + + def resume(self, checkpoint): + """Resume from the checkpoint at checkpoints in the output + directory or load a specified checkpoint. + """ + model_dict = paddle.load(checkpoint) + self.model.set_state_dict(model_dict) diff --git a/modules/audio/asr/u2_conformer_librispeech/README.md b/modules/audio/asr/u2_conformer_librispeech/README.md new file mode 100644 index 0000000000000000000000000000000000000000..8e4f12fef792ba73bb7651a4f96a336e58fa8f00 --- /dev/null +++ b/modules/audio/asr/u2_conformer_librispeech/README.md @@ -0,0 +1,156 @@ +# u2_conformer_librispeech + +|模型名称|u2_conformer_librispeech| +| :--- | :---: | +|类别|语音-语音识别| +|网络|Conformer| +|数据集|LibriSpeech| +|是否支持Fine-tuning|否| +|模型大小|191MB| +|最新更新日期|2021-11-01| +|数据指标|英文WER 0.034| + +## 一、模型基本信息 + +### 模型介绍 + +U2 Conformer模型是一种适用于英文和中文的end-to-end语音识别模型。u2_conformer_libirspeech采用了conformer的encoder和transformer的decoder的模型结构,并且使用了ctc-prefix beam search的方式进行一遍打分,再利用attention decoder进行二次打分的方式进行解码来得到最终结果。 + +u2_conformer_libirspeech在英文开源语音数据集[LibriSpeech ASR corpus](http://www.openslr.org/12/)进行了预训练,该模型在其测试集上的WER指标是0.034655。 + +
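+
+除了下文"预测代码示例"中的单条音频识别外，也可以按下面的方式批量识别多段音频（文件路径为假设，接口与下文 API 部分一致）：
+
+```python
+import paddlehub as hub
+
+# 示例（文件路径为假设）：批量识别多个采样率为 16k 的英文 wav 音频
+model = hub.Module(name='u2_conformer_librispeech', version='1.0.0')
+
+wav_files = ['/PATH/TO/AUDIO_1.wav', '/PATH/TO/AUDIO_2.wav']
+for wav_file in wav_files:
+    text = model.speech_recognize(wav_file, device='cpu')
+    print(wav_file, '->', text)
+```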
+ +更多详情请参考: +- [Unified Streaming and Non-streaming Two-pass End-to-end Model for Speech Recognition](https://arxiv.org/abs/2012.05481) +- [Conformer: Convolution-augmented Transformer for Speech Recognition](https://arxiv.org/abs/2005.08100) + +## 二、安装 + +- ### 1、系统依赖 + + - libsndfile + - Linux + ```shell + $ sudo apt-get install libsndfile + or + $ sudo yum install libsndfile + ``` + - MacOs + ``` + $ brew install libsndfile + ``` + +- ### 2、环境依赖 + + - paddlepaddle >= 2.1.0 + + - paddlehub >= 2.1.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) + +- ### 3、安装 + + - ```shell + $ hub install u2_conformer_librispeech + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + + +## 三、模型API预测 + +- ### 1、预测代码示例 + + - ```python + import paddlehub as hub + + # 采样率为16k,格式为wav的英文语音音频 + wav_file = '/PATH/TO/AUDIO' + + model = hub.Module( + name='u2_conformer_librispeech', + version='1.0.0') + text = model.speech_recognize(wav_file) + + print(text) + ``` + +- ### 2、API + - ```python + def check_audio(audio_file) + ``` + - 检查输入音频格式和采样率是否满足为16000 + + - **参数** + + - `audio_file`:本地音频文件(*.wav)的路径,如`/path/to/input.wav` + + - ```python + def speech_recognize( + audio_file, + device='cpu', + ) + ``` + - 将输入的音频识别成文字 + + - **参数** + + - `audio_file`:本地音频文件(*.wav)的路径,如`/path/to/input.wav` + - `device`:预测时使用的设备,默认为`cpu`,如需使用gpu预测,请设置为`gpu`。 + + - **返回** + + - `text`:str类型,返回输入音频的识别文字结果。 + + +## 四、服务部署 + +- PaddleHub Serving可以部署一个在线的语音识别服务。 + +- ### 第一步:启动PaddleHub Serving + + - ```shell + $ hub serving start -m u2_conformer_librispeech + ``` + + - 这样就完成了一个语音识别服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 需要识别的音频的存放路径,确保部署服务的机器可访问 + file = '/path/to/input.wav' + + # 以key的方式指定text传入预测方法的时的参数,此例中为"audio_file" + data = {"audio_file": file} + + # 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip + url = "http://127.0.0.1:8866/predict/u2_conformer_librispeech" + + # 指定post请求的headers为application/json方式 + headers = {"Content-Type": "application/json"} + + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + ``` + +## 五、更新历史 + +* 1.0.0 + + 初始发布 + + ```shell + $ hub install u2_conformer_librispeech + ``` diff --git a/modules/thirdparty/image/classification/food_classification/__init__.py b/modules/audio/asr/u2_conformer_librispeech/__init__.py similarity index 100% rename from modules/thirdparty/image/classification/food_classification/__init__.py rename to modules/audio/asr/u2_conformer_librispeech/__init__.py diff --git a/modules/audio/asr/u2_conformer_librispeech/assets/conf/augmentation.json b/modules/audio/asr/u2_conformer_librispeech/assets/conf/augmentation.json new file mode 100644 index 0000000000000000000000000000000000000000..0967ef424bce6791893e9a57bb952f80fd536e93 --- /dev/null +++ b/modules/audio/asr/u2_conformer_librispeech/assets/conf/augmentation.json @@ -0,0 +1 @@ +{} diff --git a/modules/audio/asr/u2_conformer_librispeech/assets/conf/conformer.yaml b/modules/audio/asr/u2_conformer_librispeech/assets/conf/conformer.yaml new file mode 100644 index 0000000000000000000000000000000000000000..72342e449eb1837a3965f3662a221d8adec61ab4 --- /dev/null +++ 
b/modules/audio/asr/u2_conformer_librispeech/assets/conf/conformer.yaml @@ -0,0 +1,116 @@ +# https://yaml.org/type/float.html +data: + train_manifest: data/manifest.test-clean + dev_manifest: data/manifest.test-clean + test_manifest: data/manifest.test-clean + min_input_len: 0.5 # seconds + max_input_len: 30.0 # seconds + min_output_len: 0.0 # tokens + max_output_len: 400.0 # tokens + min_output_input_ratio: 0.05 + max_output_input_ratio: 100.0 + +collator: + vocab_filepath: data/vocab.txt + unit_type: 'spm' + spm_model_prefix: 'data/bpe_unigram_5000' + mean_std_filepath: "" + augmentation_config: conf/augmentation.json + batch_size: 16 + raw_wav: True # use raw_wav or kaldi feature + spectrum_type: fbank #linear, mfcc, fbank + feat_dim: 80 + delta_delta: False + dither: 1.0 + target_sample_rate: 16000 + max_freq: None + n_fft: None + stride_ms: 10.0 + window_ms: 25.0 + use_dB_normalization: True + target_dB: -20 + random_seed: 0 + keep_transcription_text: False + sortagrad: True + shuffle_method: batch_shuffle + num_workers: 2 + + +# network architecture +model: + cmvn_file: "data/mean_std.json" + cmvn_file_type: "json" + # encoder related + encoder: conformer + encoder_conf: + output_size: 256 # dimension of attention + attention_heads: 4 + linear_units: 2048 # the number of units of position-wise feed forward + num_blocks: 12 # the number of encoder blocks + dropout_rate: 0.1 + positional_dropout_rate: 0.1 + attention_dropout_rate: 0.0 + input_layer: conv2d # encoder input type, you can chose conv2d, conv2d6 and conv2d8 + normalize_before: True + use_cnn_module: True + cnn_module_kernel: 15 + activation_type: 'swish' + pos_enc_layer_type: 'rel_pos' + selfattention_layer_type: 'rel_selfattn' + + # decoder related + decoder: transformer + decoder_conf: + attention_heads: 4 + linear_units: 2048 + num_blocks: 6 + dropout_rate: 0.1 + positional_dropout_rate: 0.1 + self_attention_dropout_rate: 0.0 + src_attention_dropout_rate: 0.0 + + # hybrid CTC/attention + model_conf: + ctc_weight: 0.3 + ctc_dropoutrate: 0.0 + ctc_grad_norm_type: instance + lsm_weight: 0.1 # label smoothing option + length_normalized_loss: false + + +training: + n_epoch: 120 + accum_grad: 8 + global_grad_clip: 3.0 + optim: adam + optim_conf: + lr: 0.004 + weight_decay: 1e-06 + scheduler: warmuplr # pytorch v1.1.0+ required + scheduler_conf: + warmup_steps: 25000 + lr_decay: 1.0 + log_interval: 100 + checkpoint: + kbest_n: 50 + latest_n: 5 + + +decoding: + batch_size: 64 + error_rate_type: wer + decoding_method: attention # 'attention', 'ctc_greedy_search', 'ctc_prefix_beam_search', 'attention_rescoring' + lang_model_path: data/lm/common_crawl_00.prune01111.trie.klm + alpha: 2.5 + beta: 0.3 + beam_size: 10 + cutoff_prob: 1.0 + cutoff_top_n: 0 + num_proc_bsearch: 8 + ctc_weight: 0.5 # ctc weight for attention rescoring decode mode. + decoding_chunk_size: -1 # decoding chunk size. Defaults to -1. + # <0: for decoding, use full chunk. + # >0: for decoding, use fixed chunk size as set. + # 0: used for training, it's prohibited here. + num_decoding_left_chunks: -1 # number of left chunks for decoding. Defaults to -1. + simulate_streaming: False # simulate streaming inference. Defaults to False. 
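The `decoding` block above holds the options the module consumes at inference time. As a minimal sketch (the relative path and override values are illustrative and not part of the files added by this diff), the YAML can be loaded and adjusted with `yacs`, which mirrors the overrides `module.py` applies through `UpdateConfig` before building the tester:

```python
from yacs.config import CfgNode

# Load the packaged config (illustrative relative path).
with open('assets/conf/conformer.yaml') as f:
    config = CfgNode.load_cfg(f)

# Switch to second-pass attention rescoring for single-utterance inference,
# the same kind of override module.py applies before decoding.
config.defrost()
config.decoding.decoding_method = 'attention_rescoring'
config.decoding.batch_size = 1
config.freeze()

print(config.decoding.beam_size, config.decoding.ctc_weight)  # -> 10 0.5
```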
diff --git a/modules/audio/asr/u2_conformer_librispeech/assets/data/bpe_unigram_5000.model b/modules/audio/asr/u2_conformer_librispeech/assets/data/bpe_unigram_5000.model new file mode 100644 index 0000000000000000000000000000000000000000..ad6748af9e3f3ab9c36052b28d46084b7c8f315d Binary files /dev/null and b/modules/audio/asr/u2_conformer_librispeech/assets/data/bpe_unigram_5000.model differ diff --git a/modules/audio/asr/u2_conformer_librispeech/assets/data/bpe_unigram_5000.vocab b/modules/audio/asr/u2_conformer_librispeech/assets/data/bpe_unigram_5000.vocab new file mode 100644 index 0000000000000000000000000000000000000000..7e0ff98ce2e00bf26a8ae3a015556bbd21f8bdc5 --- /dev/null +++ b/modules/audio/asr/u2_conformer_librispeech/assets/data/bpe_unigram_5000.vocab @@ -0,0 +1,5000 @@ + 0 + 0 + 0 +▁the -2.9911 +s -3.44691 +▁and -3.58286 +▁of -3.70894 +▁to -3.78001 +▁a -3.89871 +▁in -4.20996 +▁i -4.36145 +▁he -4.48281 +▁that -4.55289 +ed -4.59016 +▁was -4.59181 +▁it -4.62484 +' -4.81583 +▁his -4.84177 +ing -4.88039 +▁you -4.99998 +▁with -5.00838 +▁for -5.02039 +t -5.0555 +▁had -5.07751 +▁as -5.09744 +▁her -5.13191 +▁be -5.19505 +▁is -5.19882 +▁but -5.21324 +▁not -5.22608 +▁she -5.23394 +d -5.27841 +▁at -5.34023 +▁on -5.34498 +ly -5.40443 +▁him -5.50709 +▁they -5.56045 +▁all -5.58704 +▁have -5.59768 +▁by -5.60002 +▁ -5.60186 +▁so -5.61262 +e -5.61903 +▁this -5.62164 +▁my -5.64057 +▁which -5.64669 +▁me -5.69076 +▁said -5.70437 +▁from -5.70664 +▁one -5.7513 +▁were -5.78541 +▁we -5.82874 +y -5.85619 +▁no -5.88631 +▁there -5.90758 +n -5.91704 +er -5.92896 +▁or -5.93481 +▁an -5.95345 +▁when -5.96716 +▁are -6.01743 +▁their -6.0437 +▁would -6.05331 +▁if -6.06359 +▁what -6.0895 +▁them -6.08963 +▁who -6.10441 +▁do -6.134 +▁out -6.14848 +▁will -6.16929 +▁up -6.18755 +m -6.19966 +▁been -6.20889 +▁man -6.28662 +▁then -6.31167 +▁could -6.37658 +r -6.38978 +p -6.401 +▁more -6.40231 +▁into -6.4095 +▁now -6.45621 +es -6.45723 +▁very -6.46767 +▁your -6.47768 +c -6.49829 +▁some -6.5032 +▁little -6.52174 +▁time -6.53362 +▁can -6.57863 +▁like -6.58001 +ll -6.58456 +re -6.59459 +▁about -6.6011 +▁has -6.63724 +▁than -6.64773 +▁did -6.64974 +▁upon -6.66755 +l -6.67708 +▁over -6.6829 +▁any -6.69691 +in -6.70055 +▁well -6.70679 +▁only -6.70884 +▁see -6.72382 +▁good -6.7302 +▁other -6.73256 +▁two -6.73281 +al -6.76971 +▁know -6.77014 +b -6.77332 +▁go -6.78028 +▁down -6.78382 +▁before -6.79386 +a -6.80864 +▁our -6.81482 +▁old -6.82309 +▁should -6.82836 +▁made -6.82895 +▁after -6.84628 +▁great -6.85243 +▁day -6.85544 +▁must -6.87627 +▁come -6.87777 +▁how -6.87869 +▁such -6.88362 +▁came -6.88807 +▁where -6.89779 +▁us -6.90031 +▁never -6.92945 +le -6.93511 +▁these -6.95338 +▁much -6.95525 +▁mister -6.96536 +▁de -6.975 +or -6.98345 +▁may -6.98676 +▁long -7.01388 +▁way -7.01809 +▁first -7.04141 +▁back -7.05466 +▁own -7.05634 +▁am -7.05808 +▁again -7.06591 +▁say -7.07176 +▁men -7.07357 +▁went -7.07513 +▁himself -7.07891 +▁here -7.09085 +ion -7.10388 +▁think -7.10393 +ness -7.10433 +en -7.11572 +▁even -7.12414 +g -7.12655 +▁thought -7.12694 +▁hand -7.1271 +u -7.13322 +▁just -7.13401 +ve -7.14094 +▁its -7.15029 +o -7.16142 +▁un -7.16965 +▁re -7.1721 +▁make -7.17463 +▁might -7.1793 +ation -7.18013 +▁too -7.18635 +on -7.1907 +▁away -7.19477 +st -7.19708 +▁life -7.20558 +▁without -7.21952 +▁o -7.22087 +▁through -7.22747 +▁most -7.22784 +ic -7.22971 +▁take -7.23593 +▁don -7.23927 +▁every -7.24535 +th -7.25167 +▁shall -7.25978 +▁those -7.26214 +▁eyes -7.27376 +▁still -7.28725 +▁last -7.29948 +▁house -7.30575 +▁head -7.3073 +▁nothing 
-7.31319 +▁night -7.3151 +able -7.32761 +▁off -7.33689 +ity -7.33883 +▁let -7.33975 +▁many -7.34144 +ar -7.34535 +▁being -7.34757 +▁found -7.34819 +▁while -7.35326 +i -7.36804 +▁saw -7.37042 +▁get -7.37494 +an -7.37662 +▁people -7.38318 +▁face -7.38748 +▁young -7.39215 +▁under -7.40057 +▁once -7.40078 +▁tell -7.40791 +▁three -7.413 +▁place -7.41377 +▁room -7.41704 +li -7.42158 +▁yet -7.42442 +▁same -7.42976 +ri -7.42985 +v -7.4311 +▁father -7.44096 +▁though -7.45043 +k -7.45091 +▁another -7.45131 +▁right -7.46533 +▁heart -7.46662 +▁put -7.48293 +▁took -7.48368 +▁give -7.48808 +▁ever -7.4903 +▁work -7.50099 +el -7.50309 +it -7.50743 +▁e -7.51169 +▁look -7.51181 +ry -7.5122 +▁new -7.51353 +il -7.51571 +ers -7.51791 +▁part -7.52099 +▁king -7.52387 +▁missus -7.52455 +▁sir -7.53014 +▁mind -7.5303 +▁looked -7.53104 +us -7.53328 +▁love -7.53458 +ra -7.53906 +▁asked -7.53965 +▁left -7.54703 +▁light -7.56075 +▁moment -7.57071 +ro -7.57073 +et -7.5746 +ive -7.57948 +▁world -7.58543 +▁things -7.58651 +▁home -7.58975 +▁thing -7.6002 +f -7.60068 +h -7.60196 +ful -7.60292 +▁why -7.60735 +▁mother -7.61051 +▁always -7.61115 +▁far -7.61265 +▁water -7.61901 +▁s -7.61926 +la -7.62405 +ce -7.62873 +ck -7.62955 +▁heard -7.63327 +▁something -7.63489 +w -7.63624 +▁seemed -7.63649 +ch -7.64796 +▁because -7.65167 +▁end -7.65457 +▁told -7.66091 +▁yes -7.66365 +▁door -7.6662 +ted -7.6708 +▁going -7.67276 +▁got -7.67607 +is -7.68689 +ter -7.68801 +▁woman -7.68896 +▁god -7.68943 +ol -7.69186 +est -7.69247 +ent -7.69838 +ur -7.70382 +te -7.70972 +ling -7.71225 +▁find -7.71593 +▁knew -7.72124 +ne -7.72399 +▁soon -7.72471 +▁each -7.72548 +▁side -7.72953 +▁oh -7.73896 +ul -7.74838 +▁against -7.75871 +▁name -7.77125 +▁miss -7.77191 +▁quite -7.77406 +▁con -7.77659 +▁ma -7.7812 +▁want -7.78461 +▁years -7.78825 +▁few -7.78901 +▁better -7.79308 +▁half -7.79628 +ton -7.79945 +▁done -7.80176 +ment -7.81027 +▁also -7.81536 +se -7.81952 +▁began -7.82133 +▁having -7.82983 +▁enough -7.83157 +▁lady -7.84016 +▁whole -7.84092 +▁both -7.8452 +▁seen -7.84696 +led -7.85123 +▁set -7.8565 +▁white -7.85755 +▁course -7.86189 +tion -7.86283 +▁voice -7.86482 +ir -7.865 +▁called -7.86562 +ma -7.88043 +lo -7.88068 +▁turned -7.88486 +▁gave -7.88561 +man -7.89007 +▁poor -7.89153 +▁dear -7.89597 +▁girl -7.89892 +▁morning -7.90137 +less -7.90146 +▁between -7.90202 +▁nor -7.90275 +▁among -7.9053 +ate -7.90969 +ies -7.91089 +▁p -7.91307 +ff -7.91729 +na -7.92272 +▁small -7.92689 +ty -7.92942 +ous -7.93067 +▁ga -7.93278 +▁whom -7.93725 +▁felt -7.93876 +▁hands -7.93947 +▁myself -7.94602 +▁high -7.94632 +▁ex -7.94686 +▁however -7.94887 +ia -7.94934 +▁herself -7.95264 +▁stood -7.95858 +▁kind -7.95874 +▁hundred -7.95955 +▁la -7.96684 +▁round -7.97066 +▁almost -7.97354 +om -7.98129 +▁since -7.9813 +sh -7.98849 +▁c -7.98852 +▁ten -7.9898 +▁rest -7.9973 +▁boy -7.99935 +▁mo -8.00015 +▁perhaps -8.00311 +ish -8.0036 +ru -8.0045 +▁words -8.00475 +mp -8.00876 +▁sat -8.01874 +co -8.02001 +▁replied -8.02087 +▁four -8.02469 +▁anything -8.02776 +as -8.02812 +▁till -8.02843 +x -8.02978 +ting -8.0301 +▁until -8.03441 +▁black -8.03588 +ated -8.03649 +me -8.03831 +▁b -8.04278 +id -8.04354 +▁cried -8.04406 +▁fact -8.05064 +▁help -8.05169 +▁next -8.05191 +ie -8.05368 +▁looking -8.05378 +▁friend -8.05529 +▁does -8.05546 +▁lay -8.05695 +▁brought -8.06229 +▁fire -8.06598 +▁keep -8.06679 +ver -8.07005 +▁sea -8.07356 +▁country -8.07394 +▁word -8.07524 +▁days -8.07754 +▁together -8.0803 +▁reason -8.0831 +ut -8.08642 +ance -8.0867 +▁indeed -8.08859 +▁matter -8.08986 +▁ra -8.09017 
+▁li -8.09673 +▁air -8.09835 +▁full -8.09927 +▁rather -8.10244 +▁hope -8.10365 +▁land -8.1041 +gg -8.10417 +am -8.10449 +▁open -8.10788 +tic -8.10921 +▁feet -8.11058 +▁imp -8.11102 +ke -8.11263 +ine -8.11421 +▁d -8.11547 +▁five -8.11674 +▁point -8.11763 +▁large -8.1235 +ci -8.12437 +vi -8.1256 +▁child -8.13099 +▁gone -8.13104 +▁ho -8.1317 +pp -8.13272 +▁best -8.13427 +▁hard -8.13582 +ant -8.13757 +▁lord -8.13785 +▁wife -8.13848 +▁sure -8.13962 +de -8.14218 +po -8.14226 +▁form -8.14557 +▁death -8.14965 +▁care -8.15583 +ence -8.15604 +▁nature -8.15699 +▁co -8.15856 +▁believe -8.15947 +▁near -8.16247 +▁red -8.16407 +▁ro -8.16449 +▁ha -8.16607 +▁speak -8.16703 +▁fear -8.16889 +▁case -8.16944 +▁taken -8.17098 +▁cannot -8.17343 +▁hear -8.17518 +▁along -8.17564 +▁themselves -8.17588 +um -8.17641 +▁present -8.18164 +▁master -8.18704 +▁son -8.18955 +▁war -8.19388 +▁po -8.19446 +▁thus -8.19772 +▁true -8.20459 +▁car -8.20477 +▁less -8.20846 +▁thousand -8.21254 +▁w -8.21417 +mi -8.2162 +▁money -8.21713 +nd -8.21716 +▁da -8.21888 +▁power -8.22077 +▁behind -8.22087 +ard -8.2226 +to -8.22274 +▁children -8.2228 +▁doctor -8.22317 +▁dis -8.22371 +▁twenty -8.22732 +▁wish -8.22739 +▁sound -8.22843 +▁whose -8.23097 +▁leave -8.23197 +▁answered -8.23298 +▁thou -8.23321 +ac -8.23461 +▁dur -8.23471 +▁certain -8.2375 +ge -8.24317 +▁cl -8.24703 +▁g -8.24779 +▁passed -8.24862 +▁arm -8.25095 +mo -8.25395 +ious -8.2544 +▁state -8.25486 +▁alone -8.25597 +▁show -8.25689 +▁ba -8.25864 +▁need -8.25881 +▁live -8.26099 +▁dead -8.26254 +▁pro -8.26311 +▁mu -8.26701 +▁strong -8.26733 +▁en -8.26801 +▁bo -8.26981 +▁ground -8.27309 +▁short -8.27476 +▁st -8.27974 +▁horse -8.28616 +▁prince -8.28817 +▁pre -8.28817 +ian -8.29122 +at -8.29216 +un -8.29302 +▁fell -8.2982 +▁order -8.29901 +▁call -8.29938 +▁ca -8.30443 +▁sun -8.30517 +ta -8.30566 +▁given -8.30619 +▁therefore -8.30754 +▁dark -8.30758 +▁close -8.30816 +▁body -8.31022 +▁others -8.31043 +▁sent -8.31212 +ad -8.3132 +▁second -8.316 +red -8.31726 +▁often -8.31883 +▁manner -8.32481 +▁vi -8.32632 +▁f -8.33096 +▁lo -8.33173 +▁question -8.33377 +▁hour -8.33469 +▁turn -8.33975 +▁table -8.34248 +▁general -8.34277 +▁earth -8.34496 +▁bed -8.34708 +age -8.3481 +ward -8.35051 +▁really -8.35139 +▁six -8.35374 +▁become -8.35755 +▁read -8.36081 +▁use -8.36236 +▁coming -8.37141 +▁everything -8.37319 +▁above -8.37882 +▁evening -8.37903 +▁beautiful -8.3822 +▁feel -8.38244 +▁least -8.3841 +ical -8.38416 +▁law -8.38452 +▁already -8.38637 +▁rose -8.38677 +▁mean -8.38681 +▁ran -8.38738 +▁itself -8.38828 +▁soul -8.39221 +▁suddenly -8.39493 +▁around -8.39553 +▁ti -8.39629 +▁sa -8.39657 +▁answer -8.39921 +▁em -8.40114 +ber -8.40546 +que -8.40812 +ti -8.40975 +▁won -8.41017 +▁wind -8.41105 +▁fine -8.41304 +▁whether -8.41526 +▁known -8.41725 +▁captain -8.42272 +▁eye -8.42551 +▁person -8.42656 +▁women -8.42706 +▁sort -8.42764 +▁ask -8.42963 +▁per -8.43123 +▁brother -8.43586 +ni -8.43821 +▁used -8.44025 +▁held -8.44066 +▁big -8.44256 +▁returned -8.44473 +▁strange -8.44488 +no -8.45273 +▁free -8.45451 +▁either -8.45513 +▁within -8.45564 +▁doubt -8.45671 +▁year -8.45862 +▁clear -8.46003 +▁sight -8.46043 +▁lost -8.46111 +ho -8.46112 +▁se -8.46255 +▁le -8.46257 +▁kept -8.46289 +▁bar -8.46341 +▁bu -8.46354 +▁town -8.46388 +ring -8.46594 +▁sleep -8.46906 +ist -8.47099 +▁hair -8.47372 +▁friends -8.47427 +nt -8.4756 +▁dream -8.47568 +▁fellow -8.47629 +▁deep -8.47799 +▁past -8.4783 +▁became -8.47901 +op -8.48024 +▁making -8.48051 +▁act -8.48477 +bo -8.48576 +im -8.48695 +▁bad -8.4879 +ary -8.49097 +▁ta -8.49642 
+ily -8.4979 +▁bring -8.498 +ster -8.49837 +▁ye -8.50127 +▁means -8.50147 +▁run -8.50334 +men -8.50338 +▁daughter -8.50689 +▁sense -8.50862 +cy -8.51181 +▁city -8.51186 +▁sometimes -8.51205 +▁towards -8.51344 +▁road -8.51845 +▁gra -8.51919 +▁ready -8.52448 +dy -8.5251 +ure -8.52531 +son -8.52666 +▁mar -8.52707 +▁cold -8.53015 +▁foot -8.53033 +▁else -8.53193 +▁letter -8.5321 +ud -8.53213 +▁k -8.53803 +▁sp -8.53997 +▁truth -8.54012 +▁idea -8.54104 +▁sta -8.54296 +▁business -8.54487 +▁subject -8.54754 +▁john -8.54757 +▁court -8.54846 +▁river -8.55047 +▁ru -8.55137 +▁di -8.5541 +▁family -8.5565 +▁didn -8.56006 +▁several -8.56147 +▁glad -8.56226 +ens -8.56422 +▁understand -8.56476 +▁possible -8.56873 +▁return -8.56875 +▁different -8.56878 +▁arms -8.5689 +he -8.57005 +▁low -8.57062 +▁hold -8.57171 +ating -8.57288 +▁talk -8.57294 +▁window -8.57563 +▁lu -8.57574 +▁sh -8.57632 +▁interest -8.57875 +▁sister -8.57949 +▁blood -8.58666 +▁says -8.58691 +land -8.59031 +▁th -8.59363 +▁human -8.59452 +▁cause -8.59568 +go -8.59691 +▁thank -8.59812 +▁late -8.59857 +▁cut -8.59993 +▁across -8.60115 +ng -8.60191 +▁story -8.6039 +ial -8.60458 +▁count -8.60531 +by -8.61141 +▁number -8.61156 +▁stand -8.61173 +▁able -8.61219 +per -8.61242 +▁church -8.61299 +che -8.61435 +les -8.61602 +▁thy -8.61746 +▁comp -8.61815 +▁suppose -8.6189 +▁effect -8.62111 +▁si -8.62299 +ba -8.62734 +▁spoke -8.62957 +▁green -8.6315 +▁husband -8.63174 +▁respect -8.63174 +cu -8.63314 +▁remember -8.63324 +▁followed -8.63382 +▁longer -8.63684 +ions -8.63877 +tro -8.63906 +▁taking -8.64065 +▁seem -8.64106 +▁t -8.64367 +▁happy -8.64443 +pe -8.64475 +▁line -8.64596 +ley -8.64671 +▁stay -8.6532 +▁play -8.6534 +▁common -8.65531 +be -8.65623 +▁times -8.65717 +▁book -8.65736 +und -8.65793 +▁object -8.66012 +▁seven -8.66091 +▁met -8.66215 +ca -8.66333 +▁age -8.66376 +▁sha -8.66505 +▁pretty -8.6663 +▁fair -8.66837 +do -8.66895 +▁wood -8.66965 +os -8.67011 +▁reached -8.6731 +▁sweet -8.67437 +▁appeared -8.67453 +▁fall -8.67545 +▁pass -8.67577 +▁sign -8.67655 +▁art -8.67659 +da -8.67771 +▁tree -8.68022 +▁garden -8.68055 +▁fl -8.68212 +▁remain -8.68618 +▁opened -8.68883 +qui -8.69114 +▁bright -8.69391 +▁street -8.6983 +▁hu -8.69925 +▁tu -8.70032 +▁trouble -8.70065 +▁pain -8.7029 +▁continued -8.70344 +▁school -8.70366 +▁carried -8.70421 +▁saying -8.70493 +▁follow -8.71325 +▁change -8.71328 +nce -8.71349 +▁gold -8.71391 +▁bear -8.71554 +▁su -8.71566 +▁feeling -8.71637 +▁command -8.71679 +▁certainly -8.71824 +▁blue -8.71904 +▁wild -8.72003 +▁account -8.72368 +▁ne -8.72403 +▁ought -8.72848 +▁fi -8.73365 +▁breath -8.73491 +▁wanted -8.73914 +ov -8.74173 +lt -8.74286 +▁ill -8.74353 +ow -8.74421 +▁sc -8.74663 +der -8.74682 +▁heaven -8.74684 +▁purpose -8.74686 +ha -8.74759 +▁character -8.74843 +▁rich -8.7515 +our -8.75547 +▁dress -8.75781 +▁english -8.76108 +▁chance -8.76254 +▁view -8.76496 +▁ship -8.76584 +▁toward -8.76672 +▁real -8.76718 +▁joy -8.76779 +▁cap -8.77235 +▁plan -8.77246 +▁neither -8.77275 +▁force -8.77285 +▁uncle -8.77317 +▁princess -8.77387 +▁har -8.77474 +▁hat -8.77801 +way -8.77869 +▁chief -8.77894 +▁lived -8.78017 +▁na -8.78141 +▁visit -8.7824 +▁mor -8.78381 +▁wall -8.78652 +▁pleasure -8.78739 +▁pe -8.7879 +▁smile -8.78797 +▁front -8.78866 +▁mine -8.78902 +▁ri -8.79253 +▁deal -8.79282 +ier -8.79326 +▁further -8.79368 +▁tried -8.79541 +▁none -8.80009 +uc -8.80166 +▁entered -8.80167 +▁pay -8.80408 +▁queen -8.80455 +▁except -8.80579 +va -8.80801 +▁forward -8.80805 +ot -8.80998 +▁eight -8.81171 +▁added -8.81314 +▁public -8.81323 +▁eighteen -8.81324 
+ft -8.81377 +▁star -8.81398 +▁happened -8.81873 +ned -8.81953 +▁although -8.822 +▁later -8.82204 +▁walked -8.82218 +▁walk -8.82238 +▁spirit -8.8225 +▁bit -8.82313 +▁meet -8.82432 +▁led -8.82559 +fa -8.82849 +▁mouth -8.82946 +▁wait -8.83231 +rs -8.83281 +▁gu -8.83416 +▁hours -8.83454 +lin -8.83526 +▁living -8.83739 +▁yourself -8.83798 +em -8.83827 +▁fast -8.83971 +▁hall -8.84497 +▁beyond -8.84576 +▁boat -8.84732 +▁secret -8.84736 +▁chair -8.84911 +▁pu -8.85297 +▁received -8.85389 +▁pa -8.85426 +▁cat -8.8545 +▁desire -8.85826 +▁ja -8.8592 +▁gentleman -8.85927 +▁cra -8.85959 +ress -8.8609 +▁laid -8.86415 +▁party -8.86721 +▁wonder -8.86748 +▁occasion -8.86751 +ig -8.86771 +▁fish -8.87005 +▁mi -8.87027 +▁send -8.87486 +vo -8.87515 +ged -8.87522 +ak -8.87728 +▁nearly -8.87803 +con -8.87846 +▁try -8.8788 +▁seems -8.88114 +▁silence -8.88499 +▁bell -8.88523 +ever -8.88574 +▁bra -8.88685 +▁guard -8.88716 +▁rep -8.88973 +▁die -8.89013 +▁doing -8.89179 +▁early -8.89211 +ugh -8.89235 +▁bank -8.89235 +▁figure -8.89252 +den -8.89326 +▁england -8.89568 +▁mary -8.896 +▁fo -8.89799 +▁cor -8.89892 +▁afraid -8.90011 +▁watch -8.90402 +▁gre -8.90554 +▁aunt -8.91001 +tur -8.91229 +▁service -8.91353 +▁je -8.91387 +▁minutes -8.91421 +▁trees -8.91568 +▁glass -8.91774 +▁pan -8.91942 +▁va -8.91977 +▁tone -8.91998 +▁please -8.92034 +▁forth -8.92051 +▁cur -8.92101 +▁cross -8.92166 +▁fa -8.92184 +▁exclaimed -8.92273 +ler -8.92342 +▁pen -8.92344 +ten -8.92376 +▁pi -8.92426 +▁eat -8.92444 +▁drew -8.92453 +ble -8.92499 +ably -8.9255 +▁grave -8.92616 +▁miles -8.92876 +▁ago -8.92887 +▁position -8.9304 +▁warm -8.93052 +▁length -8.93236 +▁necessary -8.93236 +▁thinking -8.93313 +▁soft -8.9336 +▁picture -8.93367 +ship -8.93369 +ations -8.9338 +av -8.93443 +ible -8.93462 +▁ah -8.93999 +▁heavy -8.94029 +▁attention -8.94092 +▁dog -8.94119 +▁standing -8.94354 +rn -8.94361 +ron -8.94363 +▁natural -8.94438 +▁appear -8.94438 +▁caught -8.94556 +gra -8.94669 +▁spring -8.94922 +▁experience -8.94955 +▁pat -8.95299 +▁pri -8.95372 +▁stopped -8.95569 +▁regard -8.95615 +▁hardly -8.95978 +▁self -8.96008 +▁strength -8.96095 +kin -8.96238 +▁grew -8.96282 +▁knight -8.96298 +▁opinion -8.96298 +▁ab -8.96388 +rk -8.96526 +▁wide -8.96661 +▁instead -8.96774 +▁south -8.96781 +▁trans -8.96816 +▁learn -8.9712 +▁corner -8.97137 +▁island -8.97439 +▁third -8.97591 +▁straight -8.97728 +▁tea -8.97822 +▁bound -8.97901 +▁seeing -8.97967 +▁cha -8.98025 +▁dinner -8.98079 +▁beauty -8.98209 +▁peace -8.98292 +▁silent -8.98762 +▁cre -8.98909 +▁sw -8.99093 +▁step -8.99147 +▁jo -8.99178 +▁wa -8.99194 +▁sitting -8.99214 +▁thirty -8.99247 +▁save -8.99425 +▁glance -8.99532 +▁loved -8.99677 +▁reach -8.99979 +▁action -9.00043 +▁ver -9.0005 +ger -9.00278 +▁sad -9.00395 +▁stone -9.00628 +ened -9.00671 +▁french -9.00862 +▁m -9.0087 +▁struck -9.01003 +▁paper -9.01106 +ally -9.01111 +▁whatever -9.01193 +▁sub -9.01227 +▁distance -9.01287 +▁wrong -9.01358 +▁knowledge -9.01358 +▁safe -9.01474 +▁snow -9.01501 +▁fifty -9.01643 +▁attempt -9.01714 +▁music -9.01799 +▁government -9.01876 +▁crowd -9.02244 +▁besides -9.02296 +▁box -9.02356 +▁direction -9.02387 +▁train -9.02393 +▁north -9.02395 +ped -9.02429 +▁el -9.02475 +▁thick -9.02509 +▁getting -9.02554 +▁floor -9.0289 +▁company -9.03007 +▁blow -9.03021 +bu -9.03086 +▁plain -9.03126 +▁beside -9.0315 +ities -9.03293 +▁rock -9.03348 +▁immediately -9.03354 +▁shadow -9.03442 +▁sit -9.03601 +▁drink -9.03952 +king -9.04249 +▁spot -9.04416 +▁danger -9.04433 +▁wi -9.04538 +▁saint -9.04685 +▁slowly -9.04691 +ah -9.04742 +▁palace -9.04831 +ors 
-9.04944 +▁peter -9.05013 +▁result -9.05052 +ric -9.05115 +▁forest -9.05173 +▁tears -9.0564 +ism -9.05656 +▁belong -9.05664 +▁appearance -9.05678 +▁par -9.05711 +▁gate -9.05778 +▁ju -9.06233 +▁quickly -9.06437 +▁fit -9.06524 +▁quiet -9.06573 +ris -9.06619 +▁london -9.06688 +▁start -9.06791 +rt -9.06846 +▁brown -9.06949 +▁consider -9.07025 +▁battle -9.07145 +▁anne -9.07195 +▁piece -9.07248 +▁died -9.07512 +▁success -9.07617 +▁post -9.07672 +▁lips -9.07702 +▁filled -9.078 +▁forget -9.07832 +ified -9.08089 +▁margaret -9.08123 +▁food -9.08284 +▁pleasant -9.08657 +ner -9.08809 +▁expression -9.08909 +▁pocket -9.08963 +fi -9.08995 +▁wear -9.09356 +▁fresh -9.09425 +au -9.09646 +ham -9.09714 +▁broken -9.09722 +▁laughed -9.09757 +▁following -9.09843 +▁youth -9.09887 +▁touch -9.10015 +▁sal -9.10107 +▁week -9.10288 +▁remained -9.10418 +▁leg -9.10432 +▁easy -9.1051 +▁al -9.10564 +▁enter -9.10865 +▁ste -9.1089 +▁ch -9.10922 +▁fight -9.10933 +▁placed -9.10947 +▁travel -9.10964 +▁simple -9.11135 +▁girls -9.11236 +▁waiting -9.11512 +▁stop -9.11684 +if -9.11804 +ile -9.11906 +ning -9.11982 +▁camp -9.12002 +▁ni -9.12035 +▁wise -9.12043 +▁office -9.12111 +▁fe -9.12205 +▁grand -9.12295 +▁judge -9.12363 +ny -9.12381 +▁quick -9.12617 +tri -9.12647 +▁du -9.12874 +▁fra -9.12979 +▁flo -9.1301 +ging -9.13045 +▁comfort -9.13208 +▁particular -9.13305 +▁suit -9.1338 +▁started -9.13391 +▁top -9.13613 +▁hot -9.13623 +▁impossible -9.13675 +ach -9.13707 +▁pale -9.13732 +ments -9.13795 +▁ve -9.13914 +▁conversation -9.13917 +▁scene -9.14081 +▁boys -9.14082 +▁society -9.14402 +▁outside -9.14432 +▁write -9.14476 +▁effort -9.14645 +▁talking -9.14693 +▁fortune -9.14726 +▁nine -9.14985 +▁single -9.151 +▁cro -9.152 +▁port -9.15411 +▁happen -9.15427 +▁rule -9.15463 +▁cast -9.15628 +▁shut -9.15709 +▁noble -9.15917 +▁gun -9.15924 +▁path -9.15997 +▁begin -9.16092 +▁win -9.16136 +▁sky -9.16149 +▁wonderful -9.16515 +▁sudden -9.16577 +▁army -9.16589 +ga -9.16805 +▁mountain -9.16841 +▁worth -9.16959 +▁grace -9.17162 +▁below -9.17203 +▁chapter -9.17215 +▁turning -9.17273 +▁afternoon -9.17612 +▁iron -9.17626 +▁bow -9.17691 +up -9.17693 +▁evil -9.17696 +▁trust -9.17749 +ag -9.17757 +▁recogni -9.1778 +▁ring -9.17871 +▁lad -9.17907 +▁sail -9.18071 +▁content -9.18118 +▁horses -9.18165 +▁silver -9.18199 +ory -9.18236 +ay -9.18273 +▁tri -9.18493 +▁running -9.18731 +▁hill -9.18744 +▁beginning -9.18888 +▁habit -9.1913 +▁mad -9.19289 +pa -9.19389 +▁clothes -9.19512 +▁morrow -9.19566 +▁cry -9.19577 +▁fashion -9.1964 +▁presence -9.19642 +▁min -9.19708 +▁tra -9.19725 +▁arrived -9.19781 +▁quarter -9.19811 +▁perfect -9.19902 +▁usual -9.19961 +▁neck -9.19975 +▁married -9.19983 +▁seat -9.20022 +wi -9.20071 +▁sand -9.20413 +▁shore -9.20419 +ries -9.20447 +▁giving -9.20584 +▁probably -9.2067 +▁expect -9.20736 +▁minute -9.20838 +▁shot -9.20958 +▁instant -9.21089 +▁degree -9.21275 +▁color -9.21461 +▁west -9.21547 +▁winter -9.21587 +ran -9.21593 +val -9.21703 +▁march -9.21721 +▁gar -9.21774 +▁bird -9.21826 +▁serious -9.21896 +▁greater -9.21909 +▁showed -9.21924 +▁covered -9.21941 +▁former -9.21951 +▁carry -9.21985 +▁loud -9.22023 +▁moved -9.2207 +▁mass -9.22168 +▁tom -9.22175 +lar -9.22214 +▁roman -9.22598 +▁moon -9.22677 +▁stream -9.22937 +▁easily -9.23026 +▁couldn -9.2303 +ey -9.23089 +▁search -9.23115 +▁board -9.23122 +▁wished -9.23148 +ap -9.23201 +▁months -9.23242 +▁sick -9.23317 +▁bla -9.23394 +▁duty -9.23511 +▁twelve -9.23557 +▁faint -9.23649 +▁hi -9.23676 +▁stranger -9.23765 +▁surprise -9.23849 +▁kill -9.23864 +fe -9.239 +▁leaving -9.23913 +ub -9.23923 
+▁journey -9.24091 +▁raised -9.24202 +▁scarcely -9.24209 +▁speaking -9.2426 +▁terrible -9.24359 +▁game -9.24488 +▁field -9.24561 +▁mer -9.24586 +▁promise -9.24657 +▁condition -9.24771 +▁personal -9.24929 +▁tall -9.24935 +▁stick -9.25 +▁threw -9.25168 +ip -9.25241 +▁marry -9.25282 +ative -9.25306 +gi -9.25323 +▁van -9.25378 +▁according -9.25484 +▁burn -9.25574 +▁sei -9.25721 +▁lie -9.25726 +▁attack -9.25802 +▁sword -9.25809 +▁rise -9.25828 +▁thoughts -9.25867 +side -9.25899 +▁guess -9.25901 +▁dar -9.26041 +▁calm -9.26116 +▁thin -9.2615 +▁village -9.26256 +▁anxious -9.26439 +▁expected -9.26601 +▁ball -9.26745 +▁especially -9.26805 +▁charge -9.26831 +▁measure -9.26897 +gn -9.26921 +▁seek -9.26938 +▁te -9.26963 +▁nice -9.2709 +her -9.27108 +▁trying -9.27193 +▁allow -9.27357 +▁bread -9.27449 +▁sharp -9.27462 +gu -9.27478 +▁honour -9.27541 +▁honor -9.27635 +▁entirely -9.2768 +▁bill -9.27739 +rous -9.27784 +▁bri -9.27788 +▁written -9.27819 +▁broke -9.27946 +▁killed -9.2795 +wa -9.28007 +▁offer -9.28008 +▁ladies -9.28047 +▁mark -9.28091 +▁flowers -9.28165 +▁learned -9.28181 +▁forty -9.28372 +▁happiness -9.28469 +▁pray -9.28486 +▁class -9.28584 +▁principle -9.28749 +▁ven -9.28892 +gen -9.28901 +▁fer -9.28919 +▁shape -9.28928 +▁summer -9.28943 +▁books -9.2895 +▁jack -9.28989 +▁draw -9.29038 +tin -9.2915 +▁golden -9.29273 +▁decided -9.29353 +▁unless -9.29627 +▁lead -9.29655 +▁listen -9.29844 +▁shook -9.29892 +▁noise -9.29931 +▁influence -9.29972 +eth -9.30032 +▁perfectly -9.30091 +▁marriage -9.30257 +▁broad -9.30274 +▁states -9.30314 +▁escape -9.30317 +▁middle -9.30362 +▁plant -9.30436 +▁movement -9.30501 +▁enemy -9.30542 +▁break -9.30544 +▁history -9.30549 +▁understood -9.30637 +▁latter -9.30638 +▁comes -9.30659 +wn -9.30685 +▁merely -9.3078 +▁simply -9.30828 +▁imagine -9.31019 +▁lower -9.3121 +▁born -9.31282 +▁conduct -9.31306 +▁yard -9.31406 +▁den -9.31624 +▁closed -9.31666 +▁fro -9.31877 +▁makes -9.31891 +lie -9.32113 +▁exist -9.32174 +▁speech -9.3227 +▁bitter -9.3235 +jo -9.3246 +hi -9.3254 +ib -9.32565 +▁grass -9.32705 +▁reply -9.32779 +▁changed -9.32821 +▁ka -9.3295 +▁dance -9.3312 +▁lying -9.33191 +▁finally -9.33222 +▁american -9.33343 +▁enjoy -9.33348 +▁contain -9.33439 +▁observed -9.33536 +▁meant -9.33571 +▁flu -9.3378 +ev -9.33858 +▁laugh -9.34134 +oo -9.34138 +▁afterwards -9.34164 +pose -9.34235 +▁beat -9.34266 +▁equal -9.3437 +▁race -9.34393 +▁rain -9.34564 +▁steps -9.34565 +▁gi -9.3462 +▁beneath -9.34821 +io -9.34833 +▁tail -9.34953 +▁taste -9.35112 +▁che -9.3514 +▁char -9.35243 +▁grow -9.35273 +clock -9.35505 +▁repeated -9.3551 +▁move -9.3553 +▁mon -9.35718 +▁lot -9.35898 +▁note -9.36107 +ther -9.36128 +▁madame -9.36149 +▁brave -9.36158 +ians -9.36183 +▁castle -9.36196 +bi -9.36309 +▁future -9.36322 +▁relation -9.36426 +▁sorry -9.36427 +▁health -9.36434 +▁dick -9.36447 +▁building -9.36547 +lf -9.36874 +▁edge -9.36921 +▁bless -9.36973 +▁mis -9.36985 +▁spite -9.36994 +mer -9.37185 +▁mill -9.37444 +▁prisoner -9.37517 +▁allowed -9.37651 +▁catch -9.379 +▁coat -9.38075 +▁complete -9.38129 +▁wouldn -9.382 +the -9.38299 +▁yellow -9.3836 +▁important -9.38367 +▁creature -9.38369 +▁passing -9.38461 +▁darkness -9.38601 +▁carriage -9.38669 +▁fifteen -9.38772 +▁hung -9.38791 +▁spread -9.38876 +▁pleased -9.38883 +▁curious -9.38918 +▁reali -9.38934 +▁worse -9.3898 +ement -9.39043 +▁circumstances -9.39055 +▁qua -9.39079 +▁din -9.39256 +▁jane -9.39383 +▁add -9.39383 +▁east -9.3941 +▁cup -9.39472 +▁blind -9.39499 +▁passion -9.39519 +▁discovered -9.39614 +▁notice -9.39644 +▁report -9.39752 +we -9.39837 
+▁space -9.39918 +▁com -9.4017 +▁presently -9.40287 +▁sorrow -9.40336 +▁pack -9.40421 +▁dry -9.40549 +▁ancient -9.40651 +fer -9.40713 +▁cover -9.40802 +▁dressed -9.40804 +▁existence -9.40998 +▁exactly -9.41068 +▁beast -9.41096 +▁proper -9.41119 +▁dropped -9.41192 +▁clean -9.41286 +▁colour -9.41297 +▁host -9.41436 +▁mere -9.41572 +and -9.4175 +▁determined -9.41801 +▁chamber -9.41816 +cent -9.41871 +▁faith -9.41872 +▁sto -9.4188 +▁skin -9.421 +▁storm -9.42138 +▁persons -9.42186 +▁priest -9.42212 +▁pick -9.42288 +▁support -9.4235 +▁narrow -9.4235 +▁private -9.42457 +▁smiled -9.42561 +▁cousin -9.42672 +▁drawing -9.42682 +▁attend -9.42755 +▁cook -9.42811 +▁prevent -9.42995 +▁various -9.43011 +▁hole -9.43205 +▁weak -9.43221 +▁fixed -9.43226 +let -9.43406 +▁bottom -9.43427 +▁nobody -9.43427 +▁eli -9.43557 +▁legs -9.43638 +▁ar -9.43728 +ade -9.4384 +▁individual -9.43861 +▁dare -9.43865 +▁ears -9.44178 +ug -9.44328 +▁advantage -9.44516 +▁france -9.44539 +▁lives -9.44639 +▁wine -9.44744 +▁walls -9.44867 +▁tired -9.44922 +▁shop -9.44987 +▁cru -9.45028 +▁animal -9.45076 +▁wrote -9.45175 +▁royal -9.45176 +ki -9.45265 +▁isn -9.45395 +▁bon -9.45485 +▁considered -9.45562 +▁moral -9.45564 +▁companion -9.4577 +▁lose -9.45813 +▁lake -9.45864 +▁bag -9.46002 +▁letters -9.46007 +▁luck -9.46037 +▁sy -9.46198 +hood -9.46307 +▁inter -9.46621 +▁german -9.46634 +▁sake -9.46706 +▁drop -9.46715 +▁paid -9.4679 +▁ear -9.46913 +▁breakfast -9.46953 +▁labor -9.46955 +▁desert -9.47071 +▁declared -9.47139 +▁study -9.47178 +▁instance -9.47184 +▁song -9.47236 +▁somewhat -9.47291 +▁cloth -9.47377 +▁colonel -9.47403 +▁special -9.47403 +▁value -9.47527 +ld -9.47606 +▁main -9.47694 +▁proud -9.47697 +▁express -9.47824 +▁nation -9.47829 +▁handsome -9.47938 +▁confess -9.47973 +ps -9.48006 +▁passage -9.48021 +▁period -9.48082 +▁gen -9.4815 +▁christ -9.48187 +▁custom -9.48309 +row -9.4831 +▁hurt -9.48337 +▁shoulder -9.48433 +▁cu -9.48495 +▁sin -9.48574 +▁receive -9.48598 +ite -9.48641 +light -9.48678 +▁difficult -9.48784 +ple -9.48865 +▁depend -9.48879 +▁meeting -9.48891 +▁heat -9.48893 +▁believed -9.48972 +▁social -9.48997 +▁difficulty -9.4905 +▁greatest -9.4908 +▁drawn -9.49088 +▁grant -9.49184 +▁birds -9.49301 +▁angry -9.49342 +ign -9.49466 +▁places -9.49511 +▁gri -9.4964 +▁courage -9.49683 +▁disc -9.4972 +▁evidently -9.49722 +▁gentle -9.49742 +▁cruel -9.49742 +▁george -9.49798 +▁due -9.49871 +▁paris -9.50034 +▁knows -9.50057 +▁knowing -9.50084 +▁servant -9.50088 +▁writing -9.50377 +▁pure -9.50397 +▁holding -9.50448 +▁remembered -9.50481 +▁tender -9.5049 +▁whi -9.50695 +▁burst -9.50701 +▁surely -9.50748 +▁valley -9.50855 +hy -9.51064 +▁conf -9.51116 +▁spoken -9.51131 +▁christian -9.51262 +▁store -9.51318 +▁henry -9.51332 +▁finished -9.51369 +▁qui -9.51369 +▁ob -9.51392 +▁prove -9.51443 +▁fool -9.51478 +▁ban -9.51521 +▁soldiers -9.51612 +▁language -9.51779 +▁inside -9.51827 +▁fallen -9.5209 +itch -9.52244 +▁baby -9.52317 +▁pot -9.52331 +▁situation -9.5237 +▁ruin -9.52474 +▁watched -9.52482 +▁gentlemen -9.52509 +▁fancy -9.52617 +▁accept -9.52659 +▁mal -9.52755 +▁season -9.52821 +▁ourselves -9.52844 +▁speed -9.53094 +ans -9.53103 +nic -9.53266 +▁fu -9.53441 +▁cool -9.53512 +form -9.53515 +▁vessel -9.53561 +▁william -9.53563 +▁serve -9.53642 +▁obliged -9.53681 +▁group -9.53691 +my -9.53852 +od -9.53859 +▁leaves -9.53884 +▁goes -9.53981 +▁peculiar -9.54041 +▁news -9.54053 +▁vain -9.54213 +▁everybody -9.54282 +▁pin -9.5434 +▁forgotten -9.54412 +▁carefully -9.54456 +▁flash -9.54524 +uous -9.54561 +ook -9.54675 +ched -9.54731 +▁murder 
-9.54736 +▁und -9.54748 +▁delight -9.54769 +▁waited -9.54905 +▁roll -9.54927 +▁property -9.54931 +▁noticed -9.54941 +▁hum -9.54975 +han -9.54979 +▁fur -9.55108 +▁knock -9.55131 +▁earnest -9.55152 +▁ge -9.55239 +uch -9.55241 +▁honest -9.55375 +▁promised -9.55457 +wood -9.55616 +▁san -9.55635 +▁walking -9.55738 +▁quietly -9.55865 +▁square -9.55866 +▁cloud -9.5589 +one -9.55892 +▁higher -9.56088 +▁built -9.5611 +▁formed -9.56135 +▁teach -9.56201 +▁fate -9.56269 +▁false -9.56356 +▁york -9.56368 +▁bal -9.56386 +▁climb -9.56479 +▁dust -9.56506 +▁fond -9.56536 +▁grown -9.56693 +▁fruit -9.5685 +▁generally -9.56896 +▁offered -9.57025 +▁nurse -9.57101 +▁spent -9.57227 +▁join -9.57301 +▁meaning -9.57367 +▁smoke -9.57471 +▁station -9.57515 +▁rough -9.57528 +line -9.5754 +ju -9.57649 +▁likely -9.57725 +▁surface -9.57845 +▁month -9.57879 +▁r -9.5807 +▁possession -9.58089 +▁tongue -9.58102 +for -9.58136 +ang -9.58153 +▁duke -9.5827 +stra -9.58404 +▁laughing -9.58435 +▁weather -9.58474 +▁whispered -9.58519 +gan -9.58545 +▁rag -9.58575 +▁system -9.58599 +▁laws -9.58622 +▁touched -9.58764 +▁nose -9.58808 +▁surprised -9.58815 +▁wealth -9.58855 +▁trade -9.58885 +▁nu -9.58947 +▁temper -9.58978 +▁frank -9.58978 +▁arch -9.59065 +▁opportunity -9.59231 +▁animals -9.59345 +▁bare -9.59353 +▁claim -9.59358 +▁cost -9.59584 +▁opposite -9.59739 +▁police -9.59739 +▁key -9.59776 +▁ideas -9.59836 +▁wave -9.5985 +▁cal -9.5994 +▁reading -9.60061 +▁corn -9.6011 +▁collect -9.60123 +ker -9.60382 +▁gray -9.60456 +▁crown -9.60465 +▁shoulders -9.60493 +▁swift -9.60507 +▁wash -9.60516 +▁ice -9.60591 +▁tar -9.60632 +use -9.6067 +▁prepared -9.6068 +▁gro -9.60782 +lac -9.60967 +▁empty -9.61022 +▁share -9.61049 +▁smiling -9.61152 +▁avoid -9.61153 +▁difference -9.61161 +▁explain -9.61169 +▁pour -9.61217 +▁fat -9.61242 +▁attract -9.61281 +▁opening -9.61463 +▁breast -9.6154 +▁material -9.6154 +▁wheel -9.6154 +ius -9.61563 +▁suffering -9.61577 +▁distinct -9.61639 +▁rever -9.61748 +▁sing -9.61819 +▁chi -9.61843 +▁fingers -9.61874 +▁altogether -9.6193 +▁papa -9.6196 +dding -9.62028 +▁brain -9.62096 +▁row -9.62113 +▁asleep -9.62191 +▁grey -9.62254 +▁windows -9.62363 +▁alive -9.62446 +▁proceed -9.62486 +▁flower -9.62538 +▁pieces -9.6261 +▁leap -9.62618 +pping -9.62686 +ef -9.6269 +▁alter -9.62705 +▁memory -9.62717 +aw -9.62815 +▁fill -9.62844 +▁thrown -9.62844 +▁rode -9.6292 +▁kingdom -9.6298 +▁dish -9.62982 +▁mat -9.63055 +▁maid -9.6322 +▁band -9.63234 +some -9.63329 +▁virtue -9.63374 +▁clo -9.63425 +▁guest -9.63479 +▁loss -9.63491 +▁caused -9.63624 +bra -9.63641 +▁motion -9.63672 +▁lovely -9.63741 +▁swa -9.63749 +▁million -9.63758 +▁fault -9.63772 +▁united -9.63911 +oc -9.64057 +▁mountains -9.64071 +▁pur -9.64112 +▁dim -9.64149 +▁satisfied -9.6417 +▁lover -9.64196 +▁harm -9.64233 +▁dollars -9.64303 +▁hero -9.64369 +▁conceal -9.64437 +▁vast -9.64488 +▁hath -9.64582 +▁rush -9.64604 +▁despair -9.64704 +▁pull -9.64708 +lan -9.64708 +▁height -9.64721 +ex -9.64763 +▁pet -9.64824 +ney -9.64929 +▁spi -9.64936 +▁remark -9.64976 +▁pity -9.64999 +▁rising -9.65036 +▁bent -9.65173 +▁hurry -9.65242 +▁bree -9.65243 +ddle -9.65325 +▁pride -9.65356 +▁settled -9.65371 +▁justice -9.65381 +▁finding -9.65389 +▁lifted -9.65406 +▁soldier -9.65444 +▁regular -9.65511 +▁struggle -9.65511 +▁machine -9.65512 +▁sum -9.65631 +▁hurried -9.65647 +▁sufficient -9.65738 +▁throw -9.65747 +▁represent -9.65772 +▁supper -9.65918 +▁double -9.65922 +▁alarm -9.65924 +▁dreadful -9.65954 +▁stock -9.66116 +▁flow -9.66166 +▁example -9.66189 +▁roof -9.66189 +▁ce -9.66229 +▁supposed 
-9.66546 +▁preserv -9.666 +▁listened -9.66708 +▁col -9.66819 +▁secure -9.67009 +▁frightened -9.67014 +ka -9.6705 +▁drive -9.67127 +▁disturb -9.67145 +▁emotion -9.67283 +▁servants -9.6735 +▁buy -9.674 +▁forced -9.67485 +▁kitchen -9.67558 +rin -9.6761 +▁terror -9.67696 +▁stairs -9.677 +▁sixty -9.67838 +▁ordinary -9.67972 +▁directly -9.67979 +▁heads -9.67985 +▁greatly -9.68092 +▁method -9.68111 +▁forgive -9.68116 +▁awful -9.68119 +▁reflect -9.68138 +▁talked -9.68277 +▁favour -9.6838 +ties -9.68388 +▁welcome -9.68388 +▁tin -9.6845 +▁yo -9.68486 +▁butter -9.68532 +▁control -9.68668 +▁angel -9.68714 +▁vo -9.68747 +stone -9.68797 +▁ordered -9.6884 +▁usually -9.68842 +▁poet -9.68918 +▁bold -9.68985 +ridge -9.69084 +▁adventure -9.69092 +▁watching -9.69214 +▁ride -9.69302 +▁folk -9.69436 +▁mistress -9.69518 +▁rate -9.69657 +▁growing -9.69734 +▁evidence -9.69788 +▁cave -9.69821 +▁j -9.69842 +▁finger -9.69866 +bbe -9.699 +▁seventeen -9.69929 +▁moving -9.69932 +▁cow -9.69957 +▁doesn -9.69962 +ator -9.70019 +▁type -9.70071 +▁tale -9.70074 +▁boil -9.70121 +▁deliver -9.70212 +ire -9.70237 +▁farm -9.70249 +▁mil -9.70318 +▁feelings -9.70333 +▁monsieur -9.70353 +▁gathered -9.7039 +▁putting -9.70417 +▁remarked -9.70434 +▁er -9.70444 +▁contrary -9.70495 +iness -9.70602 +▁crime -9.7078 +▁nearer -9.70882 +▁shame -9.71081 +▁loose -9.71084 +▁discover -9.71192 +▁flat -9.71232 +▁fail -9.7131 +▁twice -9.7135 +▁pla -9.71489 +▁europe -9.71637 +▁patient -9.71637 +▁unto -9.71665 +▁pair -9.71729 +▁suffer -9.7173 +tte -9.71755 +ea -9.71796 +▁hy -9.71815 +▁treasure -9.71925 +▁eager -9.72052 +▁bi -9.72074 +▁salt -9.72239 +▁fly -9.72313 +▁parts -9.7254 +pec -9.72573 +▁arthur -9.72647 +▁affairs -9.7268 +▁slow -9.72704 +▁consist -9.72808 +▁devil -9.72834 +▁affection -9.73001 +▁bore -9.7301 +▁kiss -9.73036 +▁engaged -9.73052 +▁officer -9.73173 +ification -9.73228 +▁milk -9.73339 +▁process -9.73375 +▁gift -9.73398 +▁dan -9.73398 +▁lamp -9.73427 +▁hid -9.73427 +▁pulled -9.73464 +▁excellent -9.73521 +▁impression -9.73522 +▁telling -9.73545 +▁proved -9.73575 +▁authority -9.73576 +▁tower -9.73802 +▁consequence -9.73814 +▁ray -9.73837 +▁favor -9.73953 +▁flew -9.73962 +▁charles -9.73993 +▁address -9.73994 +▁familiar -9.74108 +▁confidence -9.74112 +▁limit -9.74112 +▁weeks -9.74244 +▁woods -9.74288 +▁direct -9.74355 +▁intention -9.74383 +▁rare -9.74439 +▁perform -9.74547 +▁solemn -9.74551 +▁distant -9.74552 +▁bur -9.74558 +▁image -9.74713 +▁president -9.74847 +▁firm -9.74855 +▁indian -9.74876 +▁rid -9.74907 +▁rank -9.74916 +▁liked -9.74918 +▁houses -9.74982 +▁agree -9.75016 +▁ya -9.7506 +▁matters -9.7508 +▁working -9.75208 +▁prison -9.75226 +▁major -9.75227 +▁slip -9.75273 +like -9.75278 +▁mode -9.75344 +▁aware -9.75452 +▁looks -9.75466 +▁weight -9.75468 +▁busy -9.75475 +▁wound -9.7562 +▁bath -9.75727 +hen -9.75879 +▁wore -9.75892 +▁exercise -9.7604 +▁similar -9.7604 +▁amount -9.7619 +▁questions -9.76376 +▁violent -9.76642 +▁excuse -9.76643 +▁aside -9.76705 +▁dull -9.76778 +▁emperor -9.76793 +▁nevertheless -9.76793 +▁shout -9.76836 +gue -9.76895 +▁explained -9.76923 +▁accomplish -9.76944 +lung -9.77072 +▁instantly -9.77126 +▁mistake -9.77134 +▁smooth -9.77248 +▁strike -9.77248 +▁horror -9.77552 +▁science -9.77552 +▁protest -9.77553 +▁bob -9.77559 +▁obey -9.77567 +▁manage -9.77573 +▁ama -9.77643 +▁press -9.77671 +▁necessity -9.77704 +▁splendid -9.77704 +▁holy -9.77754 +▁interesting -9.7778 +ath -9.7784 +▁religion -9.77857 +▁unknown -9.77857 +▁fierce -9.7801 +▁disappeared -9.78045 +▁unc -9.78099 +▁naturally -9.7813 +▁louis -9.78163 +▁drove 
-9.78164 +▁played -9.78241 +▁brand -9.78401 +ford -9.78471 +▁hate -9.78556 +▁lines -9.78597 +▁shoot -9.78625 +▁consent -9.78635 +▁agreed -9.7869 +▁seated -9.78715 +▁stir -9.78774 +▁circle -9.78778 +▁streets -9.78825 +bble -9.78905 +▁task -9.78939 +▁produced -9.7904 +▁accident -9.79087 +burg -9.79088 +▁lin -9.79162 +▁witness -9.79162 +▁liberty -9.79241 +▁detail -9.79242 +▁minister -9.79242 +▁powerful -9.79327 +▁savage -9.79397 +▁sixteen -9.79397 +▁pretend -9.79552 +▁coast -9.79554 +▁utter -9.79799 +▁named -9.79837 +▁clever -9.7993 +▁admit -9.79966 +▁couple -9.80019 +▁message -9.80021 +▁wicked -9.80023 +▁bro -9.80067 +▁temple -9.80175 +▁stones -9.80204 +▁yesterday -9.80332 +▁hills -9.80372 +▁plea -9.80428 +▁sca -9.80497 +▁slight -9.80546 +▁squ -9.80554 +▁diamond -9.80646 +▁possibly -9.80646 +▁affair -9.80767 +▁hearing -9.8086 +▁original -9.80867 +▁sell -9.80869 +▁worthy -9.80872 +▁cottage -9.8096 +▁progress -9.8096 +▁sacrifice -9.8096 +▁shock -9.80961 +▁sunday -9.80961 +▁design -9.80964 +▁sought -9.80966 +lus -9.81045 +▁otherwise -9.81118 +right -9.81118 +▁prayer -9.81126 +▁cabin -9.81127 +▁dwell -9.81146 +▁rev -9.81234 +▁bridge -9.81314 +▁particularly -9.81374 +ied -9.81392 +▁yield -9.81434 +▁treat -9.81442 +▁oak -9.81465 +▁gain -9.81614 +win -9.81616 +▁rope -9.81746 +tan -9.81759 +ou -9.81816 +▁orders -9.81844 +▁suspect -9.8191 +▁edward -9.82087 +▁eleven -9.82229 +ability -9.82243 +▁occurred -9.82244 +▁teeth -9.82246 +▁val -9.82333 +▁lion -9.82382 +▁america -9.82547 +▁falling -9.8255 +ists -9.82559 +▁depart -9.82607 +▁keeping -9.82633 +▁demand -9.82658 +nny -9.82735 +▁paused -9.82763 +▁ceased -9.82864 +▁cheer -9.83045 +▁pardon -9.83193 +▁native -9.83194 +oon -9.83204 +▁beg -9.83285 +itude -9.83312 +▁dogs -9.83322 +▁required -9.8337 +▁elect -9.83506 +▁entertain -9.83514 +ina -9.83517 +▁blu -9.83533 +▁huge -9.83628 +▁carrying -9.83629 +▁insist -9.83641 +▁satisfaction -9.83676 +board -9.83736 +▁upper -9.83744 +ord -9.8376 +▁hunt -9.83761 +▁countenance -9.83838 +▁maiden -9.83958 +▁james -9.84004 +▁foreign -9.84011 +▁failed -9.84019 +▁gather -9.8402 +▁fun -9.8409 +▁test -9.84104 +▁pal -9.84163 +▁mighty -9.84183 +▁pit -9.8431 +▁silk -9.84328 +▁terms -9.8435 +▁page -9.84434 +▁knees -9.84447 +▁brothers -9.84472 +▁shown -9.8448 +▁professor -9.84527 +▁log -9.84552 +more -9.84553 +▁defi -9.8461 +▁cart -9.84746 +▁charm -9.84749 +▁require -9.84799 +▁proof -9.84816 +▁softly -9.84961 +▁unfortunate -9.8498 +▁possessed -9.84987 +▁severe -9.85032 +▁singing -9.85039 +▁stage -9.8507 +▁medi -9.85097 +▁price -9.85122 +▁freedom -9.85145 +▁farther -9.85228 +▁shouted -9.85263 +▁majesty -9.85309 +▁previous -9.85309 +▁guide -9.85355 +▁match -9.85362 +▁chest -9.85369 +▁intended -9.85443 +▁excitement -9.85485 +▁officers -9.85487 +▁shake -9.85565 +▁sentiment -9.85639 +▁gently -9.85644 +▁succeeded -9.85691 +▁sur -9.85879 +▁ki -9.8588 +pha -9.85914 +▁mention -9.85927 +▁acquaintance -9.85969 +▁imagination -9.85969 +▁physical -9.85969 +▁leading -9.85978 +▁slave -9.8605 +▁lock -9.8607 +▁base -9.86187 +▁steam -9.86204 +▁term -9.86288 +▁pointed -9.86301 +▁pipe -9.86304 +▁shade -9.86323 +▁invent -9.86325 +▁regret -9.86468 +▁alas -9.86474 +▁faithful -9.86713 +▁worked -9.86766 +▁bay -9.86795 +▁record -9.86801 +▁complain -9.86802 +▁mentioned -9.86831 +▁superior -9.86969 +▁hotel -9.87087 +▁seventy -9.87096 +▁sheep -9.87201 +▁advice -9.87304 +▁hidden -9.8732 +▁demanded -9.87361 +▁fore -9.8737 +▁meal -9.87387 +▁conscious -9.8739 +ky -9.87404 +▁possess -9.87473 +▁praise -9.87488 +▁brow -9.87501 +▁fourth -9.87589 +▁events -9.87621 
+▁advanced -9.87786 +▁resolved -9.87809 +▁stuff -9.87809 +▁cheerful -9.87861 +▁fri -9.87884 +▁fairy -9.87922 +▁birth -9.87978 +▁afford -9.8798 +▁grief -9.87988 +▁sides -9.88093 +▁substance -9.88147 +▁article -9.88148 +▁level -9.8815 +▁wake -9.88165 +ville -9.88325 +▁joined -9.88349 +▁mist -9.88439 +▁practical -9.88486 +▁clearly -9.88488 +▁trace -9.88538 +▁awake -9.8864 +▁lack -9.88656 +▁basket -9.88656 +▁observe -9.88658 +ette -9.88747 +▁spirits -9.88853 +▁excited -9.88955 +▁abandon -9.88997 +▁shining -9.89001 +▁fully -9.89019 +▁calling -9.89202 +van -9.89205 +▁considerable -9.89318 +▁sprang -9.8934 +▁mile -9.89356 +▁dangerous -9.89425 +▁pounds -9.89446 +▁jew -9.89454 +▁fox -9.89599 +▁information -9.89684 +▁wit -9.89688 +▁deck -9.8973 +▁lies -9.8975 +▁paul -9.89839 +▁stars -9.90127 +▁anger -9.90188 +▁strain -9.90201 +▁faces -9.90244 +▁settle -9.90251 +▁adam -9.90281 +▁smith -9.90373 +▁citi -9.90381 +▁importance -9.90385 +▁feather -9.9072 +▁willing -9.90763 +▁served -9.90764 +▁author -9.90817 +▁perceived -9.90847 +▁haven -9.90898 +▁flame -9.90907 +▁divine -9.90945 +▁trail -9.91006 +▁anybody -9.91068 +▁sigh -9.91159 +▁delicate -9.91243 +▁desired -9.91307 +war -9.91329 +▁curiosity -9.91418 +▁practice -9.91418 +▁fold -9.91533 +▁absolutely -9.91541 +▁bottle -9.91607 +▁consideration -9.91616 +▁prop -9.91638 +▁meat -9.91639 +▁choose -9.91768 +▁occupied -9.91768 +▁interested -9.91782 +▁throat -9.91978 +▁candle -9.91985 +▁dawn -9.91996 +cha -9.92028 +▁protect -9.92033 +▁sentence -9.92088 +▁rocks -9.92105 +▁apparently -9.9218 +▁portion -9.92182 +▁aid -9.92242 +▁tight -9.92315 +▁actually -9.92396 +▁presented -9.92442 +▁dying -9.92675 +▁daily -9.92765 +▁political -9.92827 +▁bodies -9.92828 +▁suffered -9.9284 +▁modern -9.92845 +▁completely -9.92895 +▁sooner -9.92933 +▁advance -9.93029 +▁refused -9.93067 +▁farmer -9.93074 +▁polite -9.93183 +▁plate -9.93356 +▁thunder -9.93361 +▁elsie -9.93364 +▁sailor -9.93371 +▁brief -9.93374 +▁suggested -9.93403 +▁anti -9.93442 +▁flesh -9.93541 +▁buck -9.93573 +▁weep -9.93586 +▁dri -9.93665 +▁ocean -9.93719 +▁spend -9.93721 +▁odd -9.9377 +▁governor -9.93809 +well -9.93829 +▁entrance -9.93898 +▁suspicion -9.93898 +▁stepped -9.93935 +▁rapidly -9.93971 +▁check -9.93987 +low -9.94128 +▁club -9.94131 +▁flight -9.94132 +▁hide -9.94165 +▁entire -9.94167 +▁indians -9.94179 +▁sam -9.94213 +▁capital -9.94257 +▁mamma -9.94258 +▁jud -9.94284 +▁correct -9.94437 +▁haste -9.94579 +▁pace -9.9458 +▁crack -9.94583 +▁sensation -9.94619 +▁worst -9.94619 +▁driven -9.94787 +▁midst -9.94797 +▁august -9.94799 +▁proportion -9.94799 +▁innocent -9.94799 +ja -9.94854 +▁doors -9.94913 +▁regarded -9.95005 +▁education -9.95016 +▁employ -9.95052 +▁truly -9.95138 +liness -9.9516 +▁instrument -9.95161 +▁foolish -9.95213 +ility -9.95287 +▁frame -9.95289 +▁taught -9.95343 +▁nay -9.95365 +▁hang -9.95432 +▁argument -9.95525 +▁nineteen -9.95525 +▁elder -9.95574 +og -9.95638 +▁spar -9.95647 +▁papers -9.95683 +▁neighbor -9.957 +▁instruct -9.95708 +▁reward -9.95728 +▁fields -9.95806 +▁equally -9.95809 +▁needed -9.95816 +▁conditions -9.95965 +▁ways -9.95977 +▁request -9.96074 +▁worn -9.96075 +▁dig -9.96135 +▁load -9.96212 +▁remarkable -9.96225 +▁worship -9.96257 +▁park -9.96344 +▁interrupted -9.96393 +▁skill -9.96396 +▁critic -9.96441 +▁distress -9.96442 +▁belief -9.96442 +▁stern -9.9649 +▁track -9.96546 +▁hunting -9.96568 +▁jewel -9.96585 +▁gradually -9.96625 +▁glow -9.96653 +▁mental -9.96704 +▁rushed -9.96737 +▁powers -9.96763 +▁visitor -9.96783 +ight -9.96826 +▁behold -9.96859 +▁ski -9.96872 +▁picked 
-9.96903 +▁expressed -9.96991 +artagnan -9.96994 +▁moreover -9.96997 +▁keen -9.96998 +▁operation -9.97029 +▁careful -9.97036 +▁hence -9.97131 +▁wander -9.97162 +▁enemies -9.9718 +▁mysterious -9.9718 +▁assert -9.97181 +▁depth -9.97182 +ium -9.97185 +▁prefer -9.97198 +▁charming -9.97301 +▁crossed -9.97306 +▁dread -9.97315 +nnie -9.97438 +▁robin -9.97446 +▁relief -9.97556 +▁inquired -9.9758 +▁apple -9.97602 +▁urge -9.97616 +▁wings -9.97698 +▁choice -9.97737 +▁tre -9.97846 +▁species -9.97924 +▁delighted -9.97997 +▁rapid -9.98035 +▁appeal -9.98111 +▁famous -9.98111 +▁civili -9.98157 +▁helen -9.98168 +▁useful -9.9818 +▁card -9.98181 +▁newspaper -9.98298 +▁plenty -9.98298 +qua -9.98375 +▁bearing -9.98432 +▁nervous -9.98486 +▁rub -9.98727 +▁roar -9.98756 +▁wounded -9.98825 +▁chain -9.98829 +▁produce -9.98919 +▁reflection -9.99014 +▁baron -9.99026 +▁merchant -9.99051 +▁quarrel -9.99051 +▁glory -9.99051 +▁begun -9.99086 +▁queer -9.99244 +▁mix -9.9934 +▁whisper -9.99361 +rg -9.99439 +▁buried -9.9944 +▁bid -9.99446 +▁tip -9.99521 +▁frequently -9.99541 +▁div -9.99601 +▁knee -9.99684 +▁region -9.99813 +ctor -9.99893 +▁root -9.99909 +▁trip -9.99947 +▁jealous -10 +head -10.0005 +▁saved -10.0006 +▁pig -10.0007 +▁phil -10.0019 +▁union -10.0028 +▁ships -10.0029 +▁companions -10.0031 +▁approached -10.0038 +▁harry -10.0038 +▁arrival -10.0038 +▁drunk -10.0038 +▁slept -10.0038 +▁furnish -10.0038 +▁hale -10.0039 +▁para -10.004 +▁heap -10.0047 +▁absence -10.0058 +▁shoes -10.0065 +▁consciousness -10.0067 +▁kindly -10.008 +bel -10.0083 +▁evident -10.0089 +▁lest -10.0095 +▁grasp -10.0104 +▁steal -10.0106 +lon -10.0107 +▁knife -10.0115 +▁precious -10.0115 +▁element -10.0118 +▁proceeded -10.013 +▁fever -10.013 +▁leader -10.0134 +▁risk -10.0137 +▁ease -10.0139 +▁mount -10.0149 +▁meanwhile -10.0154 +▁century -10.0154 +▁grim -10.0155 +▁owe -10.0167 +▁judgment -10.0173 +▁arose -10.0174 +▁vision -10.0176 +▁sang -10.0177 +▁extreme -10.0186 +▁constant -10.0186 +▁asking -10.0188 +▁observation -10.0192 +▁thrust -10.0192 +▁delay -10.0193 +▁hit -10.0211 +▁includ -10.0212 +▁admire -10.0212 +▁lift -10.0219 +▁lesson -10.022 +▁friendship -10.0221 +▁spare -10.0222 +▁issue -10.0223 +▁principal -10.0231 +▁mourn -10.0232 +▁capable -10.0235 +▁burning -10.0241 +▁accepted -10.0242 +▁extraordinary -10.0251 +▁hoped -10.0256 +▁removed -10.0257 +▁horn -10.0261 +▁cent -10.0262 +▁alice -10.0272 +▁chap -10.028 +▁apartment -10.0284 +▁fighting -10.0284 +▁trembling -10.029 +▁somebody -10.029 +▁anyone -10.0291 +▁blame -10.0294 +▁bride -10.0299 +▁reader -10.0304 +▁everywhere -10.031 +▁labour -10.031 +▁recall -10.031 +▁rob -10.0317 +▁bull -10.0324 +▁council -10.0329 +▁popular -10.0329 +▁trial -10.0337 +▁wishes -10.0348 +▁dun -10.0349 +▁assured -10.0349 +▁brilliant -10.0349 +▁forgot -10.035 +▁cab -10.0352 +▁continue -10.0358 +▁acknowledg -10.0369 +▁retreat -10.0369 +▁increased -10.0374 +▁contempt -10.0389 +▁grandfather -10.0389 +▁sympathy -10.0389 +▁ghost -10.0389 +▁creatures -10.0407 +▁ken -10.0408 +▁stretched -10.0409 +▁playing -10.0415 +▁hind -10.0417 +▁members -10.0428 +▁miserable -10.0428 +▁kindness -10.0435 +▁gla -10.0444 +▁highest -10.0447 +aries -10.0457 +▁eighty -10.0467 +▁kissed -10.0468 +▁deserve -10.0468 +▁begged -10.0474 +▁hut -10.0478 +▁closely -10.0485 +▁wondered -10.0499 +▁larger -10.0505 +▁accordingly -10.0508 +▁military -10.0508 +▁remind -10.0508 +▁destroy -10.0527 +▁maintain -10.0528 +▁engine -10.0528 +▁motive -10.0529 +wick -10.0531 +▁strip -10.0543 +ison -10.0544 +▁hans -10.0548 +▁ahead -10.0562 +▁magic -10.0565 +▁infinite -10.0569 
+▁prompt -10.0569 +▁informed -10.0571 +▁peer -10.0594 +▁pressed -10.0603 +▁somewhere -10.0609 +▁bought -10.0609 +▁trap -10.0621 +▁scar -10.0623 +▁visible -10.063 +▁ashamed -10.0631 +gar -10.0643 +▁neighbour -10.0649 +▁constitution -10.065 +▁intelligence -10.065 +▁tear -10.0651 +▁profession -10.0655 +▁hungry -10.0661 +▁smell -10.067 +▁listening -10.0671 +▁stories -10.0672 +▁approach -10.0676 +▁aim -10.0681 +▁ham -10.0682 +▁string -10.0684 +▁explanation -10.0691 +▁immense -10.0691 +▁religious -10.0691 +▁hollow -10.0691 +abeth -10.0691 +▁throughout -10.0691 +▁await -10.0691 +▁flying -10.0699 +cum -10.071 +▁scream -10.0711 +▁active -10.0716 +port -10.0718 +ett -10.0729 +▁product -10.0731 +▁unhappy -10.0731 +▁vague -10.0733 +▁stupid -10.0752 +▁dignity -10.0752 +▁isabel -10.0752 +▁pitch -10.0767 +▁comrade -10.0773 +▁reckon -10.0773 +▁stiff -10.0773 +rick -10.0779 +▁spark -10.078 +▁sold -10.0785 +▁stro -10.0806 +▁crying -10.0812 +▁repeat -10.0817 +▁comfortable -10.0831 +▁marked -10.0834 +▁project -10.0835 +▁becoming -10.0835 +▁parents -10.0835 +▁shelter -10.0836 +field -10.0839 +▁nest -10.0841 +▁stole -10.0843 +▁hint -10.0844 +▁trick -10.0849 +▁thoroughly -10.0852 +▁hospital -10.0855 +▁weapon -10.0855 +▁style -10.0856 +▁rome -10.0857 +▁admitted -10.0862 +▁safety -10.0866 +▁understanding -10.0871 +▁weary -10.0872 +▁slaves -10.088 +▁print -10.0886 +▁credit -10.0897 +▁unable -10.0914 +▁clouds -10.0917 +▁conclusion -10.0918 +▁seldom -10.0918 +▁unusual -10.0918 +▁hanging -10.0942 +▁david -10.096 +▁bowed -10.0963 +mond -10.0969 +▁pushed -10.0983 +▁escaped -10.0988 +▁warn -10.099 +▁betray -10.1002 +▁eggs -10.1024 +▁plainly -10.1028 +▁ser -10.1036 +▁exhibit -10.1044 +▁gay -10.1047 +▁display -10.1065 +▁member -10.1066 +▁grin -10.1078 +▁prospect -10.1086 +▁brush -10.1086 +▁waves -10.1087 +▁successful -10.11 +▁extent -10.1108 +▁persuade -10.1129 +▁mood -10.1136 +▁mid -10.1138 +▁arranged -10.115 +▁universal -10.115 +▁jim -10.1153 +▁signal -10.116 +▁whilst -10.1172 +▁wolf -10.1172 +▁philip -10.1173 +▁billy -10.1195 +▁eagerly -10.1196 +▁returning -10.1207 +▁conscience -10.1215 +▁fortunate -10.1215 +▁gleam -10.1215 +▁female -10.1215 +▁hastily -10.1216 +▁provided -10.1218 +▁obtain -10.1221 +▁render -10.1221 +▁instinct -10.1236 +▁concerning -10.1239 +▁concerned -10.1241 +▁rum -10.1247 +▁vol -10.1256 +▁somehow -10.1258 +▁gall -10.1259 +▁pink -10.126 +▁artist -10.1267 +▁accustomed -10.128 +▁unconscious -10.128 +▁advise -10.128 +mmed -10.1283 +▁tiny -10.1288 +▁mud -10.1288 +▁branches -10.1291 +▁refuse -10.1294 +▁rage -10.1295 +▁bishop -10.1301 +▁supply -10.1301 +▁peasant -10.1301 +▁lawyer -10.1302 +▁connection -10.1306 +▁develop -10.1316 +▁correspond -10.1323 +▁rang -10.1325 +house -10.1336 +▁plum -10.1345 +▁nodded -10.1345 +▁slipped -10.1347 +▁kit -10.1349 +▁constantly -10.1352 +▁earl -10.1356 +▁fairly -10.1365 +▁features -10.138 +▁pause -10.1384 +▁painful -10.1388 +▁super -10.1397 +▁laughter -10.1399 +▁whence -10.14 +▁opera -10.1401 +▁joe -10.1402 +▁eating -10.1408 +▁christmas -10.1411 +time -10.1412 +▁wholly -10.1416 +▁apart -10.1418 +▁coach -10.1418 +▁crew -10.143 +▁cheeks -10.1431 +▁revolution -10.1432 +▁lonely -10.1433 +▁attain -10.1433 +▁luc -10.1436 +▁established -10.1437 +▁throne -10.1439 +▁dash -10.144 +▁friendly -10.1443 +▁exhaust -10.1454 +▁cliff -10.1455 +▁reveal -10.1455 +▁adopt -10.1455 +▁centre -10.1457 +▁merry -10.1469 +▁sylvia -10.1477 +▁misfortune -10.1499 +▁feast -10.1499 +▁arab -10.1509 +▁fetch -10.1521 +▁descend -10.153 +ick -10.1531 +▁nut -10.1542 +▁fought -10.1543 +ko -10.1545 +▁setting 
-10.1558 +▁source -10.1566 +▁persist -10.1566 +▁mercy -10.1571 +▁compare -10.1581 +▁deeply -10.1584 +▁pile -10.1584 +▁attitude -10.1588 +▁delightful -10.1597 +▁endure -10.1602 +▁patience -10.161 +▁local -10.161 +▁victory -10.1615 +▁uttered -10.1622 +▁treated -10.1623 +▁separate -10.1626 +▁dragg -10.1627 +▁beard -10.1643 +▁rear -10.1652 +▁tied -10.1657 +▁title -10.1657 +▁triumph -10.1674 +▁gained -10.1688 +▁defend -10.17 +bury -10.1714 +▁increase -10.1717 +▁bark -10.172 +▁fled -10.1725 +▁pond -10.1728 +▁conquer -10.1746 +▁forehead -10.1746 +▁wag -10.1749 +▁organi -10.1751 +▁anxiety -10.1768 +▁encounter -10.1768 +▁sex -10.1773 +▁sank -10.1779 +▁halt -10.1784 +ella -10.1789 +▁cheek -10.1792 +▁writer -10.1793 +chi -10.1796 +▁employed -10.1805 +▁humble -10.1806 +▁raise -10.181 +▁troops -10.1814 +▁distinguished -10.1816 +▁giant -10.1821 +▁sink -10.1822 +▁flag -10.1826 +car -10.1826 +▁obtained -10.183 +▁discovery -10.1836 +▁national -10.1842 +▁jumped -10.1842 +▁commission -10.1859 +▁positive -10.1859 +▁loving -10.186 +▁exact -10.1861 +▁ideal -10.1862 +▁range -10.1864 +▁refer -10.1874 +▁murmured -10.1877 +▁encourage -10.1882 +▁college -10.1882 +▁novel -10.1884 +worth -10.1892 +▁mortal -10.1906 +▁fan -10.1914 +▁rolled -10.1915 +▁guilty -10.1918 +▁victor -10.1926 +▁approaching -10.1945 +▁relative -10.1952 +▁estate -10.1952 +▁ugly -10.1952 +▁metal -10.1967 +▁dared -10.1969 +▁boots -10.1969 +▁robert -10.1976 +▁clock -10.198 +▁admiration -10.1998 +▁fourteen -10.1998 +▁witch -10.1999 +▁barbar -10.2001 +▁pra -10.2017 +▁cake -10.2022 +▁shone -10.2025 +▁managed -10.2031 +▁volume -10.2045 +▁greek -10.2045 +▁dancing -10.2045 +j -10.2055 +▁wretched -10.2055 +▁condemn -10.2068 +▁magnificent -10.2068 +▁consult -10.2068 +▁fleet -10.2083 +▁arrangement -10.2092 +▁incident -10.2092 +▁misery -10.2092 +▁arrow -10.2094 +▁stroke -10.2099 +▁assist -10.21 +▁succeed -10.2108 +▁recent -10.2109 +▁build -10.211 +▁desperate -10.2115 +▁widow -10.2115 +▁market -10.2129 +fall -10.213 +▁wisdom -10.2139 +▁current -10.2139 +▁spoil -10.2139 +▁resist -10.2161 +▁obvious -10.2163 +▁sensible -10.2163 +▁wooden -10.2166 +▁addressed -10.2184 +▁bade -10.2185 +▁counsel -10.2186 +▁select -10.2186 +▁purchase -10.2186 +▁useless -10.2187 +▁fin -10.2195 +▁bringing -10.2207 +▁arrest -10.221 +▁stared -10.2212 +▁poison -10.2213 +▁gil -10.2214 +▁swallow -10.2234 +▁anna -10.2234 +rate -10.2234 +▁slid -10.2236 +▁block -10.2237 +▁sport -10.2242 +▁ninety -10.2245 +▁provide -10.2255 +▁lamb -10.2259 +▁interval -10.226 +▁described -10.228 +▁provision -10.2282 +▁striking -10.2282 +▁proposed -10.2285 +▁jump -10.2287 +▁suggest -10.2303 +▁melancholy -10.2306 +▁warrior -10.2306 +▁burden -10.2308 +▁departure -10.2309 +▁limb -10.2316 +▁troubled -10.2325 +▁meadow -10.233 +▁sacred -10.233 +▁straw -10.233 +▁tru -10.2332 +▁solid -10.2334 +▁soil -10.2348 +▁lucy -10.2348 +▁civil -10.2348 +▁recover -10.2348 +▁energy -10.2354 +▁powder -10.2354 +▁resumed -10.2354 +▁intense -10.2354 +▁british -10.2378 +▁agreeable -10.2389 +▁trot -10.2393 +▁everyone -10.2393 +▁concern -10.2394 +▁voyage -10.2402 +▁southern -10.2402 +▁bosom -10.2406 +▁utterly -10.2424 +▁essential -10.2426 +▁feed -10.2427 +▁household -10.243 +▁extremely -10.2434 +▁wondering -10.2435 +▁list -10.2446 +▁experiment -10.2451 +▁joseph -10.2451 +▁mystery -10.2451 +▁restore -10.2455 +▁blush -10.2456 +fold -10.2459 +▁lap -10.2464 +▁chosen -10.2471 +▁epi -10.2472 +▁intellect -10.2475 +▁curtain -10.2475 +ology -10.2475 +▁pine -10.2477 +▁mounted -10.2481 +har -10.249 +▁punish -10.2492 +▁drift -10.2502 +▁wedding -10.2506 
+▁ko -10.2508 +▁preparation -10.2524 +▁resolution -10.2524 +▁oppress -10.2524 +▁fix -10.2535 +▁sch -10.2548 +▁victim -10.2549 +▁summon -10.2549 +▁julia -10.2549 +▁flood -10.2551 +▁slightly -10.257 +▁lodge -10.2578 +▁unexpected -10.2598 +▁confusion -10.2598 +▁addition -10.2598 +▁conceive -10.2598 +▁jesus -10.2599 +▁wire -10.2608 +long -10.2615 +▁rude -10.2624 +▁fatal -10.2627 +▁patch -10.2629 +▁careless -10.2629 +▁vari -10.2635 +▁wal -10.2643 +▁catherine -10.2647 +▁parliament -10.2647 +▁profound -10.2647 +▁aloud -10.2648 +▁relieve -10.2649 +▁push -10.266 +▁accompanied -10.2672 +▁sovereign -10.2672 +▁singular -10.2672 +▁composed -10.2672 +▁assistance -10.2676 +▁echo -10.2678 +▁shaking -10.2679 +▁teacher -10.2684 +▁horrible -10.2697 +▁strict -10.2697 +▁gown -10.2703 +▁punishment -10.2704 +▁verse -10.2712 +atory -10.2712 +▁mistaken -10.2716 +▁swept -10.2722 +▁gesture -10.2722 +▁steel -10.2724 +▁bush -10.2735 +▁affected -10.2739 +▁directed -10.2745 +▁absurd -10.2747 +▁surrounded -10.2747 +▁scrap -10.2749 +▁sugar -10.2749 +▁immediate -10.2753 +▁saddle -10.2753 +▁sighed -10.2768 +▁govern -10.2768 +▁pea -10.2769 +▁snap -10.2769 +▁arise -10.277 +▁exchange -10.2772 +▁impatient -10.2772 +▁whip -10.2794 +▁stretch -10.2797 +▁embrace -10.2798 +▁disease -10.2798 +▁profit -10.2798 +▁riding -10.2802 +▁recovered -10.2803 +▁convinced -10.2814 +▁leaning -10.2815 +▁domestic -10.2823 +▁complex -10.2823 +▁manifest -10.2823 +▁indulge -10.2823 +▁genius -10.2824 +▁agent -10.2841 +▁veil -10.2841 +▁description -10.2848 +▁inclined -10.2848 +▁deceive -10.2848 +▁mac -10.2851 +▁darling -10.2861 +▁reign -10.2866 +▁enormous -10.2874 +▁restrain -10.2874 +▁duties -10.2876 +▁enable -10.2899 +ttered -10.2902 +▁pole -10.2906 +▁exception -10.292 +▁intimate -10.2925 +▁countess -10.2927 +▁tribe -10.2931 +▁oil -10.2938 +cast -10.2944 +▁handkerchief -10.295 +▁midnight -10.295 +▁problem -10.295 +▁reli -10.2951 +▁unre -10.2952 +▁crush -10.2959 +▁discuss -10.296 +▁tramp -10.296 +▁whirl -10.2977 +▁hori -10.2985 +hin -10.2992 +▁official -10.3001 +▁drown -10.3002 +▁pierre -10.3002 +▁scheme -10.3002 +▁locked -10.3006 +▁permitted -10.3007 +▁carr -10.3007 +▁connected -10.3008 +▁assure -10.3015 +▁cock -10.3018 +▁utmost -10.3027 +▁devoted -10.3027 +▁sufficiently -10.3036 +ulation -10.304 +▁intellectual -10.3053 +▁carpet -10.3053 +▁objection -10.3062 +▁afterward -10.3067 +▁reality -10.3067 +cho -10.3068 +gate -10.3074 +▁negro -10.3079 +▁retain -10.3079 +▁ascend -10.3079 +▁cease -10.308 +▁marvel -10.3081 +most -10.3086 +▁bond -10.3092 +▁kate -10.3101 +▁breaking -10.3104 +▁coal -10.3105 +▁ignorant -10.3106 +▁twin -10.3109 +▁astonishment -10.3131 +▁coffee -10.3131 +▁execut -10.3146 +▁origin -10.3147 +▁final -10.3151 +▁inhabitants -10.3157 +▁stable -10.3164 +▁parties -10.3169 +▁cities -10.3169 +▁generous -10.3183 +▁describe -10.3185 +▁jar -10.3187 +▁plunge -10.3192 +▁announced -10.3202 +▁merit -10.3207 +▁ere -10.3222 +▁disappoint -10.3228 +▁suggestion -10.3233 +▁doubtless -10.3234 +▁trunk -10.3236 +▁job -10.3253 +▁stamp -10.3257 +▁divided -10.3258 +▁appointed -10.3259 +▁acquainted -10.3262 +▁absolute -10.327 +▁fearful -10.3279 +▁privilege -10.3289 +▁steep -10.3291 +▁vote -10.3291 +▁craft -10.3296 +▁hunter -10.3296 +▁modest -10.3303 +▁forbid -10.3305 +▁endeavour -10.3315 +▁sweep -10.3315 +▁beheld -10.3315 +acious -10.332 +▁absorb -10.3342 +▁construct -10.3342 +▁expedition -10.3342 +▁empire -10.3342 +▁erect -10.3343 +▁offend -10.3344 +▁intend -10.3351 +▁chin -10.3356 +▁permit -10.3363 +▁contract -10.3368 +▁thirst -10.3369 +▁destroyed -10.337 +▁ger 
-10.3375 +▁wagon -10.3378 +▁gloom -10.3393 +▁atmosphere -10.3395 +▁reserve -10.3395 +lock -10.3412 +▁nonsense -10.3422 +▁prevail -10.3422 +▁quality -10.3422 +▁clasp -10.3422 +▁concluded -10.3426 +▁katy -10.3433 +▁eternal -10.3449 +▁neglect -10.3449 +▁creep -10.345 +▁squire -10.345 +▁muttered -10.3452 +▁electric -10.3452 +▁hay -10.3456 +▁expense -10.3476 +▁scorn -10.3476 +▁retired -10.3476 +▁murmur -10.3482 +▁stout -10.3484 +▁sharply -10.35 +▁district -10.3503 +▁leaf -10.3503 +▁failure -10.3507 +▁numerous -10.353 +▁infant -10.3531 +▁traveller -10.3535 +▁crep -10.354 +▁june -10.3547 +work -10.3547 +▁hunger -10.3548 +▁recommend -10.3557 +▁jean -10.3562 +▁richard -10.3571 +▁monte -10.3588 +▁preach -10.3593 +▁palm -10.3594 +▁tap -10.36 +▁anywhere -10.3612 +▁disposition -10.3612 +▁mirror -10.3612 +▁venture -10.3616 +▁pound -10.3638 +▁cigar -10.3639 +▁invited -10.364 +▁bench -10.3645 +▁protection -10.3653 +▁benefit -10.3667 +▁thomas -10.3667 +▁reproach -10.3694 +▁clerk -10.3694 +hu -10.3707 +▁uniform -10.3722 +▁generation -10.3722 +▁compass -10.3722 +▁warning -10.3723 +▁extended -10.3728 +▁difficulties -10.3731 +▁affect -10.374 +▁maybe -10.3741 +▁comb -10.3743 +▁seal -10.3743 +▁groan -10.3743 +▁western -10.3751 +▁chop -10.3753 +▁earn -10.3756 +▁score -10.3758 +▁idle -10.3761 +▁astonished -10.3777 +▁introduced -10.3777 +▁lieutenant -10.3777 +▁leisure -10.3777 +▁violence -10.3777 +▁firmly -10.3778 +▁monster -10.3784 +▁properly -10.3785 +▁rendered -10.3797 +▁twist -10.3805 +▁pirate -10.3807 +▁batter -10.3808 +▁robber -10.3809 +▁wept -10.3815 +▁descended -10.3821 +▁throwing -10.3822 +▁leaned -10.3823 +▁ornament -10.3834 +▁andrew -10.3839 +▁capture -10.3841 +▁bushes -10.3852 +▁republic -10.3861 +▁confident -10.3862 +▁lean -10.3902 +▁date -10.3904 +▁counter -10.3909 +▁northern -10.3918 +▁pearl -10.3924 +▁nearest -10.3933 +▁francis -10.3946 +▁wandering -10.3948 +▁frequent -10.3957 +▁startled -10.3961 +▁statement -10.3965 +▁occur -10.3971 +▁bloom -10.3974 +▁nerve -10.3974 +▁induce -10.3978 +▁flatter -10.3984 +▁ambition -10.4002 +▁madam -10.4005 +▁monk -10.4018 +▁rent -10.4023 +▁investigat -10.4031 +▁rabbit -10.4031 +▁confirm -10.4031 +▁regiment -10.4031 +▁submit -10.4031 +▁spell -10.4032 +▁eva -10.4033 +▁slope -10.4036 +▁furious -10.4037 +▁bestow -10.4047 +▁rail -10.4057 +▁ralph -10.4059 +▁compelled -10.4059 +▁thread -10.4059 +▁scattered -10.406 +▁deny -10.4067 +▁curl -10.4068 +▁chill -10.4075 +▁pronounc -10.4088 +▁mankind -10.4088 +▁cattle -10.4091 +▁male -10.4097 +▁execution -10.41 +▁tide -10.4115 +▁supreme -10.4117 +▁valuable -10.4117 +▁likewise -10.4117 +▁convey -10.4117 +▁gloomy -10.4119 +▁coin -10.4122 +▁actual -10.4129 +▁fog -10.4136 +▁tax -10.4139 +▁province -10.4146 +▁grateful -10.4146 +▁spiritual -10.4146 +▁vanished -10.4146 +▁diana -10.4146 +▁haunt -10.4146 +▁dragon -10.4151 +▁crawl -10.4153 +▁neat -10.4154 +▁china -10.4171 +▁gratitude -10.4174 +▁gasp -10.4179 +▁irre -10.419 +▁finish -10.4193 +▁intent -10.4198 +▁fright -10.4202 +▁embarrass -10.4203 +▁thirteen -10.4203 +▁ruth -10.4209 +▁slightest -10.4212 +▁development -10.4213 +▁interview -10.4233 +▁spectacle -10.4233 +▁brook -10.4233 +▁weakness -10.4255 +▁audience -10.4262 +▁consequently -10.4262 +▁abroad -10.4262 +▁release -10.4262 +▁aspect -10.4263 +▁painted -10.4263 +▁insult -10.4263 +▁sooth -10.4269 +▁disappointment -10.427 +▁emerg -10.4271 +▁brig -10.4284 +▁esteem -10.4291 +▁publish -10.4291 +▁passenger -10.4291 +▁invitation -10.4291 +▁piano -10.4291 +▁irish -10.4295 +▁desk -10.4297 +▁beaten -10.4318 +▁fifth -10.432 +▁impulse -10.432 
+▁swear -10.432 +▁purple -10.4322 +▁committed -10.4324 +▁countries -10.4327 +▁perceive -10.4328 +▁eaten -10.4329 +▁celebrat -10.435 +▁grandmother -10.435 +▁shudder -10.435 +▁spanish -10.435 +▁sunshine -10.435 +▁hitherto -10.4352 +▁amid -10.4366 +▁mock -10.4378 +▁marilla -10.4379 +▁snake -10.4379 +▁interfere -10.4381 +▁walter -10.4385 +▁marble -10.4388 +terior -10.4394 +▁mission -10.4399 +▁boot -10.4407 +▁furniture -10.4409 +▁driving -10.4409 +▁steady -10.4409 +stead -10.4414 +▁circumstance -10.4417 +▁interpret -10.4438 +▁enchant -10.4438 +▁error -10.4439 +▁conviction -10.4449 +▁helpless -10.445 +▁qualities -10.4468 +▁medicine -10.4468 +▁italian -10.447 +▁hastened -10.4472 +▁occasionally -10.4474 +▁pursued -10.4475 +ux -10.4475 +▁hesitated -10.4493 +▁chase -10.4496 +▁independent -10.4498 +▁oliver -10.4498 +▁linger -10.4503 +▁examined -10.4508 +▁repent -10.4521 +▁physician -10.4528 +▁beloved -10.4558 +▁attached -10.4558 +▁florence -10.4558 +▁honey -10.4565 +▁mouse -10.4569 +▁cries -10.457 +▁poem -10.4573 +▁ram -10.4588 +▁destruction -10.4588 +▁messenger -10.4588 +▁tristram -10.4588 +▁fulfil -10.4588 +▁fancied -10.4588 +▁excess -10.4588 +▁bake -10.4604 +mont -10.4613 +▁thornton -10.4618 +▁quantity -10.4618 +▁wh -10.4628 +▁created -10.4633 +▁curse -10.4637 +▁continually -10.4638 +▁lightning -10.4642 +▁borne -10.4669 +▁mild -10.4673 +ttle -10.4677 +▁disposed -10.4679 +▁rifle -10.4679 +▁polly -10.468 +▁goat -10.4682 +▁total -10.4686 +▁virginia -10.4689 +▁backward -10.469 +▁peril -10.469 +▁kick -10.4691 +▁quo -10.4702 +▁glorious -10.471 +▁multitude -10.471 +▁leather -10.471 +▁absent -10.471 +▁demon -10.4711 +▁torture -10.4711 +▁debt -10.4712 +▁accord -10.4725 +▁catholic -10.474 +▁pill -10.475 +▁flour -10.4764 +▁library -10.4771 +▁pursuit -10.4771 +▁shirt -10.4771 +▁dearest -10.4772 +▁collar -10.4773 +▁declare -10.4781 +▁tempt -10.4784 +▁branch -10.4785 +▁steadily -10.4802 +▁disgust -10.4802 +▁silly -10.4803 +▁robe -10.481 +▁arrive -10.4812 +▁drank -10.4832 +▁communicat -10.4847 +▁mate -10.485 +▁rachel -10.4863 +▁washington -10.4863 +▁resign -10.4864 +▁meantime -10.4867 +▁engagement -10.4869 +▁separated -10.4872 +▁quiver -10.4872 +▁discussion -10.4882 +▁ventured -10.489 +▁nail -10.4894 +▁surrounding -10.4894 +▁polish -10.4895 +▁lace -10.4896 +▁swell -10.4906 +▁lincoln -10.4926 +▁student -10.4926 +▁glitter -10.4926 +▁joke -10.4931 +▁russian -10.4941 +▁readily -10.4943 +▁poverty -10.4957 +▁disgrace -10.4957 +▁heavily -10.4957 +▁cheese -10.4957 +▁staff -10.4984 +▁entreat -10.4988 +▁farewell -10.4988 +▁lunch -10.4988 +▁peep -10.4989 +▁someone -10.4997 +▁chris -10.5008 +▁disappear -10.5012 +▁decision -10.502 +▁pistol -10.502 +▁spur -10.5021 +▁assumed -10.5027 +▁extend -10.5044 +▁definite -10.5051 +▁enthusiasm -10.5051 +▁undertake -10.5052 +▁committee -10.5083 +▁simon -10.5083 +▁scale -10.5094 +▁applied -10.5115 +▁fence -10.5115 +▁related -10.5117 +▁vice -10.5129 +▁unpleasant -10.5146 +▁probable -10.5146 +▁procure -10.5147 +▁frown -10.515 +istic -10.5168 +▁cloak -10.5182 +▁humanity -10.5191 +▁dwarf -10.521 +▁families -10.521 +▁philosopher -10.521 +▁overcome -10.521 +▁defeat -10.5211 +▁plac -10.5215 +▁fastened -10.5217 +▁tomb -10.5219 +▁classes -10.5236 +▁marsh -10.5239 +▁gracious -10.5243 +▁remote -10.5243 +▁cell -10.5247 +▁shriek -10.5275 +▁rescue -10.5276 +▁chose -10.5281 +▁pool -10.529 +▁slo -10.5298 +▁cutting -10.5301 +▁coward -10.5307 +▁dirty -10.5307 +▁border -10.5307 +▁hook -10.5308 +▁monkey -10.5308 +▁chuck -10.5311 +▁weigh -10.5321 +▁emily -10.5325 +▁jest -10.5328 +▁mule -10.5328 +▁associate 
-10.534 +▁glimpse -10.534 +▁stuck -10.534 +▁bolt -10.5369 +▁murderer -10.538 +▁pony -10.5385 +▁rattl -10.5401 +▁distinguish -10.5401 +▁institution -10.5405 +▁cunning -10.5405 +▁compliment -10.5405 +▁spin -10.5406 +▁appetite -10.5438 +▁reputation -10.5438 +▁feeble -10.5438 +▁series -10.5452 +▁graceful -10.5457 +▁phrase -10.5471 +▁platform -10.5471 +▁clay -10.5481 +▁opposition -10.5504 +▁boast -10.5505 +▁lane -10.551 +▁growth -10.5527 +▁inclination -10.5537 +▁behave -10.5537 +▁susan -10.5538 +▁dislike -10.5543 +▁distinction -10.5545 +▁illustrat -10.557 +▁nicholas -10.557 +▁satisfy -10.557 +▁drama -10.557 +▁elbow -10.557 +▁consum -10.5571 +▁oath -10.5586 +▁channel -10.5603 +▁spear -10.5603 +▁slain -10.5603 +▁characteristic -10.5605 +▁sauce -10.5609 +▁frog -10.5629 +▁conception -10.5637 +▁timid -10.5637 +▁apparent -10.5659 +▁center -10.567 +▁variety -10.567 +▁dusk -10.5679 +shire -10.5689 +▁apt -10.5693 +▁column -10.5704 +▁revenge -10.5704 +▁rival -10.571 +▁imitat -10.571 +▁passionate -10.5716 +▁selfish -10.5721 +▁norman -10.5725 +▁extra -10.5737 +▁repair -10.5738 +▁thrill -10.5738 +▁treatment -10.5747 +▁rosa -10.575 +▁organ -10.5768 +▁martin -10.5771 +▁indifferent -10.5772 +▁thither -10.5772 +▁pepper -10.5772 +▁gallant -10.5776 +▁recollect -10.5784 +▁scarce -10.5804 +▁trembled -10.5804 +▁shield -10.5806 +▁mingled -10.5806 +▁brick -10.5829 +▁harsh -10.583 +▁humor -10.5838 +▁mischief -10.584 +▁tremendous -10.584 +▁function -10.584 +▁smart -10.584 +▁sultan -10.5874 +▁dismiss -10.5874 +▁threatened -10.5875 +ji -10.5876 +▁cheap -10.5878 +▁vine -10.5878 +▁flock -10.5898 +▁endeavor -10.5908 +▁italy -10.5912 +▁flutter -10.5913 +▁whisk -10.5916 +▁waist -10.5922 +▁monarch -10.5943 +▁smoking -10.5943 +▁africa -10.5943 +▁accuse -10.5943 +▁herbert -10.5946 +▁refresh -10.5977 +▁rejoice -10.5977 +▁pillow -10.5979 +▁hopeless -10.5989 +▁poetry -10.5991 +▁perish -10.6007 +▁philosophy -10.6012 +▁bernard -10.6012 +▁whistle -10.6013 +▁lament -10.6014 +▁expectation -10.6028 +▁improve -10.6034 +▁fountain -10.6047 +▁perplex -10.6047 +▁despise -10.6047 +▁league -10.6047 +▁narrat -10.6047 +▁ignorance -10.6049 +▁reference -10.6051 +▁sunk -10.6052 +sail -10.6055 +▁wip -10.6057 +▁duck -10.6068 +▁partner -10.6076 +▁grove -10.6081 +▁prophet -10.6082 +▁shiver -10.6083 +▁neighbourhood -10.6083 +▁purse -10.6084 +▁representative -10.6084 +▁precisely -10.6104 +▁angle -10.6115 +▁acquired -10.6117 +▁chimney -10.6117 +▁doctrine -10.6117 +▁maxim -10.6117 +▁majority -10.6132 +▁autumn -10.6152 +▁cristo -10.6152 +▁disguise -10.6152 +▁achieve -10.6152 +▁confused -10.6152 +▁reduced -10.6152 +▁earlier -10.6155 +▁theatre -10.616 +▁decide -10.6172 +ological -10.6188 +▁continent -10.6188 +▁occupation -10.6188 +▁vigorous -10.6188 +▁decline -10.6188 +▁community -10.6193 +▁motionless -10.6198 +▁hatred -10.6205 +▁communication -10.6206 +▁determin -10.6218 +▁comment -10.6223 +▁approve -10.6223 +▁ceremony -10.6223 +▁criminal -10.6223 +▁scientific -10.6223 +▁duchess -10.6223 +▁vivid -10.6223 +▁shift -10.6223 +▁avail -10.6224 +▁bowl -10.6234 +▁johnson -10.6241 +▁contrast -10.6259 +▁slender -10.6259 +▁amusement -10.6259 +▁plot -10.6259 +▁damp -10.6261 +▁association -10.6294 +▁uncertain -10.6294 +▁snatch -10.6294 +▁pressure -10.6299 +▁apply -10.6306 +▁restless -10.6311 +▁perch -10.6315 +▁notwithstanding -10.633 +▁swung -10.633 +▁planet -10.633 +▁stirred -10.6337 +▁attendant -10.634 +▁thro -10.6354 +▁enjoyment -10.6364 +▁worry -10.6366 +▁albert -10.6366 +▁naked -10.6367 +▁talent -10.6372 +▁marian -10.6387 +▁reform -10.639 +▁lyn -10.6402 +▁deliberate 
-10.6402 +▁intelligent -10.6402 +▁sensitive -10.6402 +▁yonder -10.6402 +▁pupil -10.6402 +▁frightful -10.6409 +▁doubtful -10.6411 +▁standard -10.6423 +▁deposit -10.6439 +▁magistrate -10.6439 +▁shepherd -10.6439 +▁stomach -10.6439 +▁renew -10.6439 +▁hedge -10.6458 +▁possibility -10.6475 +▁fatigue -10.6475 +▁francs -10.6475 +▁portrait -10.6475 +▁resemble -10.6475 +▁favorite -10.6477 +▁cream -10.6491 +▁pope -10.651 +▁secretary -10.6524 +▁divers -10.6526 +▁activity -10.6548 +▁speculat -10.6548 +▁humour -10.6553 +▁fitted -10.6575 +▁external -10.6585 +▁cetera -10.6585 +▁wrapped -10.6586 +▁jaw -10.6612 +▁fred -10.6615 +▁examination -10.6622 +▁lodging -10.6622 +▁crow -10.6623 +▁owing -10.6625 +▁balance -10.6631 +▁puff -10.6644 +▁tenderness -10.6648 +▁porthos -10.6659 +▁anchor -10.666 +▁interrupt -10.6668 +▁driver -10.6689 +▁necessarily -10.6696 +▁perpetual -10.6696 +▁agony -10.6703 +▁scholar -10.6733 +▁scotland -10.6733 +▁suppress -10.6733 +▁wrath -10.6733 +▁wreck -10.6733 +▁exceed -10.6734 +▁perfection -10.6758 +▁doorway -10.6765 +▁india -10.6766 +▁clergy -10.6771 +▁tradition -10.6771 +▁section -10.6771 +▁eastern -10.6771 +▁wives -10.6774 +▁convention -10.6779 +▁announc -10.6782 +▁egypt -10.6797 +▁contradict -10.6808 +▁scratch -10.6808 +▁glove -10.6808 +▁central -10.6808 +▁wax -10.6826 +ifying -10.6831 +▁prepare -10.6833 +▁accompany -10.6846 +▁increasing -10.6846 +▁liberal -10.6846 +▁raising -10.6846 +▁orange -10.6847 +▁shoe -10.687 +▁attribute -10.6884 +▁literature -10.6884 +▁withdraw -10.6884 +▁hawk -10.6885 +thorpe -10.6886 +▁whither -10.6887 +▁moonlight -10.6887 +▁examine -10.6909 +▁happily -10.6922 +▁precede -10.6925 +▁detective -10.6927 +▁inches -10.6927 +▁solitary -10.696 +▁dutch -10.696 +▁napoleon -10.6998 +▁uneasy -10.6998 +▁cardinal -10.6998 +▁blew -10.6999 +▁fowl -10.6999 +▁decorat -10.6999 +▁childhood -10.7009 +▁torment -10.7012 +▁scent -10.7016 +▁losing -10.7024 +▁permission -10.7037 +▁blank -10.707 +▁upstairs -10.7075 +▁capacity -10.7075 +▁trifle -10.7076 +▁folly -10.7076 +▁remove -10.7102 +▁vengeance -10.7114 +▁enterprise -10.7114 +▁bedroom -10.7114 +▁anyhow -10.7114 +▁inquiry -10.7115 +▁ashes -10.714 +▁hush -10.7148 +▁awkward -10.7153 +▁saturday -10.7153 +▁genuine -10.7153 +▁surviv -10.7154 +▁drag -10.7156 +▁skirt -10.7156 +▁affectionate -10.7163 +▁tang -10.7179 +▁mutual -10.7192 +▁dispute -10.7192 +▁eagle -10.7192 +▁income -10.7193 +▁bind -10.7201 +▁wilt -10.7204 +▁fame -10.7206 +▁improvement -10.7208 +▁differ -10.7224 +▁awoke -10.7231 +▁sleeve -10.7231 +▁solitude -10.7231 +▁favourite -10.7234 +▁detect -10.7266 +▁comprehend -10.7271 +▁preparing -10.7271 +▁serpent -10.7271 +▁summit -10.7271 +▁knot -10.7271 +▁knit -10.7271 +▁copy -10.7271 +▁woe -10.7273 +▁stopping -10.7274 +▁faded -10.7274 +▁hideous -10.7279 +▁julie -10.7279 +▁shine -10.7306 +▁axe -10.731 +▁conflict -10.731 +▁proposition -10.731 +▁refuge -10.731 +▁gallery -10.731 +▁bundle -10.7311 +▁slavery -10.7324 +▁mask -10.733 +▁alyosha -10.735 +▁ladder -10.7359 +▁department -10.737 +▁discharge -10.739 +▁depress -10.739 +▁scarlet -10.7392 +▁gallop -10.7394 +▁kitty -10.7397 +▁paw -10.7403 +▁receiving -10.743 +▁surrender -10.743 +▁sustain -10.743 +▁twilight -10.743 +▁congress -10.743 +▁ireland -10.7431 +▁funny -10.7435 +▁lend -10.7459 +▁constitute -10.747 +▁crystal -10.747 +▁lofty -10.747 +▁funeral -10.747 +▁spain -10.747 +▁exceedingly -10.747 +▁damn -10.7473 +▁commun -10.7503 +▁prejudice -10.751 +▁porch -10.7511 +▁assistant -10.7515 +▁today -10.7521 +▁smot -10.7543 +▁enclos -10.7545 +▁industry -10.7551 +▁defence -10.7551 +▁hither 
-10.7554 +▁coloni -10.7567 +▁marguerite -10.7591 +▁miracle -10.7591 +▁inherit -10.7592 +▁beggar -10.7594 +▁unlike -10.7613 +▁envelope -10.7632 +▁indignation -10.7632 +▁natasha -10.7632 +▁proposal -10.7632 +▁fragment -10.7632 +▁roast -10.7634 +▁roused -10.7635 +encies -10.7651 +▁commenced -10.7673 +▁resource -10.7673 +▁population -10.7673 +▁quoth -10.7683 +▁tumble -10.7702 +▁pursue -10.7705 +▁educat -10.7706 +▁afflict -10.7714 +▁contact -10.7714 +▁crimson -10.7714 +▁division -10.7714 +▁disorder -10.7714 +▁copper -10.7715 +▁moderate -10.7716 +▁drum -10.772 +▁swim -10.7727 +▁salute -10.7732 +▁assume -10.7746 +▁nav -10.7747 +▁emphasi -10.7756 +▁overwhelm -10.7756 +▁shakespeare -10.7756 +▁struggling -10.7756 +▁tranquil -10.7756 +▁muscle -10.7756 +▁chicken -10.7756 +▁tread -10.7761 +▁claw -10.7764 +▁solicit -10.7766 +▁bible -10.778 +▁threat -10.7796 +▁velvet -10.7797 +▁exposed -10.7797 +▁idiot -10.7797 +▁barrel -10.7798 +▁ripe -10.7799 +▁penny -10.7809 +▁temptation -10.7822 +▁danglars -10.7839 +mbled -10.7841 +keep -10.7867 +▁chu -10.787 +▁centuries -10.7881 +▁distribut -10.7881 +▁reject -10.7881 +▁retorted -10.7881 +▁concentrat -10.7881 +▁cordial -10.7881 +▁motor -10.7882 +▁cannon -10.7884 +▁wretch -10.7905 +▁assurance -10.7923 +▁thief -10.7923 +▁survey -10.7923 +▁railway -10.7925 +▁vital -10.7925 +▁jackson -10.7933 +▁combat -10.7935 +▁recollection -10.7949 +▁security -10.7965 +▁nancy -10.7965 +▁jacob -10.7965 +▁clutch -10.7965 +▁growl -10.797 +▁blanket -10.7971 +▁cellar -10.7973 +▁indignant -10.8007 +▁convenient -10.8007 +▁worm -10.8008 +▁screen -10.8008 +▁coarse -10.8008 +▁transport -10.801 +▁determination -10.8019 +▁bullet -10.8019 +▁appreciate -10.805 +▁invisible -10.805 +▁devotion -10.805 +▁mixture -10.805 +▁candid -10.8051 +▁performance -10.8059 +▁rebel -10.8078 +▁exquisite -10.8093 +▁bargain -10.8093 +▁tobacco -10.8093 +▁loyal -10.8094 +▁mould -10.8094 +▁attentive -10.8135 +▁dorothy -10.8135 +▁brute -10.8136 +▁establishment -10.8145 +▁glen -10.8163 +▁inhabit -10.8179 +▁obscure -10.8179 +▁borrow -10.8179 +▁essence -10.8179 +▁dismay -10.8179 +hurst -10.8185 +▁vow -10.8195 +▁flee -10.82 +▁pluck -10.8222 +▁coffin -10.8222 +▁sunset -10.8224 +▁stephen -10.8226 +▁blade -10.8228 +▁holiday -10.8265 +▁mechanical -10.8265 +▁cotton -10.8266 +▁awakened -10.827 +hold -10.8309 +▁ridiculous -10.8309 +▁hesitation -10.8309 +▁corpse -10.8309 +▁saving -10.831 +▁sancho -10.831 +foot -10.8316 +▁eldest -10.8353 +▁peak -10.8374 +▁despite -10.8397 +▁edith -10.8397 +▁wilson -10.8397 +▁cherish -10.8397 +▁resistance -10.8403 +▁argue -10.8405 +▁inquire -10.8437 +▁apprehension -10.8441 +▁avenue -10.8441 +▁drake -10.8441 +▁propose -10.8446 +▁inferior -10.8486 +▁staircase -10.8486 +▁wherefore -10.8486 +▁carlyle -10.8486 +▁couch -10.8496 +▁route -10.8504 +▁politics -10.853 +▁tomorrow -10.853 +▁confined -10.8531 +▁naught -10.8531 +▁throng -10.8533 +▁sunlight -10.854 +▁imperfect -10.8575 +▁indifference -10.8575 +▁obedience -10.8575 +▁reception -10.8575 +▁turkey -10.8575 +▁vegetable -10.8575 +▁residence -10.8575 +▁violet -10.8575 +▁sarah -10.8575 +▁altar -10.8577 +▁grieve -10.8579 +▁jerk -10.8587 +▁magician -10.8589 +▁ensu -10.8609 +▁blossom -10.862 +▁lantern -10.862 +▁resolute -10.862 +▁thoughtfully -10.8621 +▁fortnight -10.8665 +▁trumpet -10.8665 +▁unwilling -10.8665 +▁valjean -10.8665 +▁lecture -10.8665 +▁whereupon -10.8665 +▁holland -10.8665 +▁creek -10.8666 +▁changing -10.8666 +▁slice -10.8666 +▁accent -10.8667 +▁normal -10.8667 +▁disagreeable -10.8711 +▁frederick -10.8711 +▁rubbed -10.8711 +▁dumb -10.8711 +▁establish 
-10.8736 +▁import -10.8754 +▁affirm -10.8757 +▁matthew -10.8757 +▁bunch -10.8757 +▁hoping -10.8758 +▁convert -10.8759 +▁brisk -10.8759 +▁bending -10.8763 +▁michael -10.8802 +▁mademoiselle -10.8802 +▁easier -10.8802 +▁facing -10.8803 +▁jones -10.8804 +▁excellency -10.8848 +▁literary -10.8849 +▁gossip -10.8849 +▁devour -10.8849 +▁stagger -10.8849 +▁pencil -10.8849 +▁average -10.8849 +▁hammer -10.8851 +▁triumphant -10.8855 +▁preferred -10.8855 +burn -10.8877 +▁application -10.8895 +▁occupy -10.8895 +▁authorities -10.8898 +▁ascertain -10.8941 +▁corridor -10.8941 +▁delicious -10.8941 +▁practise -10.8941 +▁universe -10.8941 +▁shilling -10.8941 +▁contest -10.8942 +▁ashore -10.8942 +▁commit -10.8983 +▁administration -10.8988 +▁studied -10.8988 +▁rigid -10.8988 +▁adorn -10.8989 +▁elsewhere -10.9035 +▁innocence -10.9035 +▁journal -10.9035 +▁landscape -10.9035 +▁telegraph -10.9035 +▁angrily -10.9035 +▁campaign -10.9035 +▁unjust -10.9035 +▁flourish -10.904 +▁challenge -10.9082 +▁torrent -10.9082 +▁relate -10.9127 +▁assembled -10.913 +▁impressed -10.913 +▁canoe -10.915 +▁conclud -10.9171 +▁quixote -10.9177 +▁satisfactory -10.9177 +▁niece -10.9177 +▁deaf -10.9178 +▁glid -10.9179 +▁jimmy -10.9179 +▁regulat -10.9179 +▁chatter -10.9215 +▁statue -10.9225 +▁glacier -10.9225 +▁envy -10.9225 +▁boston -10.9227 +▁richmond -10.9229 +▁denied -10.9229 +▁fanny -10.9232 +▁solomon -10.9273 +▁vulgar -10.9273 +▁stalk -10.9274 +▁spoon -10.9279 +▁abuse -10.928 +▁basin -10.9291 +▁feature -10.9293 +▁convict -10.9304 +▁admiral -10.9321 +▁architect -10.9321 +▁ribbon -10.9321 +▁permanent -10.9321 +▁april -10.9321 +▁jolly -10.9322 +borough -10.9322 +▁neighborhood -10.9323 +▁impart -10.9324 +▁horrid -10.937 +▁immortal -10.937 +▁penetrate -10.937 +▁prudence -10.937 +▁reconcil -10.937 +▁spaniard -10.937 +▁supposing -10.937 +▁telephone -10.937 +▁temperature -10.937 +▁oyster -10.937 +▁appointment -10.9375 +▁egyptian -10.9384 +▁dwelt -10.9419 +▁nephew -10.9419 +▁railroad -10.9419 +▁september -10.9419 +▁gilbert -10.9419 +▁wheat -10.9419 +▁device -10.9419 +▁squee -10.9453 +▁elegant -10.9468 +▁advertise -10.9517 +▁turtle -10.9517 +▁rational -10.9517 +▁brood -10.9519 +comb -10.9563 +▁assembly -10.9566 +▁cultivate -10.9566 +▁specimen -10.9566 +▁undoubtedly -10.9566 +▁editor -10.9567 +▁dropping -10.9567 +▁medical -10.9569 +▁balloon -10.9569 +▁whale -10.9574 +▁composition -10.9616 +▁footsteps -10.9616 +▁launcelot -10.9616 +▁discourse -10.9616 +▁errand -10.9616 +▁converse -10.9618 +▁advancing -10.9666 +▁downstairs -10.9666 +▁tumult -10.9666 +▁corrupt -10.9666 +▁suffice -10.9666 +▁anguish -10.9666 +▁shaggy -10.9666 +▁retire -10.9716 +▁timber -10.9717 +▁abstract -10.9767 +▁embroider -10.9767 +▁photograph -10.9767 +▁prosperity -10.9767 +▁terribly -10.9767 +▁territory -10.9767 +▁threshold -10.9767 +▁pavement -10.9767 +▁injured -10.9767 +▁levin -10.9767 +▁agitation -10.9818 +▁rascal -10.9818 +▁presume -10.9819 +▁strat -10.9842 +▁observing -10.9869 +▁obstacle -10.9869 +▁simplicity -10.9869 +▁slumber -10.9869 +▁supplied -10.9869 +▁combination -10.9869 +▁drain -10.9869 +▁wilderness -10.9869 +▁believing -10.992 +▁villain -10.992 +▁friday -10.992 +▁reckless -10.992 +▁injury -10.992 +▁clapp -10.9921 +▁symptom -10.9972 +▁kennedy -10.9972 +▁sledge -10.9972 +▁monday -10.9972 +▁hercules -10.9972 +▁ceiling -10.9972 +▁lemon -10.9972 +▁plague -10.9974 +▁canvas -10.9976 +▁impatience -11.0023 +▁uncomfortable -11.0023 +▁access -11.0023 +▁senator -11.0023 +▁swimming -11.0024 +▁barrier -11.0024 +▁adjust -11.0076 +▁comparison -11.0076 +▁proclaim -11.0076 +▁wrinkl 
-11.0076 +▁overlook -11.0076 +▁mitya -11.0076 +▁guilt -11.01 +▁distract -11.0128 +▁perception -11.0128 +▁precaution -11.0128 +▁spectator -11.0128 +▁surprising -11.0128 +▁disdain -11.0128 +▁bonnet -11.0128 +▁bapti -11.0129 +▁profess -11.0154 +▁inspector -11.018 +▁sketch -11.018 +▁structure -11.018 +▁ultimate -11.018 +▁confound -11.0181 +▁globe -11.0181 +▁insect -11.0181 +▁orchard -11.0181 +▁descent -11.0182 +▁amiable -11.0183 +▁independence -11.0233 +▁manufacture -11.0233 +▁sprinkle -11.0233 +▁nightingale -11.0233 +▁cushion -11.0233 +▁eminent -11.0233 +▁array -11.0234 +▁scott -11.0234 +▁troop -11.0234 +▁cosette -11.0234 +▁waving -11.0234 +▁irregular -11.0287 +▁persecut -11.0287 +▁derived -11.0287 +▁withdrew -11.0287 +▁caution -11.0287 +▁extract -11.0288 +▁suspicious -11.034 +▁memories -11.034 +▁nowhere -11.0341 +▁tremble -11.0343 +▁subtle -11.0343 +▁thorough -11.0349 +q -11.0372 +▁appropriate -11.0394 +▁slaughter -11.0394 +▁yourselves -11.0394 +▁thumb -11.0394 +▁twas -11.0394 +▁stray -11.0395 +▁abode -11.0395 +▁conspicuous -11.0448 +▁rebecca -11.0448 +▁sergeant -11.0448 +▁woke -11.0448 +▁apron -11.0451 +▁anticipate -11.0502 +▁discipline -11.0502 +▁glancing -11.0502 +▁pilgrim -11.0502 +▁sullen -11.0502 +▁contribute -11.0557 +▁prairie -11.0557 +▁carved -11.0559 +▁hypnoti -11.0612 +▁commerce -11.0612 +▁exclamation -11.0612 +▁muscular -11.0612 +▁november -11.0612 +▁phenomena -11.0612 +▁symbol -11.0612 +▁umbrella -11.0612 +▁diminish -11.0612 +▁parlour -11.0612 +▁threatening -11.0612 +▁stump -11.0612 +▁extensive -11.0667 +▁remembrance -11.0667 +▁combined -11.0667 +▁sheriff -11.0668 +▁laura -11.0673 +▁intercourse -11.0723 +▁supplies -11.0723 +▁landlord -11.0723 +▁stricken -11.0723 +▁shrink -11.0723 +▁caesar -11.0723 +▁drug -11.0726 +▁bewildered -11.0778 +▁commercial -11.0778 +▁nautilus -11.0778 +▁brutal -11.0779 +▁maggie -11.0779 +▁sphere -11.0779 +▁virgin -11.0816 +▁brethren -11.0835 +▁terrified -11.0835 +▁destiny -11.0835 +▁policy -11.0835 +▁housekeeper -11.0835 +▁ardent -11.0835 +▁discern -11.0836 +▁marquis -11.0836 +mouth -11.0854 +▁russia -11.0864 +▁wrap -11.0871 +▁britain -11.0891 +▁harbour -11.0891 +▁concert -11.0891 +▁harmony -11.0891 +▁donkey -11.0892 +▁damage -11.0892 +▁slim -11.0896 +about -11.0911 +▁luxury -11.0948 +▁paradise -11.0948 +▁culture -11.0948 +▁monstrous -11.0948 +▁tendency -11.0948 +▁julius -11.0948 +▁remedy -11.0948 +▁raoul -11.0948 +▁scold -11.0948 +▁decay -11.0948 +▁split -11.0949 +▁assault -11.1005 +▁december -11.1005 +▁moscow -11.1005 +▁explore -11.1005 +▁trousers -11.1005 +▁wrist -11.1006 +piece -11.1026 +▁tyrant -11.1063 +▁valentine -11.1063 +▁musket -11.1063 +▁abraham -11.1063 +▁strait -11.1063 +▁artificial -11.112 +▁faculty -11.112 +▁obligation -11.112 +▁resemblance -11.112 +▁inquiries -11.1121 +▁detain -11.1121 +▁swarm -11.1121 +▁pledge -11.1121 +▁admirable -11.1179 +▁defect -11.1179 +▁superintend -11.1179 +▁patriot -11.1179 +▁breton -11.1179 +▁dismal -11.1181 +▁recit -11.1191 +▁ignor -11.1232 +▁amelia -11.1237 +▁elephant -11.1296 +▁estimate -11.1296 +▁knelt -11.1296 +▁serving -11.1296 +▁shrill -11.1296 +▁text -11.1296 +▁studio -11.13 +▁alexander -11.1355 +▁wrought -11.1355 +▁abundant -11.1355 +▁situated -11.1355 +▁regain -11.1355 +▁sneer -11.1356 +▁sweat -11.1357 +▁wren -11.1359 +▁justify -11.138 +▁nigh -11.1409 +▁escort -11.1415 +▁inevitable -11.1415 +▁psmith -11.1415 +▁reluctant -11.1415 +▁preceding -11.1415 +▁resort -11.1415 +▁outrage -11.1419 +▁ambassador -11.1474 +▁consolation -11.1474 +▁remorse -11.1474 +▁behalf -11.1474 +▁formidable -11.1474 +▁gravity 
-11.1475 +▁apologi -11.1482 +▁divide -11.1484 +▁gigantic -11.1535 +▁october -11.1535 +▁flank -11.1535 +▁stooped -11.1535 +▁slew -11.1535 +▁confront -11.1535 +▁clara -11.1535 +▁film -11.1536 +▁bulk -11.1536 +dolph -11.1545 +▁eleanor -11.1595 +▁exclusive -11.1595 +▁japanese -11.1595 +▁sympathi -11.1595 +▁cavalry -11.1595 +▁perfume -11.1595 +▁federal -11.1595 +▁liquid -11.1595 +▁rubbing -11.1596 +▁oven -11.1597 +▁convuls -11.1656 +▁significant -11.1656 +▁deprived -11.1656 +▁responsibility -11.1656 +▁waistcoat -11.1656 +▁cluster -11.1656 +▁martha -11.1657 +▁attorney -11.1718 +▁droop -11.1718 +▁skilful -11.1718 +▁habitual -11.1718 +▁interven -11.1719 +▁owl -11.172 +▁conjecture -11.1779 +▁fantastic -11.1779 +▁responsible -11.1779 +▁destined -11.1779 +▁thereupon -11.1779 +▁goddess -11.178 +▁pacific -11.178 +▁warrant -11.178 +▁costume -11.178 +▁document -11.178 +▁bridle -11.1783 +▁california -11.1841 +▁democratic -11.1841 +▁eustace -11.1841 +▁squirrel -11.1841 +▁uncommon -11.1841 +▁plough -11.1841 +▁marvellous -11.1841 +▁tragedy -11.1841 +▁vault -11.1842 +▁hesitate -11.1853 +▁admiring -11.1904 +▁corporal -11.1904 +▁entitled -11.1904 +▁refrain -11.1904 +▁shrewd -11.1904 +▁strap -11.1927 +▁accurate -11.1967 +▁tempest -11.1967 +▁monument -11.1967 +▁siege -11.1967 +▁chinese -11.1967 +▁raven -11.1968 +▁loung -11.1969 +leigh -11.1985 +▁assassin -11.203 +▁inflict -11.203 +▁agitated -11.203 +▁desirable -11.203 +▁earliest -11.203 +▁launch -11.203 +▁pilot -11.2031 +▁pulse -11.2031 +▁liquor -11.2094 +▁scarecrow -11.2094 +▁skull -11.2094 +▁desolate -11.2094 +▁ticket -11.2094 +▁sublime -11.2094 +▁recess -11.2094 +▁serene -11.2094 +▁righteous -11.2094 +▁pinocchio -11.2158 +▁priscilla -11.2158 +▁charlotte -11.2158 +▁circular -11.2158 +▁injustice -11.2158 +▁thyself -11.2158 +▁occurrence -11.2158 +▁casual -11.2158 +▁trout -11.2158 +▁legend -11.2158 +▁fertil -11.2178 +▁background -11.2222 +▁comparatively -11.2222 +▁delicacy -11.2222 +▁estralla -11.2222 +▁manuscript -11.2222 +▁response -11.2222 +▁university -11.2222 +▁wolves -11.2222 +▁scandal -11.2222 +▁hoarse -11.2223 +▁stumble -11.2223 +▁convent -11.2272 +▁utili -11.2278 +▁examining -11.2287 +▁incapable -11.2287 +▁perceiving -11.2287 +▁philadelphia -11.2287 +▁subsequent -11.2287 +▁thieves -11.2287 +▁accumulat -11.2287 +▁damsel -11.2287 +▁scotch -11.2287 +▁underneath -11.2287 +▁smash -11.2287 +▁nobility -11.2287 +▁revolt -11.2288 +▁engage -11.229 +▁cathedral -11.2353 +▁despatch -11.2353 +▁eternity -11.2353 +▁january -11.2353 +▁probability -11.2353 +▁parallel -11.2353 +▁jimmie -11.2353 +▁champion -11.2353 +▁fisherman -11.2353 +▁jerry -11.2353 +▁swore -11.2353 +▁draught -11.2419 +▁opponent -11.2419 +▁primitive -11.2419 +▁significance -11.2419 +▁substantial -11.2419 +▁dunbar -11.2419 +▁commend -11.2419 +▁jasper -11.2419 +▁contemplate -11.2485 +▁testimony -11.2485 +▁imperial -11.2485 +▁adapt -11.2485 +▁juice -11.2485 +▁calamit -11.2489 +▁phoenix -11.2551 +▁prudent -11.2551 +▁solution -11.2551 +▁villefort -11.2551 +▁chateau -11.2551 +▁reaction -11.2551 +▁relax -11.2552 +▁quaint -11.2552 +▁plunder -11.2619 +▁distrust -11.2619 +▁prohibit -11.2619 +▁welfare -11.2619 +▁parlor -11.2619 +▁navigat -11.262 +▁tank -11.2624 +think -11.2657 +▁discourage -11.2686 +▁obstinate -11.2686 +▁rejoicing -11.2686 +▁vehicle -11.2686 +▁fancies -11.2686 +▁enlighten -11.2686 +▁sermon -11.2686 +▁illusion -11.2686 +▁anthea -11.2686 +▁martian -11.2688 +▁excite -11.2698 +▁attachment -11.2754 +▁generosity -11.2754 +▁unworthy -11.2754 +▁kettle -11.2754 +▁internal -11.2755 +▁incense -11.2756 +▁vibrat 
-11.2757 +▁adhere -11.2767 +▁february -11.2823 +▁incessant -11.2823 +▁mexican -11.2823 +▁interposed -11.2823 +▁granite -11.2823 +▁parcel -11.2823 +▁vexed -11.2823 +▁promote -11.2826 +▁debate -11.2839 +midst -11.2854 +▁cyril -11.2892 +▁embark -11.2892 +▁terrace -11.2892 +▁abundance -11.2892 +▁surgeon -11.2892 +▁aristocrat -11.2892 +▁literally -11.2892 +▁atlantic -11.2892 +▁martyr -11.2892 +▁senate -11.2892 +▁speck -11.2892 +▁loaf -11.2892 +vocation -11.2902 +▁administer -11.2961 +▁apprehend -11.2961 +▁elaborate -11.2961 +▁subdued -11.2961 +▁temporary -11.2961 +▁dominion -11.2961 +▁dignified -11.2961 +▁splash -11.2961 +▁conseil -11.2961 +▁dexter -11.2961 +▁unseen -11.2961 +▁tragic -11.2962 +ologist -11.3023 +▁sympathetic -11.3031 +▁bachelor -11.3031 +▁defense -11.3031 +▁excursion -11.3031 +▁faculties -11.3031 +▁proprietor -11.3031 +▁radiant -11.3031 +▁unnecessary -11.3031 +▁vacant -11.3031 +▁screw -11.3031 +▁ounce -11.3031 +▁gratify -11.3032 +▁calculated -11.3101 +▁keith -11.3101 +▁phenomenon -11.3101 +▁prominent -11.3101 +▁worried -11.3101 +▁climate -11.3101 +▁studies -11.3101 +▁aramis -11.3101 +▁bliss -11.3102 +▁contend -11.3102 +close -11.312 +▁continual -11.3127 +▁surpass -11.3172 +▁hebrew -11.3172 +▁identity -11.3172 +▁provoke -11.3172 +▁temperament -11.3172 +▁chariot -11.3172 +▁ninth -11.3172 +▁harbor -11.3173 +▁desirous -11.3244 +▁jerusalem -11.3244 +▁undertaking -11.3244 +▁chorus -11.3244 +▁scout -11.3244 +▁mirth -11.3244 +▁hymn -11.3244 +▁particle -11.3246 +▁apparatus -11.3316 +▁intelligible -11.3316 +▁invariably -11.3316 +▁pierced -11.3316 +▁review -11.3316 +▁flicker -11.3316 +▁exciting -11.3316 +▁gospel -11.3316 +▁dixon -11.3316 +▁revelation -11.3316 +▁constance -11.3316 +▁overtake -11.3316 +▁guinea -11.3316 +▁drap -11.3322 +▁precise -11.3343 +▁aladdin -11.3388 +▁chicago -11.3388 +▁tulliver -11.3388 +▁hamilton -11.3388 +▁garrison -11.3388 +▁disciple -11.3388 +▁intensity -11.3388 +▁traitor -11.3388 +▁chancellor -11.3388 +▁proverb -11.3388 +▁dagger -11.3389 +▁foresee -11.3399 +▁chauvelin -11.3461 +▁glimmer -11.3461 +▁volunteer -11.3461 +▁jungle -11.3461 +▁streak -11.3461 +▁sunrise -11.3461 +▁dissolv -11.3461 +▁confide -11.3482 +▁awhile -11.3535 +▁felicity -11.3535 +▁legislature -11.3535 +▁leonora -11.3535 +▁pitiful -11.3535 +▁colony -11.3535 +▁shawl -11.3536 +▁harmoni -11.3552 +▁arriving -11.3609 +▁carpenter -11.3609 +▁fundamental -11.3609 +▁overflow -11.3609 +▁expand -11.3609 +▁harvest -11.3609 +▁tidings -11.3609 +folk -11.3636 +▁feminine -11.3683 +▁innumerable -11.3683 +▁twentieth -11.3683 +▁trifling -11.3683 +▁ghastl -11.3683 +▁conquest -11.3683 +▁butterfly -11.3683 +▁daniel -11.3684 +▁scramble -11.3684 +▁facilit -11.3685 +▁forsake -11.3687 +▁behaviour -11.3759 +▁gorgeous -11.3759 +▁producing -11.3759 +▁happier -11.3759 +▁promising -11.3759 +▁rainbow -11.3759 +▁instinctively -11.3759 +▁decree -11.376 +▁copie -11.3764 +▁strew -11.3765 +▁eyebrows -11.3834 +▁irresistible -11.3834 +▁pharaoh -11.3834 +▁scrooge -11.3834 +▁unnatural -11.3834 +▁crumbs -11.3834 +▁refined -11.3834 +▁dreary -11.3834 +▁trench -11.3835 +▁clair -11.3838 +▁convince -11.386 +▁fringe -11.3877 +▁extremity -11.3911 +▁intimacy -11.3911 +▁scoundrel -11.3911 +▁suffrage -11.3911 +▁uneasiness -11.3911 +▁barricade -11.3911 +▁circulat -11.3911 +▁samuel -11.3911 +▁bruce -11.3911 +▁spake -11.3911 +▁ambitious -11.3988 +▁energetic -11.3988 +▁splendor -11.3988 +▁tuesday -11.3988 +▁virtuous -11.3988 diff --git a/modules/audio/asr/u2_conformer_librispeech/assets/data/mean_std.json 
b/modules/audio/asr/u2_conformer_librispeech/assets/data/mean_std.json new file mode 100644 index 0000000000000000000000000000000000000000..c42cf7fbc3b12dd25e05f218a091c88bf93f4a6b --- /dev/null +++ b/modules/audio/asr/u2_conformer_librispeech/assets/data/mean_std.json @@ -0,0 +1 @@ +{"mean_stat": [3419817384.9589553, 3554070049.1888413, 3818511309.9166613, 4066044518.3850017, 4291564631.2871633, 4447813845.146345, 4533096457.680424, 4535743891.989957, 4529762966.952207, 4506798370.255702, 4563810141.721841, 4621582319.277632, 4717208210.814803, 4782916961.295261, 4800534153.252695, 4816978042.979026, 4813370098.242317, 4783029495.131413, 4797780594.144404, 4697681126.278327, 4615891408.325888, 4660549391.6024275, 4576180438.146472, 4609080513.250168, 4575296489.058092, 4602504837.872262, 4568039825.650208, 4596829549.204861, 4590634987.343898, 4604371982.549804, 4623782318.317643, 4643582410.8842745, 4681460771.788484, 4759470876.31175, 4808639788.683043, 4828470941.416027, 4868984035.113543, 4906503986.801533, 4945995579.443381, 4936645225.986488, 4975902400.919519, 4960230208.656678, 4986734786.199859, 4983472199.8246765, 5002204376.162232, 5030432036.352981, 5060386169.086892, 5093482058.577236, 5118330657.308789, 5137270836.326198, 5140137363.319094, 5144296534.330122, 5158812605.654329, 5166263515.51458, 5156261604.282723, 5155820011.532965, 5154511256.8968, 5152063882.193671, 5153425524.412178, 5149000486.683038, 5154587156.35868, 5134412165.07972, 5092874838.792056, 5062281231.5140915, 5029059442.072953, 4996045017.917702, 4962203662.170533, 4928110046.282831, 4900476581.092096, 4881407033.533021, 4859626116.955097, 4851430742.3865795, 4850317443.454599, 4848197040.155383, 4837178106.464577, 4818448202.7298765, 4803345264.527405, 4765785994.104498, 4735296707.352132, 4699957946.40757], "var_stat": [39487786239.20539, 42865198005.60155, 49718916704.468704, 55953639455.490585, 62156293826.00315, 66738657819.12445, 69416921986.47835, 69657873431.17258, 69240303799.53061, 68286972351.43054, 69718367152.18843, 71405427710.7103, 74174200331.87572, 76047347951.43869, 76478048614.40665, 76810929560.19212, 76540466184.85634, 75538479521.34026, 75775624554.07217, 72775991318.16557, 70350402972.93352, 71358602366.48341, 68872845697.9878, 69552396791.49916, 68471390455.59991, 69022047288.07498, 67982260910.11236, 68656154716.71916, 68461419064.9241, 68795285460.65717, 69270474608.52791, 69754495937.76433, 70596044579.14969, 72207936275.97945, 73629619360.65047, 74746445259.57487, 75925168496.81197, 76973508692.04265, 78074337163.3413, 77765963787.96971, 78839167623.49733, 78328768943.2287, 79016127287.03778, 78922638306.99306, 79489768324.9408, 80354861037.44005, 81311991408.12526, 82368205917.26112, 83134782296.1741, 83667769421.23245, 83673751953.46239, 83806087685.62842, 84193971202.07523, 84424752763.34825, 84092846117.64104, 84039114093.08766, 83982515225.7085, 83909645482.75613, 83947278563.15077, 83800767707.19617, 83851106027.8772, 83089292432.37892, 82056425825.3622, 81138570746.92316, 80131843258.75557, 79130160837.19037, 78092166878.71533, 77104785522.79205, 76308548392.10454, 75709445890.58063, 75084778641.6033, 74795849006.19067, 74725807683.832, 74645651838.2169, 74300193368.39339, 73696619147.86806, 73212785808.97992, 72240491743.0697, 71420246227.32545, 70457076435.4593], "frame_num": 345484372} diff --git a/modules/audio/asr/u2_conformer_librispeech/assets/data/vocab.txt b/modules/audio/asr/u2_conformer_librispeech/assets/data/vocab.txt new file mode 100644 index 
0000000000000000000000000000000000000000..62d35f25e95fd80599bda9c14238797468a319b1 --- /dev/null +++ b/modules/audio/asr/u2_conformer_librispeech/assets/data/vocab.txt @@ -0,0 +1,5002 @@ + + +' +a +abeth +ability +able +ably +about +ac +ach +acious +ad +ade +ag +age +ah +ak +al +ally +am +an +ance +and +ang +ans +ant +ap +ar +ard +aries +artagnan +ary +as +at +ate +ated +ath +ating +ation +ations +ative +ator +atory +au +av +aw +ay +b +ba +bbe +bble +be +bel +ber +bi +ble +bo +board +borough +bra +bu +burg +burn +bury +by +c +ca +car +cast +ce +cent +ch +cha +che +ched +chi +cho +ci +ck +clock +close +co +comb +con +ctor +cu +cum +cy +d +da +dding +ddle +de +den +der +do +dolph +dy +e +ea +ed +ef +el +ella +em +ement +en +ence +encies +ened +ens +ent +er +ers +es +est +et +eth +ett +ette +ev +ever +ex +ey +f +fa +fall +fe +fer +ff +fi +field +fold +folk +foot +for +ford +form +ft +ful +g +ga +gan +gar +gate +ge +ged +gen +ger +gg +gi +ging +gn +go +gra +gu +gue +h +ha +ham +han +har +he +head +hen +her +hi +hin +ho +hold +hood +house +hu +hurst +hy +i +ia +ial +ian +ians +ib +ible +ic +ical +ick +id +ie +ied +ier +ies +if +ification +ified +ifying +ig +ight +ign +il +ile +ility +ily +im +in +ina +ine +iness +ing +io +ion +ions +ious +ip +ir +ire +is +ish +ism +ison +ist +istic +ists +it +itch +ite +ities +itude +ity +ium +ius +ive +j +ja +ji +jo +ju +k +ka +ke +keep +ker +ki +kin +king +ko +ky +l +la +lac +lan +land +lar +ld +le +led +leigh +ler +les +less +let +ley +lf +li +lie +light +like +lin +line +liness +ling +ll +lo +lock +lon +long +low +lt +lung +lus +ly +m +ma +man +mbled +me +men +ment +ments +mer +mi +midst +mmed +mo +mond +mont +more +most +mouth +mp +my +n +na +nce +nd +ne +ned +ner +ness +ney +ng +ni +nic +ning +nnie +nny +no +nt +ny +o +oc +od +og +ol +ological +ologist +ology +om +on +one +oo +ook +oon +op +or +ord +ors +ory +os +ot +ou +our +ous +ov +ow +p +pa +pe +pec +ped +per +pha +piece +ple +po +port +pose +pp +pping +ps +q +qua +que +qui +r +ra +ran +rate +re +red +ress +rg +ri +ric +rick +ridge +ries +right +rin +ring +ris +rk +rn +ro +ron +rous +row +rs +rt +ru +ry +s +sail +se +sh +ship +shire +side +some +son +st +stead +ster +stone +stra +t +ta +tan +te +ted +ten +ter +terior +th +the +ther +think +thorpe +ti +tic +ties +time +tin +ting +tion +to +ton +tri +tro +tte +ttered +ttle +tur +ty +u +ub +uc +uch +ud +ug +ugh +ul +ulation +um +un +und +uous +up +ur +ure +us +use +ut +ux +v +va +val +van +ve +ver +vi +ville +vo +vocation +w +wa +war +ward +way +we +well +wi +wick +win +wn +wood +work +worth +x +y +z +zz +▁ +▁a +▁ab +▁abandon +▁able +▁abode +▁about +▁above +▁abraham +▁abroad +▁absence +▁absent +▁absolute +▁absolutely +▁absorb +▁abstract +▁absurd +▁abundance +▁abundant +▁abuse +▁accent +▁accept +▁accepted +▁access +▁accident +▁accompanied +▁accompany +▁accomplish +▁accord +▁according +▁accordingly +▁account +▁accumulat +▁accurate +▁accuse +▁accustomed +▁achieve +▁acknowledg +▁acquaintance +▁acquainted +▁acquired +▁across +▁act +▁action +▁active +▁activity +▁actual +▁actually +▁adam +▁adapt +▁add +▁added +▁addition +▁address +▁addressed +▁adhere +▁adjust +▁administer +▁administration +▁admirable +▁admiral +▁admiration +▁admire +▁admiring +▁admit +▁admitted +▁adopt +▁adorn +▁advance +▁advanced +▁advancing +▁advantage +▁adventure +▁advertise +▁advice +▁advise +▁affair +▁affairs +▁affect +▁affected +▁affection +▁affectionate +▁affirm +▁afflict +▁afford +▁afraid +▁africa +▁after +▁afternoon +▁afterward +▁afterwards +▁again +▁against +▁age +▁agent +▁agitated 
+▁agitation +▁ago +▁agony +▁agree +▁agreeable +▁agreed +▁ah +▁ahead +▁aid +▁aim +▁air +▁al +▁aladdin +▁alarm +▁alas +▁albert +▁alexander +▁alice +▁alive +▁all +▁allow +▁allowed +▁almost +▁alone +▁along +▁aloud +▁already +▁also +▁altar +▁alter +▁although +▁altogether +▁always +▁alyosha +▁am +▁ama +▁ambassador +▁ambition +▁ambitious +▁amelia +▁america +▁american +▁amiable +▁amid +▁among +▁amount +▁amusement +▁an +▁anchor +▁ancient +▁and +▁andrew +▁angel +▁anger +▁angle +▁angrily +▁angry +▁anguish +▁animal +▁animals +▁anna +▁anne +▁announc +▁announced +▁another +▁answer +▁answered +▁anthea +▁anti +▁anticipate +▁anxiety +▁anxious +▁any +▁anybody +▁anyhow +▁anyone +▁anything +▁anywhere +▁apart +▁apartment +▁apologi +▁apparatus +▁apparent +▁apparently +▁appeal +▁appear +▁appearance +▁appeared +▁appetite +▁apple +▁application +▁applied +▁apply +▁appointed +▁appointment +▁appreciate +▁apprehend +▁apprehension +▁approach +▁approached +▁approaching +▁appropriate +▁approve +▁april +▁apron +▁apt +▁ar +▁arab +▁aramis +▁arch +▁architect +▁ardent +▁are +▁argue +▁argument +▁arise +▁aristocrat +▁arm +▁arms +▁army +▁arose +▁around +▁arranged +▁arrangement +▁array +▁arrest +▁arrival +▁arrive +▁arrived +▁arriving +▁arrow +▁art +▁arthur +▁article +▁artificial +▁artist +▁as +▁ascend +▁ascertain +▁ashamed +▁ashes +▁ashore +▁aside +▁ask +▁asked +▁asking +▁asleep +▁aspect +▁assassin +▁assault +▁assembled +▁assembly +▁assert +▁assist +▁assistance +▁assistant +▁associate +▁association +▁assume +▁assumed +▁assurance +▁assure +▁assured +▁astonished +▁astonishment +▁at +▁atlantic +▁atmosphere +▁attached +▁attachment +▁attack +▁attain +▁attempt +▁attend +▁attendant +▁attention +▁attentive +▁attitude +▁attorney +▁attract +▁attribute +▁audience +▁august +▁aunt +▁author +▁authorities +▁authority +▁autumn +▁avail +▁avenue +▁average +▁avoid +▁await +▁awake +▁awakened +▁aware +▁away +▁awful +▁awhile +▁awkward +▁awoke +▁axe +▁b +▁ba +▁baby +▁bachelor +▁back +▁background +▁backward +▁bad +▁bade +▁bag +▁bake +▁bal +▁balance +▁ball +▁balloon +▁ban +▁band +▁bank +▁bapti +▁bar +▁barbar +▁bare +▁bargain +▁bark +▁baron +▁barrel +▁barricade +▁barrier +▁base +▁basin +▁basket +▁bath +▁batter +▁battle +▁bay +▁be +▁bear +▁beard +▁bearing +▁beast +▁beat +▁beaten +▁beautiful +▁beauty +▁became +▁because +▁become +▁becoming +▁bed +▁bedroom +▁been +▁before +▁beg +▁began +▁beggar +▁begged +▁begin +▁beginning +▁begun +▁behalf +▁behave +▁behaviour +▁beheld +▁behind +▁behold +▁being +▁belief +▁believe +▁believed +▁believing +▁bell +▁belong +▁beloved +▁below +▁bench +▁bending +▁beneath +▁benefit +▁bent +▁bernard +▁beside +▁besides +▁best +▁bestow +▁betray +▁better +▁between +▁bewildered +▁beyond +▁bi +▁bible +▁bid +▁big +▁bill +▁billy +▁bind +▁bird +▁birds +▁birth +▁bishop +▁bit +▁bitter +▁bla +▁black +▁blade +▁blame +▁blank +▁blanket +▁bless +▁blew +▁blind +▁bliss +▁block +▁blood +▁bloom +▁blossom +▁blow +▁blu +▁blue +▁blush +▁bo +▁board +▁boast +▁boat +▁bob +▁bodies +▁body +▁boil +▁bold +▁bolt +▁bon +▁bond +▁bonnet +▁book +▁books +▁boot +▁boots +▁border +▁bore +▁born +▁borne +▁borrow +▁bosom +▁boston +▁both +▁bottle +▁bottom +▁bought +▁bound +▁bow +▁bowed +▁bowl +▁box +▁boy +▁boys +▁bra +▁brain +▁branch +▁branches +▁brand +▁brave +▁bread +▁break +▁breakfast +▁breaking +▁breast +▁breath +▁bree +▁brethren +▁breton +▁bri +▁brick +▁bride +▁bridge +▁bridle +▁brief +▁brig +▁bright +▁brilliant +▁bring +▁bringing +▁brisk +▁britain +▁british +▁bro +▁broad +▁broke +▁broken +▁brood +▁brook +▁brother +▁brothers +▁brought +▁brow +▁brown +▁bruce +▁brush 
+▁brutal +▁brute +▁bu +▁buck +▁build +▁building +▁built +▁bulk +▁bull +▁bullet +▁bunch +▁bundle +▁bur +▁burden +▁buried +▁burn +▁burning +▁burst +▁bush +▁bushes +▁business +▁busy +▁but +▁butter +▁butterfly +▁buy +▁by +▁c +▁ca +▁cab +▁cabin +▁caesar +▁cake +▁cal +▁calamit +▁calculated +▁california +▁call +▁called +▁calling +▁calm +▁came +▁camp +▁campaign +▁can +▁candid +▁candle +▁cannon +▁cannot +▁canoe +▁canvas +▁cap +▁capable +▁capacity +▁capital +▁captain +▁capture +▁car +▁card +▁cardinal +▁care +▁careful +▁carefully +▁careless +▁carlyle +▁carpenter +▁carpet +▁carr +▁carriage +▁carried +▁carry +▁carrying +▁cart +▁carved +▁case +▁cast +▁castle +▁casual +▁cat +▁catch +▁cathedral +▁catherine +▁catholic +▁cattle +▁caught +▁cause +▁caused +▁caution +▁cavalry +▁cave +▁ce +▁cease +▁ceased +▁ceiling +▁celebrat +▁cell +▁cellar +▁cent +▁center +▁central +▁centre +▁centuries +▁century +▁ceremony +▁certain +▁certainly +▁cetera +▁ch +▁cha +▁chain +▁chair +▁challenge +▁chamber +▁champion +▁chance +▁chancellor +▁change +▁changed +▁changing +▁channel +▁chap +▁chapter +▁char +▁character +▁characteristic +▁charge +▁chariot +▁charles +▁charlotte +▁charm +▁charming +▁chase +▁chateau +▁chatter +▁chauvelin +▁che +▁cheap +▁check +▁cheek +▁cheeks +▁cheer +▁cheerful +▁cheese +▁cherish +▁chest +▁chi +▁chicago +▁chicken +▁chief +▁child +▁childhood +▁children +▁chill +▁chimney +▁chin +▁china +▁chinese +▁choice +▁choose +▁chop +▁chorus +▁chose +▁chosen +▁chris +▁christ +▁christian +▁christmas +▁chu +▁chuck +▁church +▁cigar +▁circle +▁circular +▁circulat +▁circumstance +▁circumstances +▁citi +▁cities +▁city +▁civil +▁civili +▁cl +▁claim +▁clair +▁clapp +▁clara +▁clasp +▁class +▁classes +▁claw +▁clay +▁clean +▁clear +▁clearly +▁clergy +▁clerk +▁clever +▁cliff +▁climate +▁climb +▁clo +▁cloak +▁clock +▁close +▁closed +▁closely +▁cloth +▁clothes +▁cloud +▁clouds +▁club +▁cluster +▁clutch +▁co +▁coach +▁coal +▁coarse +▁coast +▁coat +▁cock +▁coffee +▁coffin +▁coin +▁col +▁cold +▁collar +▁collect +▁college +▁colonel +▁coloni +▁colony +▁color +▁colour +▁column +▁com +▁comb +▁combat +▁combination +▁combined +▁come +▁comes +▁comfort +▁comfortable +▁coming +▁command +▁commenced +▁commend +▁comment +▁commerce +▁commercial +▁commission +▁commit +▁committed +▁committee +▁common +▁commun +▁communicat +▁communication +▁community +▁comp +▁companion +▁companions +▁company +▁comparatively +▁compare +▁comparison +▁compass +▁compelled +▁complain +▁complete +▁completely +▁complex +▁compliment +▁composed +▁composition +▁comprehend +▁comrade +▁con +▁conceal +▁conceive +▁concentrat +▁conception +▁concern +▁concerned +▁concerning +▁concert +▁conclud +▁concluded +▁conclusion +▁condemn +▁condition +▁conditions +▁conduct +▁conf +▁confess +▁confide +▁confidence +▁confident +▁confined +▁confirm +▁conflict +▁confound +▁confront +▁confused +▁confusion +▁congress +▁conjecture +▁connected +▁connection +▁conquer +▁conquest +▁conscience +▁conscious +▁consciousness +▁conseil +▁consent +▁consequence +▁consequently +▁consider +▁considerable +▁consideration +▁considered +▁consist +▁consolation +▁conspicuous +▁constance +▁constant +▁constantly +▁constitute +▁constitution +▁construct +▁consult +▁consum +▁contact +▁contain +▁contemplate +▁contempt +▁contend +▁content +▁contest +▁continent +▁continual +▁continually +▁continue +▁continued +▁contract +▁contradict +▁contrary +▁contrast +▁contribute +▁control +▁convenient +▁convent +▁convention +▁conversation +▁converse +▁convert +▁convey +▁convict +▁conviction +▁convince +▁convinced +▁convuls +▁cook +▁cool +▁copie 
+▁copper +▁copy +▁cor +▁cordial +▁corn +▁corner +▁corporal +▁corpse +▁correct +▁correspond +▁corridor +▁corrupt +▁cosette +▁cost +▁costume +▁cottage +▁cotton +▁couch +▁could +▁couldn +▁council +▁counsel +▁count +▁countenance +▁counter +▁countess +▁countries +▁country +▁couple +▁courage +▁course +▁court +▁cousin +▁cover +▁covered +▁cow +▁coward +▁cra +▁crack +▁craft +▁crawl +▁cre +▁cream +▁created +▁creature +▁creatures +▁credit +▁creek +▁creep +▁crep +▁crew +▁cried +▁cries +▁crime +▁criminal +▁crimson +▁cristo +▁critic +▁cro +▁cross +▁crossed +▁crow +▁crowd +▁crown +▁cru +▁cruel +▁crumbs +▁crush +▁cry +▁crying +▁crystal +▁cu +▁cultivate +▁culture +▁cunning +▁cup +▁cur +▁curiosity +▁curious +▁curl +▁current +▁curse +▁curtain +▁cushion +▁custom +▁cut +▁cutting +▁cyril +▁d +▁da +▁dagger +▁daily +▁damage +▁damn +▁damp +▁damsel +▁dan +▁dance +▁dancing +▁danger +▁dangerous +▁danglars +▁daniel +▁dar +▁dare +▁dared +▁dark +▁darkness +▁darling +▁dash +▁date +▁daughter +▁david +▁dawn +▁day +▁days +▁de +▁dead +▁deaf +▁deal +▁dear +▁dearest +▁death +▁debate +▁debt +▁decay +▁deceive +▁december +▁decide +▁decided +▁decision +▁deck +▁declare +▁declared +▁decline +▁decorat +▁decree +▁deep +▁deeply +▁defeat +▁defect +▁defence +▁defend +▁defense +▁defi +▁definite +▁degree +▁delay +▁deliberate +▁delicacy +▁delicate +▁delicious +▁delight +▁delighted +▁delightful +▁deliver +▁demand +▁demanded +▁democratic +▁demon +▁den +▁denied +▁deny +▁depart +▁department +▁departure +▁depend +▁deposit +▁depress +▁deprived +▁depth +▁derived +▁descend +▁descended +▁descent +▁describe +▁described +▁description +▁desert +▁deserve +▁design +▁desirable +▁desire +▁desired +▁desirous +▁desk +▁desolate +▁despair +▁despatch +▁desperate +▁despise +▁despite +▁destined +▁destiny +▁destroy +▁destroyed +▁destruction +▁detail +▁detain +▁detect +▁detective +▁determin +▁determination +▁determined +▁develop +▁development +▁device +▁devil +▁devoted +▁devotion +▁devour +▁dexter +▁di +▁diamond +▁diana +▁dick +▁did +▁didn +▁die +▁died +▁differ +▁difference +▁different +▁difficult +▁difficulties +▁difficulty +▁dig +▁dignified +▁dignity +▁dim +▁diminish +▁din +▁dinner +▁direct +▁directed +▁direction +▁directly +▁dirty +▁dis +▁disagreeable +▁disappear +▁disappeared +▁disappoint +▁disappointment +▁disc +▁discern +▁discharge +▁disciple +▁discipline +▁discourage +▁discourse +▁discover +▁discovered +▁discovery +▁discuss +▁discussion +▁disdain +▁disease +▁disgrace +▁disguise +▁disgust +▁dish +▁dislike +▁dismal +▁dismay +▁dismiss +▁disorder +▁display +▁disposed +▁disposition +▁dispute +▁dissolv +▁distance +▁distant +▁distinct +▁distinction +▁distinguish +▁distinguished +▁distract +▁distress +▁distribut +▁district +▁distrust +▁disturb +▁div +▁divers +▁divide +▁divided +▁divine +▁division +▁dixon +▁do +▁doctor +▁doctrine +▁document +▁does +▁doesn +▁dog +▁dogs +▁doing +▁dollars +▁domestic +▁dominion +▁don +▁done +▁donkey +▁door +▁doors +▁doorway +▁dorothy +▁double +▁doubt +▁doubtful +▁doubtless +▁down +▁downstairs +▁drag +▁dragg +▁dragon +▁drain +▁drake +▁drama +▁drank +▁drap +▁draught +▁draw +▁drawing +▁drawn +▁dread +▁dreadful +▁dream +▁dreary +▁dress +▁dressed +▁drew +▁dri +▁drift +▁drink +▁drive +▁driven +▁driver +▁driving +▁droop +▁drop +▁dropped +▁dropping +▁drove +▁drown +▁drug +▁drum +▁drunk +▁dry +▁du +▁duchess +▁duck +▁due +▁duke +▁dull +▁dumb +▁dun +▁dunbar +▁dur +▁dusk +▁dust +▁dutch +▁duties +▁duty +▁dwarf +▁dwell +▁dwelt +▁dying +▁e +▁each +▁eager +▁eagerly +▁eagle +▁ear +▁earl +▁earlier +▁earliest +▁early +▁earn +▁earnest +▁ears +▁earth +▁ease 
+▁easier +▁easily +▁east +▁eastern +▁easy +▁eat +▁eaten +▁eating +▁echo +▁edge +▁edith +▁editor +▁educat +▁education +▁edward +▁effect +▁effort +▁eggs +▁egypt +▁egyptian +▁eight +▁eighteen +▁eighty +▁either +▁el +▁elaborate +▁elbow +▁elder +▁eldest +▁eleanor +▁elect +▁electric +▁elegant +▁element +▁elephant +▁eleven +▁eli +▁else +▁elsewhere +▁elsie +▁em +▁embark +▁embarrass +▁embrace +▁embroider +▁emerg +▁emily +▁eminent +▁emotion +▁emperor +▁emphasi +▁empire +▁employ +▁employed +▁empty +▁en +▁enable +▁enchant +▁enclos +▁encounter +▁encourage +▁end +▁endeavor +▁endeavour +▁endure +▁enemies +▁enemy +▁energetic +▁energy +▁engage +▁engaged +▁engagement +▁engine +▁england +▁english +▁enjoy +▁enjoyment +▁enlighten +▁enormous +▁enough +▁ensu +▁enter +▁entered +▁enterprise +▁entertain +▁enthusiasm +▁entire +▁entirely +▁entitled +▁entrance +▁entreat +▁envelope +▁envy +▁epi +▁equal +▁equally +▁er +▁ere +▁erect +▁errand +▁error +▁escape +▁escaped +▁escort +▁especially +▁essence +▁essential +▁establish +▁established +▁establishment +▁estate +▁esteem +▁estimate +▁estralla +▁eternal +▁eternity +▁europe +▁eustace +▁eva +▁even +▁evening +▁events +▁ever +▁every +▁everybody +▁everyone +▁everything +▁everywhere +▁evidence +▁evident +▁evidently +▁evil +▁ex +▁exact +▁exactly +▁examination +▁examine +▁examined +▁examining +▁example +▁exceed +▁exceedingly +▁excellency +▁excellent +▁except +▁exception +▁excess +▁exchange +▁excite +▁excited +▁excitement +▁exciting +▁exclaimed +▁exclamation +▁exclusive +▁excursion +▁excuse +▁execut +▁execution +▁exercise +▁exhaust +▁exhibit +▁exist +▁existence +▁expand +▁expect +▁expectation +▁expected +▁expedition +▁expense +▁experience +▁experiment +▁explain +▁explained +▁explanation +▁explore +▁exposed +▁express +▁expressed +▁expression +▁exquisite +▁extend +▁extended +▁extensive +▁extent +▁external +▁extra +▁extract +▁extraordinary +▁extreme +▁extremely +▁extremity +▁eye +▁eyebrows +▁eyes +▁f +▁fa +▁face +▁faces +▁facilit +▁facing +▁fact +▁faculties +▁faculty +▁faded +▁fail +▁failed +▁failure +▁faint +▁fair +▁fairly +▁fairy +▁faith +▁faithful +▁fall +▁fallen +▁falling +▁false +▁fame +▁familiar +▁families +▁family +▁famous +▁fan +▁fancied +▁fancies +▁fancy +▁fanny +▁fantastic +▁far +▁farewell +▁farm +▁farmer +▁farther +▁fashion +▁fast +▁fastened +▁fat +▁fatal +▁fate +▁father +▁fatigue +▁fault +▁favor +▁favorite +▁favour +▁favourite +▁fe +▁fear +▁fearful +▁feast +▁feather +▁feature +▁features +▁february +▁federal +▁feeble +▁feed +▁feel +▁feeling +▁feelings +▁feet +▁felicity +▁fell +▁fellow +▁felt +▁female +▁feminine +▁fence +▁fer +▁fertil +▁fetch +▁fever +▁few +▁fi +▁field +▁fields +▁fierce +▁fifteen +▁fifth +▁fifty +▁fight +▁fighting +▁figure +▁fill +▁filled +▁film +▁fin +▁final +▁finally +▁find +▁finding +▁fine +▁finger +▁fingers +▁finish +▁finished +▁fire +▁firm +▁firmly +▁first +▁fish +▁fisherman +▁fit +▁fitted +▁five +▁fix +▁fixed +▁fl +▁flag +▁flame +▁flank +▁flash +▁flat +▁flatter +▁fled +▁flee +▁fleet +▁flesh +▁flew +▁flicker +▁flight +▁flo +▁flock +▁flood +▁floor +▁florence +▁flour +▁flourish +▁flow +▁flower +▁flowers +▁flu +▁flutter +▁fly +▁flying +▁fo +▁fog +▁fold +▁folk +▁follow +▁followed +▁following +▁folly +▁fond +▁food +▁fool +▁foolish +▁foot +▁footsteps +▁for +▁forbid +▁force +▁forced +▁fore +▁forehead +▁foreign +▁foresee +▁forest +▁forget +▁forgive +▁forgot +▁forgotten +▁form +▁formed +▁former +▁formidable +▁forsake +▁forth +▁fortnight +▁fortunate +▁fortune +▁forty +▁forward +▁fought +▁found +▁fountain +▁four +▁fourteen +▁fourth +▁fowl +▁fox +▁fra +▁fragment 
+▁frame +▁france +▁francis +▁francs +▁frank +▁fred +▁frederick +▁free +▁freedom +▁french +▁frequent +▁frequently +▁fresh +▁fri +▁friday +▁friend +▁friendly +▁friends +▁friendship +▁fright +▁frightened +▁frightful +▁fringe +▁fro +▁frog +▁from +▁front +▁frown +▁fruit +▁fu +▁fulfil +▁full +▁fully +▁fun +▁function +▁fundamental +▁funeral +▁funny +▁fur +▁furious +▁furnish +▁furniture +▁further +▁future +▁g +▁ga +▁gain +▁gained +▁gall +▁gallant +▁gallery +▁gallop +▁game +▁gar +▁garden +▁garrison +▁gasp +▁gate +▁gather +▁gathered +▁gave +▁gay +▁ge +▁gen +▁general +▁generally +▁generation +▁generosity +▁generous +▁genius +▁gentle +▁gentleman +▁gentlemen +▁gently +▁genuine +▁george +▁ger +▁german +▁gesture +▁get +▁getting +▁ghastl +▁ghost +▁gi +▁giant +▁gift +▁gigantic +▁gil +▁gilbert +▁girl +▁girls +▁give +▁given +▁giving +▁gla +▁glacier +▁glad +▁glance +▁glancing +▁glass +▁gleam +▁glen +▁glid +▁glimmer +▁glimpse +▁glitter +▁globe +▁gloom +▁gloomy +▁glorious +▁glory +▁glove +▁glow +▁go +▁goat +▁god +▁goddess +▁goes +▁going +▁gold +▁golden +▁gone +▁good +▁gorgeous +▁gospel +▁gossip +▁got +▁govern +▁government +▁governor +▁gown +▁gra +▁grace +▁graceful +▁gracious +▁gradually +▁grand +▁grandfather +▁grandmother +▁granite +▁grant +▁grasp +▁grass +▁grateful +▁gratify +▁gratitude +▁grave +▁gravity +▁gray +▁gre +▁great +▁greater +▁greatest +▁greatly +▁greek +▁green +▁grew +▁grey +▁gri +▁grief +▁grieve +▁grim +▁grin +▁gro +▁groan +▁ground +▁group +▁grove +▁grow +▁growing +▁growl +▁grown +▁growth +▁gu +▁guard +▁guess +▁guest +▁guide +▁guilt +▁guilty +▁guinea +▁gun +▁ha +▁habit +▁habitual +▁had +▁hair +▁hale +▁half +▁hall +▁halt +▁ham +▁hamilton +▁hammer +▁hand +▁handkerchief +▁hands +▁handsome +▁hang +▁hanging +▁hans +▁happen +▁happened +▁happier +▁happily +▁happiness +▁happy +▁har +▁harbor +▁harbour +▁hard +▁hardly +▁harm +▁harmoni +▁harmony +▁harry +▁harsh +▁harvest +▁has +▁haste +▁hastened +▁hastily +▁hat +▁hate +▁hath +▁hatred +▁haunt +▁have +▁haven +▁having +▁hawk +▁hay +▁he +▁head +▁heads +▁health +▁heap +▁hear +▁heard +▁hearing +▁heart +▁heat +▁heaven +▁heavily +▁heavy +▁hebrew +▁hedge +▁height +▁held +▁helen +▁help +▁helpless +▁hence +▁henry +▁her +▁herbert +▁hercules +▁here +▁hero +▁herself +▁hesitate +▁hesitated +▁hesitation +▁hi +▁hid +▁hidden +▁hide +▁hideous +▁high +▁higher +▁highest +▁hill +▁hills +▁him +▁himself +▁hind +▁hint +▁his +▁history +▁hit +▁hither +▁hitherto +▁ho +▁hoarse +▁hold +▁holding +▁hole +▁holiday +▁holland +▁hollow +▁holy +▁home +▁honest +▁honey +▁honor +▁honour +▁hook +▁hope +▁hoped +▁hopeless +▁hoping +▁hori +▁horn +▁horrible +▁horrid +▁horror +▁horse +▁horses +▁hospital +▁host +▁hot +▁hotel +▁hour +▁hours +▁house +▁household +▁housekeeper +▁houses +▁how +▁however +▁hu +▁huge +▁hum +▁human +▁humanity +▁humble +▁humor +▁humour +▁hundred +▁hung +▁hunger +▁hungry +▁hunt +▁hunter +▁hunting +▁hurried +▁hurry +▁hurt +▁husband +▁hush +▁hut +▁hy +▁hymn +▁hypnoti +▁i +▁ice +▁idea +▁ideal +▁ideas +▁identity +▁idiot +▁idle +▁if +▁ignor +▁ignorance +▁ignorant +▁ill +▁illusion +▁illustrat +▁image +▁imagination +▁imagine +▁imitat +▁immediate +▁immediately +▁immense +▁immortal +▁imp +▁impart +▁impatience +▁impatient +▁imperfect +▁imperial +▁import +▁importance +▁important +▁impossible +▁impressed +▁impression +▁improve +▁improvement +▁impulse +▁in +▁incapable +▁incense +▁incessant +▁inches +▁incident +▁inclination +▁inclined +▁includ +▁income +▁increase +▁increased +▁increasing +▁indeed +▁independence +▁independent +▁india +▁indian +▁indians +▁indifference +▁indifferent +▁indignant 
+▁indignation +▁individual +▁induce +▁indulge +▁industry +▁inevitable +▁infant +▁inferior +▁infinite +▁inflict +▁influence +▁information +▁informed +▁inhabit +▁inhabitants +▁inherit +▁injured +▁injury +▁injustice +▁innocence +▁innocent +▁innumerable +▁inquire +▁inquired +▁inquiries +▁inquiry +▁insect +▁inside +▁insist +▁inspector +▁instance +▁instant +▁instantly +▁instead +▁instinct +▁instinctively +▁institution +▁instruct +▁instrument +▁insult +▁intellect +▁intellectual +▁intelligence +▁intelligent +▁intelligible +▁intend +▁intended +▁intense +▁intensity +▁intent +▁intention +▁inter +▁intercourse +▁interest +▁interested +▁interesting +▁interfere +▁internal +▁interposed +▁interpret +▁interrupt +▁interrupted +▁interval +▁interven +▁interview +▁intimacy +▁intimate +▁into +▁introduced +▁invariably +▁invent +▁investigat +▁invisible +▁invitation +▁invited +▁ireland +▁irish +▁iron +▁irre +▁irregular +▁irresistible +▁is +▁isabel +▁island +▁isn +▁issue +▁it +▁italian +▁italy +▁its +▁itself +▁j +▁ja +▁jack +▁jackson +▁jacob +▁james +▁jane +▁january +▁japanese +▁jar +▁jasper +▁jaw +▁je +▁jealous +▁jean +▁jerk +▁jerry +▁jerusalem +▁jest +▁jesus +▁jew +▁jewel +▁jim +▁jimmie +▁jimmy +▁jo +▁job +▁joe +▁john +▁johnson +▁join +▁joined +▁joke +▁jolly +▁jones +▁joseph +▁journal +▁journey +▁joy +▁ju +▁jud +▁judge +▁judgment +▁juice +▁julia +▁julie +▁julius +▁jump +▁jumped +▁june +▁jungle +▁just +▁justice +▁justify +▁k +▁ka +▁kate +▁katy +▁keen +▁keep +▁keeping +▁keith +▁ken +▁kennedy +▁kept +▁kettle +▁key +▁ki +▁kick +▁kill +▁killed +▁kind +▁kindly +▁kindness +▁king +▁kingdom +▁kiss +▁kissed +▁kit +▁kitchen +▁kitty +▁knee +▁knees +▁knelt +▁knew +▁knife +▁knight +▁knit +▁knock +▁knot +▁know +▁knowing +▁knowledge +▁known +▁knows +▁ko +▁la +▁labor +▁labour +▁lace +▁lack +▁lad +▁ladder +▁ladies +▁lady +▁laid +▁lake +▁lamb +▁lament +▁lamp +▁land +▁landlord +▁landscape +▁lane +▁language +▁lantern +▁lap +▁large +▁larger +▁last +▁late +▁later +▁latter +▁laugh +▁laughed +▁laughing +▁laughter +▁launcelot +▁launch +▁laura +▁law +▁laws +▁lawyer +▁lay +▁le +▁lead +▁leader +▁leading +▁leaf +▁league +▁lean +▁leaned +▁leaning +▁leap +▁learn +▁learned +▁least +▁leather +▁leave +▁leaves +▁leaving +▁lecture +▁led +▁left +▁leg +▁legend +▁legislature +▁legs +▁leisure +▁lemon +▁lend +▁length +▁leonora +▁less +▁lesson +▁lest +▁let +▁letter +▁letters +▁level +▁levin +▁li +▁liberal +▁liberty +▁library +▁lie +▁lies +▁lieutenant +▁life +▁lift +▁lifted +▁light +▁lightning +▁like +▁liked +▁likely +▁likewise +▁limb +▁limit +▁lin +▁lincoln +▁line +▁lines +▁linger +▁lion +▁lips +▁liquid +▁liquor +▁list +▁listen +▁listened +▁listening +▁literally +▁literary +▁literature +▁little +▁live +▁lived +▁lives +▁living +▁lo +▁load +▁loaf +▁local +▁lock +▁locked +▁lodge +▁lodging +▁lofty +▁log +▁london +▁lonely +▁long +▁longer +▁look +▁looked +▁looking +▁looks +▁loose +▁lord +▁lose +▁losing +▁loss +▁lost +▁lot +▁loud +▁louis +▁loung +▁love +▁loved +▁lovely +▁lover +▁loving +▁low +▁lower +▁loyal +▁lu +▁luc +▁luck +▁lucy +▁lunch +▁luxury +▁lying +▁lyn +▁m +▁ma +▁mac +▁machine +▁mad +▁madam +▁madame +▁made +▁mademoiselle +▁maggie +▁magic +▁magician +▁magistrate +▁magnificent +▁maid +▁maiden +▁main +▁maintain +▁majesty +▁major +▁majority +▁make +▁makes +▁making +▁mal +▁male +▁mamma +▁man +▁manage +▁managed +▁manifest +▁mankind +▁manner +▁manufacture +▁manuscript +▁many +▁mar +▁marble +▁march +▁margaret +▁marguerite +▁marian +▁marilla +▁mark +▁marked +▁market +▁marquis +▁marriage +▁married +▁marry +▁marsh +▁martha +▁martian +▁martin +▁martyr +▁marvel 
+▁marvellous +▁mary +▁mask +▁mass +▁master +▁mat +▁match +▁mate +▁material +▁matter +▁matters +▁matthew +▁maxim +▁may +▁maybe +▁me +▁meadow +▁meal +▁mean +▁meaning +▁means +▁meant +▁meantime +▁meanwhile +▁measure +▁meat +▁mechanical +▁medi +▁medical +▁medicine +▁meet +▁meeting +▁melancholy +▁member +▁members +▁memories +▁memory +▁men +▁mental +▁mention +▁mentioned +▁mer +▁merchant +▁mercy +▁mere +▁merely +▁merit +▁merry +▁message +▁messenger +▁met +▁metal +▁method +▁mexican +▁mi +▁michael +▁mid +▁middle +▁midnight +▁midst +▁might +▁mighty +▁mil +▁mild +▁mile +▁miles +▁military +▁milk +▁mill +▁million +▁min +▁mind +▁mine +▁mingled +▁minister +▁minute +▁minutes +▁miracle +▁mirror +▁mirth +▁mis +▁mischief +▁miserable +▁misery +▁misfortune +▁miss +▁mission +▁missus +▁mist +▁mistake +▁mistaken +▁mister +▁mistress +▁mitya +▁mix +▁mixture +▁mo +▁mock +▁mode +▁moderate +▁modern +▁modest +▁moment +▁mon +▁monarch +▁monday +▁money +▁monk +▁monkey +▁monsieur +▁monster +▁monstrous +▁monte +▁month +▁months +▁monument +▁mood +▁moon +▁moonlight +▁mor +▁moral +▁more +▁moreover +▁morning +▁morrow +▁mortal +▁moscow +▁most +▁mother +▁motion +▁motionless +▁motive +▁motor +▁mould +▁mount +▁mountain +▁mountains +▁mounted +▁mourn +▁mouse +▁mouth +▁move +▁moved +▁movement +▁moving +▁mu +▁much +▁mud +▁mule +▁multitude +▁murder +▁murderer +▁murmur +▁murmured +▁muscle +▁muscular +▁music +▁musket +▁must +▁muttered +▁mutual +▁my +▁myself +▁mysterious +▁mystery +▁na +▁nail +▁naked +▁name +▁named +▁nancy +▁napoleon +▁narrat +▁narrow +▁natasha +▁nation +▁national +▁native +▁natural +▁naturally +▁nature +▁naught +▁nautilus +▁nav +▁navigat +▁nay +▁ne +▁near +▁nearer +▁nearest +▁nearly +▁neat +▁necessarily +▁necessary +▁necessity +▁neck +▁need +▁needed +▁neglect +▁negro +▁neighbor +▁neighborhood +▁neighbour +▁neighbourhood +▁neither +▁nephew +▁nerve +▁nervous +▁nest +▁never +▁nevertheless +▁new +▁news +▁newspaper +▁next +▁ni +▁nice +▁nicholas +▁niece +▁nigh +▁night +▁nightingale +▁nine +▁nineteen +▁ninety +▁ninth +▁no +▁nobility +▁noble +▁nobody +▁nodded +▁noise +▁none +▁nonsense +▁nor +▁normal +▁norman +▁north +▁northern +▁nose +▁not +▁note +▁nothing +▁notice +▁noticed +▁notwithstanding +▁novel +▁november +▁now +▁nowhere +▁nu +▁number +▁numerous +▁nurse +▁nut +▁o +▁oak +▁oath +▁ob +▁obedience +▁obey +▁object +▁objection +▁obligation +▁obliged +▁obscure +▁observation +▁observe +▁observed +▁observing +▁obstacle +▁obstinate +▁obtain +▁obtained +▁obvious +▁occasion +▁occasionally +▁occupation +▁occupied +▁occupy +▁occur +▁occurred +▁occurrence +▁ocean +▁october +▁odd +▁of +▁off +▁offend +▁offer +▁offered +▁office +▁officer +▁officers +▁official +▁often +▁oh +▁oil +▁old +▁oliver +▁on +▁once +▁one +▁only +▁open +▁opened +▁opening +▁opera +▁operation +▁opinion +▁opponent +▁opportunity +▁opposite +▁opposition +▁oppress +▁or +▁orange +▁orchard +▁order +▁ordered +▁orders +▁ordinary +▁organ +▁organi +▁origin +▁original +▁ornament +▁other +▁others +▁otherwise +▁ought +▁ounce +▁our +▁ourselves +▁out +▁outrage +▁outside +▁oven +▁over +▁overcome +▁overflow +▁overlook +▁overtake +▁overwhelm +▁owe +▁owing +▁owl +▁own +▁oyster +▁p +▁pa +▁pace +▁pacific +▁pack +▁page +▁paid +▁pain +▁painful +▁painted +▁pair +▁pal +▁palace +▁pale +▁palm +▁pan +▁papa +▁paper +▁papers +▁par +▁para +▁paradise +▁parallel +▁parcel +▁pardon +▁parents +▁paris +▁park +▁parliament +▁parlor +▁parlour +▁part +▁particle +▁particular +▁particularly +▁parties +▁partner +▁parts +▁party +▁pass +▁passage +▁passed +▁passenger +▁passing +▁passion +▁passionate +▁past +▁pat 
+▁patch +▁path +▁patience +▁patient +▁patriot +▁paul +▁pause +▁paused +▁pavement +▁paw +▁pay +▁pe +▁pea +▁peace +▁peak +▁pearl +▁peasant +▁peculiar +▁peep +▁peer +▁pen +▁pencil +▁penetrate +▁penny +▁people +▁pepper +▁per +▁perceive +▁perceived +▁perceiving +▁perception +▁perch +▁perfect +▁perfection +▁perfectly +▁perform +▁performance +▁perfume +▁perhaps +▁peril +▁period +▁perish +▁permanent +▁permission +▁permit +▁permitted +▁perpetual +▁perplex +▁persecut +▁persist +▁person +▁personal +▁persons +▁persuade +▁pet +▁peter +▁pharaoh +▁phenomena +▁phenomenon +▁phil +▁philadelphia +▁philip +▁philosopher +▁philosophy +▁phoenix +▁photograph +▁phrase +▁physical +▁physician +▁pi +▁piano +▁pick +▁picked +▁picture +▁piece +▁pieces +▁pierced +▁pierre +▁pig +▁pile +▁pilgrim +▁pill +▁pillow +▁pilot +▁pin +▁pine +▁pink +▁pinocchio +▁pipe +▁pirate +▁pistol +▁pit +▁pitch +▁pitiful +▁pity +▁pla +▁plac +▁place +▁placed +▁places +▁plague +▁plain +▁plainly +▁plan +▁planet +▁plant +▁plate +▁platform +▁play +▁played +▁playing +▁plea +▁pleasant +▁please +▁pleased +▁pleasure +▁pledge +▁plenty +▁plot +▁plough +▁pluck +▁plum +▁plunder +▁plunge +▁po +▁pocket +▁poem +▁poet +▁poetry +▁point +▁pointed +▁poison +▁pole +▁police +▁policy +▁polish +▁polite +▁political +▁politics +▁polly +▁pond +▁pony +▁pool +▁poor +▁pope +▁popular +▁population +▁porch +▁port +▁porthos +▁portion +▁portrait +▁position +▁positive +▁possess +▁possessed +▁possession +▁possibility +▁possible +▁possibly +▁post +▁pot +▁pound +▁pounds +▁pour +▁poverty +▁powder +▁power +▁powerful +▁powers +▁pra +▁practical +▁practice +▁practise +▁prairie +▁praise +▁pray +▁prayer +▁pre +▁preach +▁precaution +▁precede +▁preceding +▁precious +▁precise +▁precisely +▁prefer +▁preferred +▁prejudice +▁preparation +▁prepare +▁prepared +▁preparing +▁presence +▁present +▁presented +▁presently +▁preserv +▁president +▁press +▁pressed +▁pressure +▁presume +▁pretend +▁pretty +▁prevail +▁prevent +▁previous +▁pri +▁price +▁pride +▁priest +▁primitive +▁prince +▁princess +▁principal +▁principle +▁print +▁priscilla +▁prison +▁prisoner +▁private +▁privilege +▁pro +▁probability +▁probable +▁probably +▁problem +▁proceed +▁proceeded +▁process +▁proclaim +▁procure +▁produce +▁produced +▁producing +▁product +▁profess +▁profession +▁professor +▁profit +▁profound +▁progress +▁prohibit +▁project +▁prominent +▁promise +▁promised +▁promising +▁promote +▁prompt +▁pronounc +▁proof +▁prop +▁proper +▁properly +▁property +▁prophet +▁proportion +▁proposal +▁propose +▁proposed +▁proposition +▁proprietor +▁prospect +▁prosperity +▁protect +▁protection +▁protest +▁proud +▁prove +▁proved +▁proverb +▁provide +▁provided +▁province +▁provision +▁provoke +▁prudence +▁prudent +▁psmith +▁pu +▁public +▁publish +▁puff +▁pull +▁pulled +▁pulse +▁punish +▁punishment +▁pupil +▁pur +▁purchase +▁pure +▁purple +▁purpose +▁purse +▁pursue +▁pursued +▁pursuit +▁push +▁pushed +▁put +▁putting +▁qua +▁quaint +▁qualities +▁quality +▁quantity +▁quarrel +▁quarter +▁queen +▁queer +▁question +▁questions +▁qui +▁quick +▁quickly +▁quiet +▁quietly +▁quite +▁quiver +▁quixote +▁quo +▁quoth +▁r +▁ra +▁rabbit +▁race +▁rachel +▁radiant +▁rag +▁rage +▁rail +▁railroad +▁railway +▁rain +▁rainbow +▁raise +▁raised +▁raising +▁ralph +▁ram +▁ran +▁rang +▁range +▁rank +▁raoul +▁rapid +▁rapidly +▁rare +▁rascal +▁rate +▁rather +▁rational +▁rattl +▁raven +▁ray +▁re +▁reach +▁reached +▁reaction +▁read +▁reader +▁readily +▁reading +▁ready +▁real +▁reali +▁reality +▁really +▁rear +▁reason +▁rebecca +▁rebel +▁recall +▁receive +▁received +▁receiving 
+▁recent +▁reception +▁recess +▁recit +▁reckless +▁reckon +▁recogni +▁recollect +▁recollection +▁recommend +▁reconcil +▁record +▁recover +▁recovered +▁red +▁reduced +▁refer +▁reference +▁refined +▁reflect +▁reflection +▁reform +▁refrain +▁refresh +▁refuge +▁refuse +▁refused +▁regain +▁regard +▁regarded +▁regiment +▁region +▁regret +▁regular +▁regulat +▁reign +▁reject +▁rejoice +▁rejoicing +▁relate +▁related +▁relation +▁relative +▁relax +▁release +▁reli +▁relief +▁relieve +▁religion +▁religious +▁reluctant +▁remain +▁remained +▁remark +▁remarkable +▁remarked +▁remedy +▁remember +▁remembered +▁remembrance +▁remind +▁remorse +▁remote +▁remove +▁removed +▁render +▁rendered +▁renew +▁rent +▁rep +▁repair +▁repeat +▁repeated +▁repent +▁replied +▁reply +▁report +▁represent +▁representative +▁reproach +▁republic +▁reputation +▁request +▁require +▁required +▁rescue +▁resemblance +▁resemble +▁reserve +▁residence +▁resign +▁resist +▁resistance +▁resolute +▁resolution +▁resolved +▁resort +▁resource +▁respect +▁response +▁responsibility +▁responsible +▁rest +▁restless +▁restore +▁restrain +▁result +▁resumed +▁retain +▁retire +▁retired +▁retorted +▁retreat +▁return +▁returned +▁returning +▁rev +▁reveal +▁revelation +▁revenge +▁rever +▁review +▁revolt +▁revolution +▁reward +▁ri +▁ribbon +▁rich +▁richard +▁richmond +▁rid +▁ride +▁ridiculous +▁riding +▁rifle +▁right +▁righteous +▁rigid +▁ring +▁ripe +▁rise +▁rising +▁risk +▁rival +▁river +▁ro +▁road +▁roar +▁roast +▁rob +▁robber +▁robe +▁robert +▁robin +▁rock +▁rocks +▁rode +▁roll +▁rolled +▁roman +▁rome +▁roof +▁room +▁root +▁rope +▁rosa +▁rose +▁rough +▁round +▁roused +▁route +▁row +▁royal +▁ru +▁rub +▁rubbed +▁rubbing +▁rude +▁ruin +▁rule +▁rum +▁run +▁running +▁rush +▁rushed +▁russia +▁russian +▁ruth +▁s +▁sa +▁sacred +▁sacrifice +▁sad +▁saddle +▁safe +▁safety +▁said +▁sail +▁sailor +▁saint +▁sake +▁sal +▁salt +▁salute +▁sam +▁same +▁samuel +▁san +▁sancho +▁sand +▁sang +▁sank +▁sarah +▁sat +▁satisfaction +▁satisfactory +▁satisfied +▁satisfy +▁saturday +▁sauce +▁savage +▁save +▁saved +▁saving +▁saw +▁say +▁saying +▁says +▁sc +▁sca +▁scale +▁scandal +▁scar +▁scarce +▁scarcely +▁scarecrow +▁scarlet +▁scattered +▁scene +▁scent +▁sch +▁scheme +▁scholar +▁school +▁science +▁scientific +▁scold +▁score +▁scorn +▁scotch +▁scotland +▁scott +▁scoundrel +▁scout +▁scramble +▁scrap +▁scratch +▁scream +▁screen +▁screw +▁scrooge +▁se +▁sea +▁seal +▁search +▁season +▁seat +▁seated +▁second +▁secret +▁secretary +▁section +▁secure +▁security +▁see +▁seeing +▁seek +▁seem +▁seemed +▁seems +▁seen +▁sei +▁seldom +▁select +▁self +▁selfish +▁sell +▁senate +▁senator +▁send +▁sensation +▁sense +▁sensible +▁sensitive +▁sent +▁sentence +▁sentiment +▁separate +▁separated +▁september +▁ser +▁serene +▁sergeant +▁series +▁serious +▁sermon +▁serpent +▁servant +▁servants +▁serve +▁served +▁service +▁serving +▁set +▁setting +▁settle +▁settled +▁seven +▁seventeen +▁seventy +▁several +▁severe +▁sex +▁sh +▁sha +▁shade +▁shadow +▁shaggy +▁shake +▁shakespeare +▁shaking +▁shall +▁shame +▁shape +▁share +▁sharp +▁sharply +▁shawl +▁she +▁sheep +▁shelter +▁shepherd +▁sheriff +▁shield +▁shift +▁shilling +▁shine +▁shining +▁ship +▁ships +▁shirt +▁shiver +▁shock +▁shoe +▁shoes +▁shone +▁shook +▁shoot +▁shop +▁shore +▁short +▁shot +▁should +▁shoulder +▁shoulders +▁shout +▁shouted +▁show +▁showed +▁shown +▁shrewd +▁shriek +▁shrill +▁shrink +▁shudder +▁shut +▁si +▁sick +▁side +▁sides +▁siege +▁sigh +▁sighed +▁sight +▁sign +▁signal +▁significance +▁significant +▁silence +▁silent +▁silk +▁silly +▁silver 
+▁similar +▁simon +▁simple +▁simplicity +▁simply +▁sin +▁since +▁sing +▁singing +▁single +▁singular +▁sink +▁sir +▁sister +▁sit +▁sitting +▁situated +▁situation +▁six +▁sixteen +▁sixty +▁sketch +▁ski +▁skilful +▁skill +▁skin +▁skirt +▁skull +▁sky +▁slain +▁slaughter +▁slave +▁slavery +▁slaves +▁sledge +▁sleep +▁sleeve +▁slender +▁slept +▁slew +▁slice +▁slid +▁slight +▁slightest +▁slightly +▁slim +▁slip +▁slipped +▁slo +▁slope +▁slow +▁slowly +▁slumber +▁small +▁smart +▁smash +▁smell +▁smile +▁smiled +▁smiling +▁smith +▁smoke +▁smoking +▁smooth +▁smot +▁snake +▁snap +▁snatch +▁sneer +▁snow +▁so +▁social +▁society +▁soft +▁softly +▁soil +▁sold +▁soldier +▁soldiers +▁solemn +▁solicit +▁solid +▁solitary +▁solitude +▁solomon +▁solution +▁some +▁somebody +▁somehow +▁someone +▁something +▁sometimes +▁somewhat +▁somewhere +▁son +▁song +▁soon +▁sooner +▁sooth +▁sorrow +▁sorry +▁sort +▁sought +▁soul +▁sound +▁source +▁south +▁southern +▁sovereign +▁sp +▁space +▁spain +▁spake +▁spaniard +▁spanish +▁spar +▁spare +▁spark +▁speak +▁speaking +▁spear +▁special +▁species +▁specimen +▁speck +▁spectacle +▁spectator +▁speculat +▁speech +▁speed +▁spell +▁spend +▁spent +▁sphere +▁spi +▁spin +▁spirit +▁spirits +▁spiritual +▁spite +▁splash +▁splendid +▁splendor +▁split +▁spoil +▁spoke +▁spoken +▁spoon +▁sport +▁spot +▁sprang +▁spread +▁spring +▁sprinkle +▁spur +▁squ +▁square +▁squee +▁squire +▁squirrel +▁st +▁sta +▁stable +▁staff +▁stage +▁stagger +▁staircase +▁stairs +▁stalk +▁stamp +▁stand +▁standard +▁standing +▁star +▁stared +▁stars +▁start +▁started +▁startled +▁state +▁statement +▁states +▁station +▁statue +▁stay +▁ste +▁steadily +▁steady +▁steal +▁steam +▁steel +▁steep +▁step +▁stephen +▁stepped +▁steps +▁stern +▁stick +▁stiff +▁still +▁stir +▁stirred +▁sto +▁stock +▁stole +▁stomach +▁stone +▁stones +▁stood +▁stooped +▁stop +▁stopped +▁stopping +▁store +▁stories +▁storm +▁story +▁stout +▁straight +▁strain +▁strait +▁strange +▁stranger +▁strap +▁strat +▁straw +▁stray +▁streak +▁stream +▁street +▁streets +▁strength +▁stretch +▁stretched +▁strew +▁stricken +▁strict +▁strike +▁striking +▁string +▁strip +▁stro +▁stroke +▁strong +▁struck +▁structure +▁struggle +▁struggling +▁stuck +▁student +▁studied +▁studies +▁studio +▁study +▁stuff +▁stumble +▁stump +▁stupid +▁style +▁su +▁sub +▁subdued +▁subject +▁sublime +▁submit +▁subsequent +▁substance +▁substantial +▁subtle +▁succeed +▁succeeded +▁success +▁successful +▁such +▁sudden +▁suddenly +▁suffer +▁suffered +▁suffering +▁suffice +▁sufficient +▁sufficiently +▁suffrage +▁sugar +▁suggest +▁suggested +▁suggestion +▁suit +▁sullen +▁sultan +▁sum +▁summer +▁summit +▁summon +▁sun +▁sunday +▁sunk +▁sunlight +▁sunrise +▁sunset +▁sunshine +▁super +▁superintend +▁superior +▁supper +▁supplied +▁supplies +▁supply +▁support +▁suppose +▁supposed +▁supposing +▁suppress +▁supreme +▁sur +▁sure +▁surely +▁surface +▁surgeon +▁surpass +▁surprise +▁surprised +▁surprising +▁surrender +▁surrounded +▁surrounding +▁survey +▁surviv +▁susan +▁suspect +▁suspicion +▁suspicious +▁sustain +▁sw +▁swa +▁swallow +▁swarm +▁swear +▁sweat +▁sweep +▁sweet +▁swell +▁swept +▁swift +▁swim +▁swimming +▁sword +▁swore +▁swung +▁sy +▁sylvia +▁symbol +▁sympathetic +▁sympathi +▁sympathy +▁symptom +▁system +▁t +▁ta +▁table +▁tail +▁take +▁taken +▁taking +▁tale +▁talent +▁talk +▁talked +▁talking +▁tall +▁tang +▁tank +▁tap +▁tar +▁task +▁taste +▁taught +▁tax +▁te +▁tea +▁teach +▁teacher +▁tear +▁tears +▁teeth +▁telegraph +▁telephone +▁tell +▁telling +▁temper +▁temperament +▁temperature +▁tempest +▁temple 
+▁temporary +▁tempt +▁temptation +▁ten +▁tendency +▁tender +▁tenderness +▁term +▁terms +▁terrace +▁terrible +▁terribly +▁terrified +▁territory +▁terror +▁test +▁testimony +▁text +▁th +▁than +▁thank +▁that +▁the +▁theatre +▁their +▁them +▁themselves +▁then +▁there +▁therefore +▁thereupon +▁these +▁they +▁thick +▁thief +▁thieves +▁thin +▁thing +▁things +▁think +▁thinking +▁third +▁thirst +▁thirteen +▁thirty +▁this +▁thither +▁thomas +▁thornton +▁thorough +▁thoroughly +▁those +▁thou +▁though +▁thought +▁thoughtfully +▁thoughts +▁thousand +▁thread +▁threat +▁threatened +▁threatening +▁three +▁threshold +▁threw +▁thrill +▁thro +▁throat +▁throne +▁throng +▁through +▁throughout +▁throw +▁throwing +▁thrown +▁thrust +▁thumb +▁thunder +▁thus +▁thy +▁thyself +▁ti +▁ticket +▁tide +▁tidings +▁tied +▁tight +▁till +▁timber +▁time +▁times +▁timid +▁tin +▁tiny +▁tip +▁tired +▁title +▁to +▁tobacco +▁today +▁together +▁told +▁tom +▁tomb +▁tomorrow +▁tone +▁tongue +▁too +▁took +▁top +▁torment +▁torrent +▁torture +▁total +▁touch +▁touched +▁toward +▁towards +▁tower +▁town +▁tra +▁trace +▁track +▁trade +▁tradition +▁tragedy +▁tragic +▁trail +▁train +▁traitor +▁tramp +▁tranquil +▁trans +▁transport +▁trap +▁travel +▁traveller +▁tre +▁tread +▁treasure +▁treat +▁treated +▁treatment +▁tree +▁trees +▁tremble +▁trembled +▁trembling +▁tremendous +▁trench +▁tri +▁trial +▁tribe +▁trick +▁tried +▁trifle +▁trifling +▁trip +▁tristram +▁triumph +▁triumphant +▁troop +▁troops +▁trot +▁trouble +▁troubled +▁trousers +▁trout +▁tru +▁true +▁truly +▁trumpet +▁trunk +▁trust +▁truth +▁try +▁trying +▁tu +▁tuesday +▁tulliver +▁tumble +▁tumult +▁turkey +▁turn +▁turned +▁turning +▁turtle +▁twas +▁twelve +▁twentieth +▁twenty +▁twice +▁twilight +▁twin +▁twist +▁two +▁type +▁tyrant +▁ugly +▁ultimate +▁umbrella +▁un +▁unable +▁unc +▁uncertain +▁uncle +▁uncomfortable +▁uncommon +▁unconscious +▁und +▁under +▁underneath +▁understand +▁understanding +▁understood +▁undertake +▁undertaking +▁undoubtedly +▁uneasiness +▁uneasy +▁unexpected +▁unfortunate +▁unhappy +▁uniform +▁union +▁united +▁universal +▁universe +▁university +▁unjust +▁unknown +▁unless +▁unlike +▁unnatural +▁unnecessary +▁unpleasant +▁unre +▁unseen +▁until +▁unto +▁unusual +▁unwilling +▁unworthy +▁up +▁upon +▁upper +▁upstairs +▁urge +▁us +▁use +▁used +▁useful +▁useless +▁usual +▁usually +▁utili +▁utmost +▁utter +▁uttered +▁utterly +▁va +▁vacant +▁vague +▁vain +▁val +▁valentine +▁valjean +▁valley +▁valuable +▁value +▁van +▁vanished +▁vari +▁variety +▁various +▁vast +▁vault +▁ve +▁vegetable +▁vehicle +▁veil +▁velvet +▁ven +▁vengeance +▁venture +▁ventured +▁ver +▁verse +▁very +▁vessel +▁vexed +▁vi +▁vibrat +▁vice +▁victim +▁victor +▁victory +▁view +▁vigorous +▁village +▁villain +▁villefort +▁vine +▁violence +▁violent +▁violet +▁virgin +▁virginia +▁virtue +▁virtuous +▁visible +▁vision +▁visit +▁visitor +▁vital +▁vivid +▁vo +▁voice +▁vol +▁volume +▁volunteer +▁vote +▁vow +▁voyage +▁vulgar +▁w +▁wa +▁wag +▁wagon +▁waist +▁waistcoat +▁wait +▁waited +▁waiting +▁wake +▁wal +▁walk +▁walked +▁walking +▁wall +▁walls +▁walter +▁wander +▁wandering +▁want +▁wanted +▁war +▁warm +▁warn +▁warning +▁warrant +▁warrior +▁was +▁wash +▁washington +▁watch +▁watched +▁watching +▁water +▁wave +▁waves +▁waving +▁wax +▁way +▁ways +▁we +▁weak +▁weakness +▁wealth +▁weapon +▁wear +▁weary +▁weather +▁wedding +▁week +▁weeks +▁weep +▁weigh +▁weight +▁welcome +▁welfare +▁well +▁went +▁wept +▁were +▁west +▁western +▁wh +▁whale +▁what +▁whatever +▁wheat +▁wheel +▁when +▁whence +▁where +▁wherefore +▁whereupon +▁whether 
+▁whi +▁which +▁while +▁whilst +▁whip +▁whirl +▁whisk +▁whisper +▁whispered +▁whistle +▁white +▁whither +▁who +▁whole +▁wholly +▁whom +▁whose +▁why +▁wi +▁wicked +▁wide +▁widow +▁wife +▁wild +▁wilderness +▁will +▁william +▁willing +▁wilson +▁wilt +▁win +▁wind +▁window +▁windows +▁wine +▁wings +▁winter +▁wip +▁wire +▁wisdom +▁wise +▁wish +▁wished +▁wishes +▁wit +▁witch +▁with +▁withdraw +▁withdrew +▁within +▁without +▁witness +▁wives +▁woe +▁woke +▁wolf +▁wolves +▁woman +▁women +▁won +▁wonder +▁wondered +▁wonderful +▁wondering +▁wood +▁wooden +▁woods +▁word +▁words +▁wore +▁work +▁worked +▁working +▁world +▁worm +▁worn +▁worried +▁worry +▁worse +▁worship +▁worst +▁worth +▁worthy +▁would +▁wouldn +▁wound +▁wounded +▁wrap +▁wrapped +▁wrath +▁wreck +▁wren +▁wretch +▁wretched +▁wrinkl +▁wrist +▁write +▁writer +▁writing +▁written +▁wrong +▁wrote +▁wrought +▁ya +▁yard +▁ye +▁year +▁years +▁yellow +▁yes +▁yesterday +▁yet +▁yield +▁yo +▁yonder +▁york +▁you +▁young +▁your +▁yourself +▁yourselves +▁youth + diff --git a/modules/audio/asr/u2_conformer_librispeech/module.py b/modules/audio/asr/u2_conformer_librispeech/module.py new file mode 100644 index 0000000000000000000000000000000000000000..b98277f56d4ee4e88a36ce4dfa0c32d35368b1e9 --- /dev/null +++ b/modules/audio/asr/u2_conformer_librispeech/module.py @@ -0,0 +1,74 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +from pathlib import Path +import sys + +import numpy as np +from paddlehub.env import MODULE_HOME +from paddlehub.module.module import moduleinfo, serving +from paddlehub.utils.log import logger + +import paddle +import soundfile as sf + +# TODO: Remove system path when deepspeech can be installed via pip. +sys.path.append(os.path.join(MODULE_HOME, 'u2_conformer_librispeech')) +from deepspeech.exps.u2.config import get_cfg_defaults +from deepspeech.utils.utility import UpdateConfig +from .u2_conformer_tester import U2ConformerTester + + +@moduleinfo( + name="u2_conformer_librispeech", version="1.0.0", summary="", author="Baidu", author_email="", type="audio/asr") +class U2Conformer(paddle.nn.Layer): + def __init__(self): + super(U2Conformer, self).__init__() + + # resource + res_dir = os.path.join(MODULE_HOME, 'u2_conformer_librispeech', 'assets') + conf_file = os.path.join(res_dir, 'conf/conformer.yaml') + checkpoint = os.path.join(res_dir, 'checkpoints/avg_30.pdparams') + + # config + self.config = get_cfg_defaults() + self.config.merge_from_file(conf_file) + + # TODO: Remove path updating snippet. 
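+        # Note: the block below rewrites the relative resource paths in the config
+        # (vocab file, sentencepiece model, augmentation config and CMVN file) into
+        # absolute paths under the module's assets directory, and pins decoding to
+        # attention_rescoring with batch_size=1 (single-utterance inference).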
+ with UpdateConfig(self.config): + self.config.collator.vocab_filepath = os.path.join(res_dir, self.config.collator.vocab_filepath) + self.config.collator.spm_model_prefix = os.path.join(res_dir, self.config.collator.spm_model_prefix) + self.config.collator.augmentation_config = os.path.join(res_dir, self.config.collator.augmentation_config) + self.config.model.cmvn_file = os.path.join(res_dir, self.config.model.cmvn_file) + self.config.decoding.decoding_method = 'attention_rescoring' + self.config.decoding.batch_size = 1 + + # model + self.tester = U2ConformerTester(self.config) + self.tester.setup_model() + self.tester.resume(checkpoint) + + @staticmethod + def check_audio(audio_file): + sig, sample_rate = sf.read(audio_file) + assert sample_rate == 16000, 'Excepting sample rate of input audio is 16000, but got {}'.format(sample_rate) + + @serving + def speech_recognize(self, audio_file, device='cpu'): + assert os.path.isfile(audio_file), 'File not exists: {}'.format(audio_file) + self.check_audio(audio_file) + + paddle.set_device(device) + return self.tester.test(audio_file)[0][0] diff --git a/modules/audio/asr/u2_conformer_librispeech/requirements.txt b/modules/audio/asr/u2_conformer_librispeech/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..49fb307f43939536be9ee5661a5a712aeba0792b --- /dev/null +++ b/modules/audio/asr/u2_conformer_librispeech/requirements.txt @@ -0,0 +1,12 @@ +loguru +yacs +jsonlines +scipy==1.2.1 +sentencepiece +resampy==0.2.2 +SoundFile==0.9.0.post1 +soxbindings +kaldiio +typeguard +editdistance +textgrid diff --git a/modules/audio/asr/u2_conformer_librispeech/u2_conformer_tester.py b/modules/audio/asr/u2_conformer_librispeech/u2_conformer_tester.py new file mode 100644 index 0000000000000000000000000000000000000000..c4f8d47055e29d1522c224e15439c9575270cc96 --- /dev/null +++ b/modules/audio/asr/u2_conformer_librispeech/u2_conformer_tester.py @@ -0,0 +1,80 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Evaluation for U2 model.""" +import os +import sys + +import paddle + +from deepspeech.frontend.featurizer.text_featurizer import TextFeaturizer +from deepspeech.io.collator import SpeechCollator +from deepspeech.models.u2 import U2Model +from deepspeech.utils import mp_tools +from deepspeech.utils.utility import UpdateConfig + + +class U2ConformerTester: + def __init__(self, config): + self.config = config + self.collate_fn_test = SpeechCollator.from_config(config) + self._text_featurizer = TextFeaturizer( + unit_type=config.collator.unit_type, vocab_filepath=None, spm_model_prefix=config.collator.spm_model_prefix) + + @mp_tools.rank_zero_only + @paddle.no_grad() + def test(self, audio_file): + self.model.eval() + cfg = self.config.decoding + collate_fn_test = self.collate_fn_test + audio, _ = collate_fn_test.process_utterance(audio_file=audio_file, transcript="Hello") + audio_len = audio.shape[0] + audio = paddle.to_tensor(audio, dtype='float32') + audio_len = paddle.to_tensor(audio_len) + audio = paddle.unsqueeze(audio, axis=0) + vocab_list = collate_fn_test.vocab_list + + text_feature = self.collate_fn_test.text_feature + result_transcripts = self.model.decode( + audio, + audio_len, + text_feature=text_feature, + decoding_method=cfg.decoding_method, + lang_model_path=cfg.lang_model_path, + beam_alpha=cfg.alpha, + beam_beta=cfg.beta, + beam_size=cfg.beam_size, + cutoff_prob=cfg.cutoff_prob, + cutoff_top_n=cfg.cutoff_top_n, + num_processes=cfg.num_proc_bsearch, + ctc_weight=cfg.ctc_weight, + decoding_chunk_size=cfg.decoding_chunk_size, + num_decoding_left_chunks=cfg.num_decoding_left_chunks, + simulate_streaming=cfg.simulate_streaming) + + return result_transcripts + + def setup_model(self): + config = self.config.clone() + with UpdateConfig(config): + config.model.input_dim = self.collate_fn_test.feature_size + config.model.output_dim = self.collate_fn_test.vocab_size + + self.model = U2Model.from_config(config.model) + + def resume(self, checkpoint): + """Resume from the checkpoint at checkpoints in the output + directory or load a specified checkpoint. + """ + model_dict = paddle.load(checkpoint) + self.model.set_state_dict(model_dict) diff --git a/modules/audio/asr/u2_conformer_wenetspeech/README.md b/modules/audio/asr/u2_conformer_wenetspeech/README.md new file mode 100644 index 0000000000000000000000000000000000000000..3cc2442c3b577f4059202ea9f53a5f2eaa9cf192 --- /dev/null +++ b/modules/audio/asr/u2_conformer_wenetspeech/README.md @@ -0,0 +1,157 @@ +# u2_conformer_wenetspeech + +|模型名称|u2_conformer_wenetspeech| +| :--- | :---: | +|类别|语音-语音识别| +|网络|Conformer| +|数据集|WenetSpeech| +|是否支持Fine-tuning|否| +|模型大小|494MB| +|最新更新日期|2021-12-10| +|数据指标|中文CER 0.087 | + +## 一、模型基本信息 + +### 模型介绍 + +U2 Conformer模型是一种适用于英文和中文的end-to-end语音识别模型。u2_conformer_wenetspeech采用了conformer的encoder和transformer的decoder的模型结构,并且使用了ctc-prefix beam search的方式进行一遍打分,再利用attention decoder进行二次打分的方式进行解码来得到最终结果。 + +u2_conformer_wenetspeech在中文普通话开源语音数据集[WenetSpeech](https://wenet-e2e.github.io/WenetSpeech/)进行了预训练,该模型在其DEV测试集上的CER指标是0.087。 + +
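+下面用一段极简的纯 Python 代码示意“一遍 CTC prefix beam search 打分、二遍 attention decoder 重打分”的组合思路。示例中的 `ctc_nbest`、`attention_rescore`、得分数值与加权方式均为虚构的说明性假设,并非本模块或 PaddleSpeech 的真实 API 与实现:
+
+```python
+# 假设:第一遍 CTC prefix beam search 已经产出 N-best 候选及其 CTC 得分(虚构数据)
+ctc_nbest = [
+    ("今天天气很好", -3.2),
+    ("今天天汽很好", -3.5),
+    ("今天天气很好吗", -4.1),
+]
+
+def attention_rescore(text):
+    # 假设的二遍打分函数:真实实现中由 attention decoder 对整句候选重新打分
+    return -0.5 * len(text)
+
+def two_pass_decode(nbest, ctc_weight=0.5):
+    # 将两遍得分加权融合,取总分最高的候选作为最终识别结果
+    best_text, best_score = None, float("-inf")
+    for text, ctc_score in nbest:
+        score = ctc_weight * ctc_score + (1.0 - ctc_weight) * attention_rescore(text)
+        if score > best_score:
+            best_text, best_score = text, score
+    return best_text
+
+print(two_pass_decode(ctc_nbest))
+```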


+ +更多详情请参考: +- [Unified Streaming and Non-streaming Two-pass End-to-end Model for Speech Recognition](https://arxiv.org/abs/2012.05481) +- [Conformer: Convolution-augmented Transformer for Speech Recognition](https://arxiv.org/abs/2005.08100) +- [WenetSpeech: A 10000+ Hours Multi-domain Mandarin Corpus for Speech Recognition](https://arxiv.org/abs/2110.03370) + +## 二、安装 + +- ### 1、系统依赖 + + - libsndfile + - Linux + ```shell + $ sudo apt-get install libsndfile + or + $ sudo yum install libsndfile + ``` + - MacOs + ``` + $ brew install libsndfile + ``` + +- ### 2、环境依赖 + + - paddlepaddle >= 2.2.0 + + - paddlehub >= 2.1.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) + +- ### 3、安装 + + - ```shell + $ hub install u2_conformer_wenetspeech + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + + +## 三、模型API预测 + +- ### 1、预测代码示例 + + ```python + import paddlehub as hub + + # 采样率为16k,格式为wav的中文语音音频 + wav_file = '/PATH/TO/AUDIO' + + model = hub.Module( + name='u2_conformer_wenetspeech', + version='1.0.0') + text = model.speech_recognize(wav_file) + + print(text) + ``` + +- ### 2、API + - ```python + def check_audio(audio_file) + ``` + - 检查输入音频格式和采样率是否满足为16000,如果不满足,则重新采样至16000并将新的音频文件保存至相同目录。 + + - **参数** + + - `audio_file`:本地音频文件(*.wav)的路径,如`/path/to/input.wav` + + - ```python + def speech_recognize( + audio_file, + device='cpu', + ) + ``` + - 将输入的音频识别成文字 + + - **参数** + + - `audio_file`:本地音频文件(*.wav)的路径,如`/path/to/input.wav` + - `device`:预测时使用的设备,默认为`cpu`,如需使用gpu预测,请设置为`gpu`。 + + - **返回** + + - `text`:str类型,返回输入音频的识别文字结果。 + + +## 四、服务部署 + +- PaddleHub Serving可以部署一个在线的语音识别服务。 + +- ### 第一步:启动PaddleHub Serving + + - ```shell + $ hub serving start -m u2_conformer_wenetspeech + ``` + + - 这样就完成了一个语音识别服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 需要识别的音频的存放路径,确保部署服务的机器可访问 + file = '/path/to/input.wav' + + # 以key的方式指定text传入预测方法的时的参数,此例中为"audio_file" + data = {"audio_file": file} + + # 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip + url = "http://127.0.0.1:8866/predict/u2_conformer_wenetspeech" + + # 指定post请求的headers为application/json方式 + headers = {"Content-Type": "application/json"} + + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + ``` + +## 五、更新历史 + +* 1.0.0 + + 初始发布 + + ```shell + $ hub install u2_conformer_wenetspeech + ``` diff --git a/modules/thirdparty/image/classification/marine_biometrics/__init__.py b/modules/audio/asr/u2_conformer_wenetspeech/__init__.py similarity index 100% rename from modules/thirdparty/image/classification/marine_biometrics/__init__.py rename to modules/audio/asr/u2_conformer_wenetspeech/__init__.py diff --git a/modules/audio/asr/u2_conformer_wenetspeech/module.py b/modules/audio/asr/u2_conformer_wenetspeech/module.py new file mode 100644 index 0000000000000000000000000000000000000000..51ff08c77a2baf29e31ca70dac9d9109279b00c1 --- /dev/null +++ b/modules/audio/asr/u2_conformer_wenetspeech/module.py @@ -0,0 +1,56 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +import paddle +from paddleaudio import load, save_wav +from paddlespeech.cli import ASRExecutor +from paddlehub.module.module import moduleinfo, serving +from paddlehub.utils.log import logger + + +@moduleinfo( + name="u2_conformer_wenetspeech", version="1.0.0", summary="", author="Wenet", author_email="", type="audio/asr") +class U2Conformer(paddle.nn.Layer): + def __init__(self): + super(U2Conformer, self).__init__() + self.asr_executor = ASRExecutor() + self.asr_kw_args = { + 'model': 'conformer_wenetspeech', + 'lang': 'zh', + 'sample_rate': 16000, + 'config': None, # Set `config` and `ckpt_path` to None to use pretrained model. + 'ckpt_path': None, + } + + @staticmethod + def check_audio(audio_file): + assert audio_file.endswith('.wav'), 'Input file must be a wave file `*.wav`.' + sig, sample_rate = load(audio_file) + if sample_rate != 16000: + sig, _ = load(audio_file, 16000) + audio_file_16k = audio_file[:audio_file.rindex('.')] + '_16k.wav' + logger.info('Resampling to 16000 sample rate to new audio file: {}'.format(audio_file_16k)) + save_wav(sig, 16000, audio_file_16k) + return audio_file_16k + else: + return audio_file + + @serving + def speech_recognize(self, audio_file, device='cpu'): + assert os.path.isfile(audio_file), 'File not exists: {}'.format(audio_file) + audio_file = self.check_audio(audio_file) + text = self.asr_executor(audio_file=audio_file, device=device, **self.asr_kw_args) + return text diff --git a/modules/audio/asr/u2_conformer_wenetspeech/requirements.txt b/modules/audio/asr/u2_conformer_wenetspeech/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..697ab54b76553598c45dfe7764a014826b393114 --- /dev/null +++ b/modules/audio/asr/u2_conformer_wenetspeech/requirements.txt @@ -0,0 +1 @@ +paddlespeech==0.1.0a9 diff --git a/modules/audio/audio_classification/PANNs/cnn10/README.md b/modules/audio/audio_classification/PANNs/cnn10/README.md index 9dd7c78f3ef22dc5218a84713a65cbe57f4d6e79..c6ce4c5555ea3da45c386abca7e6fa9e1b5a49f2 100644 --- a/modules/audio/audio_classification/PANNs/cnn10/README.md +++ b/modules/audio/audio_classification/PANNs/cnn10/README.md @@ -1,68 +1,52 @@ -```shell -$ hub install panns_cnn10==1.0.0 -``` +# panns_cnn10 +|模型名称|panns_cnn10| +| :--- | :---: | +|类别|语音-声音分类| +|网络|PANNs| +|数据集|Google Audioset| +|是否支持Fine-tuning|是| +|模型大小|31MB| +|最新更新日期|2021-06-15| +|数据指标|mAP 0.380| + +## 一、模型基本信息 + +### 模型介绍 `panns_cnn10`是一个基于[Google Audioset](https://research.google.com/audioset/)数据集训练的声音分类/识别的模型。该模型主要包含8个卷积层和2个全连接层,模型参数为4.9M。经过预训练后,可以用于提取音频的embbedding,维度是512。 更多详情请参考论文:[PANNs: Large-Scale Pretrained Audio Neural Networks for Audio Pattern Recognition](https://arxiv.org/pdf/1912.10211.pdf) -## API -```python -def __init__( - task, - num_class=None, - label_map=None, - load_checkpoint=None, - **kwargs, -) -``` - -创建Module对象。 - -**参数** - -* `task`: 任务名称,可为`sound-cls`或者`None`。`sound-cls`代表声音分类任务,可以对声音分类的数据集进行finetune;为`None`时可以获取预训练模型对音频进行分类/Tagging。 -* `num_classes`:声音分类任务的类别数,finetune时需要指定,数值与具体使用的数据集类别数一致。 -* `label_map`:预测时的类别映射表。 -* `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。 -* 
`**kwargs`:用户额外指定的关键字字典类型的参数。 - -```python -def predict( - data, - sample_rate, - batch_size=1, - feat_type='mel', - use_gpu=False -) -``` +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.0.0 -**参数** + - paddlehub >= 2.0.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) -* `data`: 待预测数据,格式为\[waveform1, wavwform2…,\],其中每个元素都是一个一维numpy列表,是音频的波形采样数值列表。 -* `sample_rate`:音频文件的采样率。 -* `feat_type`:音频特征的种类选取,当前支持`'mel'`(详情可查看[Mel-frequency cepstrum](https://en.wikipedia.org/wiki/Mel-frequency_cepstrum))和原始波形特征`'raw'`。 -* `batch_size`:模型批处理大小。 -* `use_gpu`:是否使用gpu,默认为False。对于GPU用户,建议开启use_gpu。 +- ### 2、安装 -**返回** + - ```shell + $ hub install panns_cnn10 + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) -* `results`:list类型,不同任务类型的返回结果如下 - * 声音分类(task参数为`sound-cls`):列表里包含每个音频文件的分类标签。 - * Tagging(task参数为`None`):列表里包含每个音频文件527个类别([Audioset标签](https://research.google.com/audioset/))的得分。 +## 三、模型API预测 -**代码示例** +- ### 1、预测代码示例 -- [ESC50](https://github.com/karolpiczak/ESC-50)声音分类预测 - ```python + - ```python + # ESC50声音分类预测 import librosa import paddlehub as hub from paddlehub.datasets import ESC50 sr = 44100 # 音频文件的采样率 - wav_file = '/data/cat.wav' # 用于预测的音频文件路径 + wav_file = '/PATH/TO/AUDIO' # 用于预测的音频文件路径 checkpoint = 'model.pdparams' # 用于预测的模型参数 label_map = {idx: label for idx, label in enumerate(ESC50.label_list)} @@ -86,8 +70,8 @@ def predict( print('File: {}\tLable: {}'.format(wav_file, result[0])) ``` -- Audioset Tagging - ```python + - ```python + # Audioset Tagging import librosa import numpy as np import paddlehub as hub @@ -105,7 +89,7 @@ def predict( print(msg) sr = 44100 # 音频文件的采样率 - wav_file = '/data/cat.wav' # 用于预测的音频文件路径 + wav_file = '/PATH/TO/AUDIO' # 用于预测的音频文件路径 label_file = './audioset_labels.txt' # audioset标签文本文件 topk = 10 # 展示的topk数 @@ -130,23 +114,58 @@ def predict( show_topk(topk, label_map, wav_file, result[0]) ``` -详情可参考PaddleHub示例: -- [AudioClassification](https://github.com/PaddlePaddle/PaddleHub/tree/release/v2.0/demo/audio_classification) - - -## 查看代码 - -https://github.com/qiuqiangkong/audioset_tagging_cnn +- ### 2、API + - ```python + def __init__( + task, + num_class=None, + label_map=None, + load_checkpoint=None, + **kwargs, + ) + ``` + - 创建Module对象。 + + - **参数** + - `task`: 任务名称,可为`sound-cls`或者`None`。`sound-cls`代表声音分类任务,可以对声音分类的数据集进行finetune;为`None`时可以获取预训练模型对音频进行分类/Tagging。 + - `num_classes`:声音分类任务的类别数,finetune时需要指定,数值与具体使用的数据集类别数一致。 + - `label_map`:预测时的类别映射表。 + - `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。 + - `**kwargs`:用户额外指定的关键字字典类型的参数。 + + - ```python + def predict( + data, + sample_rate, + batch_size=1, + feat_type='mel', + use_gpu=False + ) + ``` + - 模型预测,输入为音频波形数据,输出为分类标签。 -## 依赖 + - **参数** + - `data`: 待预测数据,格式为\[waveform1, wavwform2…,\],其中每个元素都是一个一维numpy列表,是音频的波形采样数值列表。 + - `sample_rate`:音频文件的采样率。 + - `feat_type`:音频特征的种类选取,当前支持`'mel'`(详情可查看[Mel-frequency cepstrum](https://en.wikipedia.org/wiki/Mel-frequency_cepstrum))和原始波形特征`'raw'`。 + - `batch_size`:模型批处理大小。 + - `use_gpu`:是否使用gpu,默认为False。对于GPU用户,建议开启use_gpu。 -paddlepaddle >= 2.0.0 + - **返回** + - `results`:list类型,不同任务类型的返回结果如下 + - 声音分类(task参数为`sound-cls`):列表里包含每个音频文件的分类标签。 + - Tagging(task参数为`None`):列表里包含每个音频文件527个类别([Audioset标签](https://research.google.com/audioset/))的得分。 -paddlehub >= 2.0.0 + 详情可参考PaddleHub示例: + - 
[AudioClassification](https://github.com/PaddlePaddle/PaddleHub/tree/release/v2.0/demo/audio_classification) -## 更新历史 +## 四、更新历史 * 1.0.0 初始发布,动态图版本模型,支持声音分类`sound-cls`任务的fine-tune和基于Audioset Tagging预测。 + + ```shell + $ hub install panns_cnn10 + ``` diff --git a/modules/audio/audio_classification/PANNs/cnn14/README.md b/modules/audio/audio_classification/PANNs/cnn14/README.md index adb66f9c2c48972687e2436f67d836a91decbbd2..c65e7bea40d4c8066bb347f4338502de3c1914c9 100644 --- a/modules/audio/audio_classification/PANNs/cnn14/README.md +++ b/modules/audio/audio_classification/PANNs/cnn14/README.md @@ -1,68 +1,52 @@ -```shell -$ hub install panns_cnn14==1.0.0 -``` +# panns_cnn14 +|模型名称|panns_cnn14| +| :--- | :---: | +|类别|语音-声音分类| +|网络|PANNs| +|数据集|Google Audioset| +|是否支持Fine-tuning|是| +|模型大小|469MB| +|最新更新日期|2021-06-15| +|数据指标|mAP 0.431| + +## 一、模型基本信息 + +### 模型介绍 `panns_cnn14`是一个基于[Google Audioset](https://research.google.com/audioset/)数据集训练的声音分类/识别的模型。该模型主要包含12个卷积层和2个全连接层,模型参数为79.6M。经过预训练后,可以用于提取音频的embbedding,维度是2048。 更多详情请参考论文:[PANNs: Large-Scale Pretrained Audio Neural Networks for Audio Pattern Recognition](https://arxiv.org/pdf/1912.10211.pdf) -## API -```python -def __init__( - task, - num_class=None, - label_map=None, - load_checkpoint=None, - **kwargs, -) -``` - -创建Module对象。 - -**参数** - -* `task`: 任务名称,可为`sound-cls`或者`None`。`sound-cls`代表声音分类任务,可以对声音分类的数据集进行finetune;为`None`时可以获取预训练模型对音频进行分类/Tagging。 -* `num_classes`:声音分类任务的类别数,finetune时需要指定,数值与具体使用的数据集类别数一致。 -* `label_map`:预测时的类别映射表。 -* `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。 -* `**kwargs`:用户额外指定的关键字字典类型的参数。 - -```python -def predict( - data, - sample_rate, - batch_size=1, - feat_type='mel', - use_gpu=False -) -``` +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.0.0 -**参数** + - paddlehub >= 2.0.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) -* `data`: 待预测数据,格式为\[waveform1, wavwform2…,\],其中每个元素都是一个一维numpy列表,是音频的波形采样数值列表。 -* `sample_rate`:音频文件的采样率。 -* `feat_type`:音频特征的种类选取,当前支持`'mel'`(详情可查看[Mel-frequency cepstrum](https://en.wikipedia.org/wiki/Mel-frequency_cepstrum))和原始波形特征`'raw'`。 -* `batch_size`:模型批处理大小。 -* `use_gpu`:是否使用gpu,默认为False。对于GPU用户,建议开启use_gpu。 +- ### 2、安装 -**返回** + - ```shell + $ hub install panns_cnn14 + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) -* `results`:list类型,不同任务类型的返回结果如下 - * 声音分类(task参数为`sound-cls`):列表里包含每个音频文件的分类标签。 - * Tagging(task参数为`None`):列表里包含每个音频文件527个类别([Audioset标签](https://research.google.com/audioset/))的得分。 +## 三、模型API预测 -**代码示例** +- ### 1、预测代码示例 -- [ESC50](https://github.com/karolpiczak/ESC-50)声音分类预测 - ```python + - ```python + # ESC50声音分类预测 import librosa import paddlehub as hub from paddlehub.datasets import ESC50 sr = 44100 # 音频文件的采样率 - wav_file = '/data/cat.wav' # 用于预测的音频文件路径 + wav_file = '/PATH/TO/AUDIO' # 用于预测的音频文件路径 checkpoint = 'model.pdparams' # 用于预测的模型参数 label_map = {idx: label for idx, label in enumerate(ESC50.label_list)} @@ -86,8 +70,8 @@ def predict( print('File: {}\tLable: {}'.format(wav_file, result[0])) ``` -- Audioset Tagging - ```python + - ```python + # Audioset Tagging import librosa import numpy as np import paddlehub as hub @@ -105,7 +89,7 @@ def predict( print(msg) sr = 44100 # 音频文件的采样率 - wav_file = '/data/cat.wav' # 用于预测的音频文件路径 + wav_file = '/PATH/TO/AUDIO' # 用于预测的音频文件路径 label_file = './audioset_labels.txt' # audioset标签文本文件 topk = 10 # 展示的topk数 @@ 
-130,23 +114,58 @@ def predict( show_topk(topk, label_map, wav_file, result[0]) ``` -详情可参考PaddleHub示例: -- [AudioClassification](https://github.com/PaddlePaddle/PaddleHub/tree/release/v2.0/demo/audio_classification) - - -## 查看代码 - -https://github.com/qiuqiangkong/audioset_tagging_cnn +- ### 2、API + - ```python + def __init__( + task, + num_class=None, + label_map=None, + load_checkpoint=None, + **kwargs, + ) + ``` + - 创建Module对象。 + + - **参数** + - `task`: 任务名称,可为`sound-cls`或者`None`。`sound-cls`代表声音分类任务,可以对声音分类的数据集进行finetune;为`None`时可以获取预训练模型对音频进行分类/Tagging。 + - `num_classes`:声音分类任务的类别数,finetune时需要指定,数值与具体使用的数据集类别数一致。 + - `label_map`:预测时的类别映射表。 + - `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。 + - `**kwargs`:用户额外指定的关键字字典类型的参数。 + + - ```python + def predict( + data, + sample_rate, + batch_size=1, + feat_type='mel', + use_gpu=False + ) + ``` + - 模型预测,输入为音频波形数据,输出为分类标签。 -## 依赖 + - **参数** + - `data`: 待预测数据,格式为\[waveform1, wavwform2…,\],其中每个元素都是一个一维numpy列表,是音频的波形采样数值列表。 + - `sample_rate`:音频文件的采样率。 + - `feat_type`:音频特征的种类选取,当前支持`'mel'`(详情可查看[Mel-frequency cepstrum](https://en.wikipedia.org/wiki/Mel-frequency_cepstrum))和原始波形特征`'raw'`。 + - `batch_size`:模型批处理大小。 + - `use_gpu`:是否使用gpu,默认为False。对于GPU用户,建议开启use_gpu。 -paddlepaddle >= 2.0.0 + - **返回** + - `results`:list类型,不同任务类型的返回结果如下 + - 声音分类(task参数为`sound-cls`):列表里包含每个音频文件的分类标签。 + - Tagging(task参数为`None`):列表里包含每个音频文件527个类别([Audioset标签](https://research.google.com/audioset/))的得分。 -paddlehub >= 2.0.0 + 详情可参考PaddleHub示例: + - [AudioClassification](https://github.com/PaddlePaddle/PaddleHub/tree/release/v2.0/demo/audio_classification) -## 更新历史 +## 四、更新历史 * 1.0.0 初始发布,动态图版本模型,支持声音分类`sound-cls`任务的fine-tune和基于Audioset Tagging预测。 + + ```shell + $ hub install panns_cnn14 + ``` diff --git a/modules/audio/audio_classification/PANNs/cnn6/README.md b/modules/audio/audio_classification/PANNs/cnn6/README.md index dd10c0b2600cf05ba92c2d512349038945af67b6..0e8b9442dd0f9dfefabd63502e37c05cce9572a5 100644 --- a/modules/audio/audio_classification/PANNs/cnn6/README.md +++ b/modules/audio/audio_classification/PANNs/cnn6/README.md @@ -1,68 +1,52 @@ -```shell -$ hub install panns_cnn6==1.0.0 -``` +# panns_cnn6 +|模型名称|panns_cnn6| +| :--- | :---: | +|类别|语音-声音分类| +|网络|PANNs| +|数据集|Google Audioset| +|是否支持Fine-tuning|是| +|模型大小|29MB| +|最新更新日期|2021-06-15| +|数据指标|mAP 0.343| -`panns_cnn6`是一个基于[Google Audioset](https://research.google.com/audioset/)数据集训练的声音分类/识别的模型。该模型主要包含4个卷积层和2个全连接层,模型参数为4.5M。经过预训练后,可以用于提取音频的embbedding,维度是512。 +## 一、模型基本信息 -更多详情请参考论文:[PANNs: Large-Scale Pretrained Audio Neural Networks for Audio Pattern Recognition](https://arxiv.org/pdf/1912.10211.pdf) +### 模型介绍 -## API -```python -def __init__( - task, - num_class=None, - label_map=None, - load_checkpoint=None, - **kwargs, -) -``` +`panns_cnn6`是一个基于[Google Audioset](https://research.google.com/audioset/)数据集训练的声音分类/识别的模型。该模型主要包含4个卷积层和2个全连接层,模型参数为4.5M。经过预训练后,可以用于提取音频的embbedding,维度是512。 -创建Module对象。 +更多详情请参考:[PANNs: Large-Scale Pretrained Audio Neural Networks for Audio Pattern Recognition](https://arxiv.org/pdf/1912.10211.pdf) -**参数** +## 二、安装 -* `task`: 任务名称,可为`sound-cls`或者`None`。`sound-cls`代表声音分类任务,可以对声音分类的数据集进行finetune;为`None`时可以获取预训练模型对音频进行分类/Tagging。 -* `num_classes`:声音分类任务的类别数,finetune时需要指定,数值与具体使用的数据集类别数一致。 -* `label_map`:预测时的类别映射表。 -* `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。 -* `**kwargs`:用户额外指定的关键字字典类型的参数。 +- ### 1、环境依赖 -```python -def predict( - data, - sample_rate, - batch_size=1, - feat_type='mel', - use_gpu=False -) -``` + - paddlepaddle >= 2.0.0 -**参数** + - paddlehub >= 2.0.0 | 
[如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) -* `data`: 待预测数据,格式为\[waveform1, wavwform2…,\],其中每个元素都是一个一维numpy列表,是音频的波形采样数值列表。 -* `sample_rate`:音频文件的采样率。 -* `feat_type`:音频特征的种类选取,当前支持`'mel'`(详情可查看[Mel-frequency cepstrum](https://en.wikipedia.org/wiki/Mel-frequency_cepstrum))和原始波形特征`'raw'`。 -* `batch_size`:模型批处理大小。 -* `use_gpu`:是否使用gpu,默认为False。对于GPU用户,建议开启use_gpu。 +- ### 2、安装 -**返回** + - ```shell + $ hub install panns_cnn6 + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) -* `results`:list类型,不同任务类型的返回结果如下 - * 声音分类(task参数为`sound-cls`):列表里包含每个音频文件的分类标签。 - * Tagging(task参数为`None`):列表里包含每个音频文件527个类别([Audioset标签](https://research.google.com/audioset/))的得分。 +## 三、模型API预测 -**代码示例** +- ### 1、预测代码示例 -- [ESC50](https://github.com/karolpiczak/ESC-50)声音分类预测 - ```python + - ```python + # ESC50声音分类预测 import librosa import paddlehub as hub from paddlehub.datasets import ESC50 sr = 44100 # 音频文件的采样率 - wav_file = '/data/cat.wav' # 用于预测的音频文件路径 + wav_file = '/PATH/TO/AUDIO' # 用于预测的音频文件路径 checkpoint = 'model.pdparams' # 用于预测的模型参数 label_map = {idx: label for idx, label in enumerate(ESC50.label_list)} @@ -86,8 +70,8 @@ def predict( print('File: {}\tLable: {}'.format(wav_file, result[0])) ``` -- Audioset Tagging - ```python + - ```python + # Audioset Tagging import librosa import numpy as np import paddlehub as hub @@ -105,7 +89,7 @@ def predict( print(msg) sr = 44100 # 音频文件的采样率 - wav_file = '/data/cat.wav' # 用于预测的音频文件路径 + wav_file = '/PATH/TO/AUDIO' # 用于预测的音频文件路径 label_file = './audioset_labels.txt' # audioset标签文本文件 topk = 10 # 展示的topk数 @@ -130,23 +114,58 @@ def predict( show_topk(topk, label_map, wav_file, result[0]) ``` -详情可参考PaddleHub示例: -- [AudioClassification](https://github.com/PaddlePaddle/PaddleHub/tree/release/v2.0/demo/audio_classification) - - -## 查看代码 - -https://github.com/qiuqiangkong/audioset_tagging_cnn +- ### 2、API + - ```python + def __init__( + task, + num_class=None, + label_map=None, + load_checkpoint=None, + **kwargs, + ) + ``` + - 创建Module对象。 + + - **参数** + - `task`: 任务名称,可为`sound-cls`或者`None`。`sound-cls`代表声音分类任务,可以对声音分类的数据集进行finetune;为`None`时可以获取预训练模型对音频进行分类/Tagging。 + - `num_classes`:声音分类任务的类别数,finetune时需要指定,数值与具体使用的数据集类别数一致。 + - `label_map`:预测时的类别映射表。 + - `load_checkpoint`:使用PaddleHub Fine-tune api训练保存的模型参数文件路径。 + - `**kwargs`:用户额外指定的关键字字典类型的参数。 + + - ```python + def predict( + data, + sample_rate, + batch_size=1, + feat_type='mel', + use_gpu=False + ) + ``` + - 模型预测,输入为音频波形数据,输出为分类标签。 -## 依赖 + - **参数** + - `data`: 待预测数据,格式为\[waveform1, wavwform2…,\],其中每个元素都是一个一维numpy列表,是音频的波形采样数值列表。 + - `sample_rate`:音频文件的采样率。 + - `feat_type`:音频特征的种类选取,当前支持`'mel'`(详情可查看[Mel-frequency cepstrum](https://en.wikipedia.org/wiki/Mel-frequency_cepstrum))和原始波形特征`'raw'`。 + - `batch_size`:模型批处理大小。 + - `use_gpu`:是否使用gpu,默认为False。对于GPU用户,建议开启use_gpu。 -paddlepaddle >= 2.0.0 + - **返回** + - `results`:list类型,不同任务类型的返回结果如下 + - 声音分类(task参数为`sound-cls`):列表里包含每个音频文件的分类标签。 + - Tagging(task参数为`None`):列表里包含每个音频文件527个类别([Audioset标签](https://research.google.com/audioset/))的得分。 -paddlehub >= 2.0.0 + 详情可参考PaddleHub示例: + - [AudioClassification](https://github.com/PaddlePaddle/PaddleHub/tree/release/v2.0/demo/audio_classification) -## 更新历史 +## 四、更新历史 * 1.0.0 初始发布,动态图版本模型,支持声音分类`sound-cls`任务的fine-tune和基于Audioset Tagging预测。 + + ```shell + $ hub install panns_cnn6 + ``` diff --git 
a/modules/audio/tts/deepvoice3_ljspeech/README.md b/modules/audio/tts/deepvoice3_ljspeech/README.md index a1a659d250f9d920e3d104092d033ea3921ab854..ea5d2636f092d36694015727833c0442a2cb247a 100644 --- a/modules/audio/tts/deepvoice3_ljspeech/README.md +++ b/modules/audio/tts/deepvoice3_ljspeech/README.md @@ -1,120 +1,147 @@ -## 概述 +# deepvoice3_ljspeech + +|模型名称|deepvoice3_ljspeech| +| :--- | :---: | +|类别|语音-语音合成| +|网络|DeepVoice3| +|数据集|LJSpeech-1.1| +|是否支持Fine-tuning|否| +|模型大小|58MB| +|最新更新日期|2020-10-27| +|数据指标|-| + +## 一、模型基本信息 + +### 模型介绍 Deep Voice 3是百度研究院2017年发布的端到端的TTS模型(论文录用于ICLR 2018)。它是一个基于卷积神经网络和注意力机制的seq2seq模型,由于不包含循环神经网络,它可以并行训练,远快于基于循环神经网络的模型。Deep Voice 3可以学习到多个说话人的特征,也支持搭配多种声码器使用。deepvoice3_ljspeech是基于ljspeech英文语音数据集预训练得到的英文TTS模型,仅支持预测。


更多详情参考论文[Deep Voice 3: Scaling Text-to-Speech with Convolutional Sequence Learning](https://arxiv.org/abs/1710.07654) -## 命令行预测 -```shell -$ hub run deepvoice3_ljspeech --input_text='Simple as this proposition is, it is necessary to be stated' --use_gpu True --vocoder griffin-lim -``` +## 二、安装 -## API +- ### 1、系统依赖 -```python -def synthesize(texts, use_gpu=False, vocoder="griffin-lim"): -``` + 对于Ubuntu用户,请执行: + ``` + sudo apt-get install libsndfile1 + ``` + 对于Centos用户,请执行: + ``` + sudo yum install libsndfile + ``` -预测API,由输入文本合成对应音频波形。 +- ### 2、环境依赖 -**参数** + - 2.0.0 > paddlepaddle >= 1.8.2 -* texts (list\[str\]): 待预测文本; -* use\_gpu (bool): 是否使用 GPU;**若使用GPU,请先设置CUDA\_VISIBLE\_DEVICES环境变量**; -* vocoder: 指定声码器,可选 "griffin-lim"或"waveflow" + - 2.0.0 > paddlehub >= 1.7.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) -**返回** +- ### 3、安装 -* wavs (list): 语音合成结果列表,列表中每一个元素为对应输入文本的音频波形,可使用`soundfile.write`进一步处理或保存。 -* sample\_rate (int): 合成音频的采样率。 + - ```shell + $ hub install deepvoice3_ljspeech + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) -**代码示例** -```python -import paddlehub as hub -import soundfile as sf +## 三、模型API预测 -# Load deepvoice3_ljspeech module. -module = hub.Module(name="deepvoice3_ljspeech") +- ### 1、命令行预测 -# Predict sentiment label -test_texts = ['Simple as this proposition is, it is necessary to be stated', - 'Parakeet stands for Paddle PARAllel text-to-speech toolkit'] -wavs, sample_rate = module.synthesize(texts=test_texts) -for index, wav in enumerate(wavs): - sf.write(f"{index}.wav", wav, sample_rate) -``` + - ```shell + $ hub run deepvoice3_ljspeech --input_text='Simple as this proposition is, it is necessary to be stated' --use_gpu True --vocoder griffin-lim + ``` + - 通过命令行方式实现语音合成模型的调用,更多请见[PaddleHub命令行指令](https://github.com/shinichiye/PaddleHub/blob/release/v2.1/docs/docs_ch/tutorial/cmd_usage.rst) -## 服务部署 +- ### 2、预测代码示例 -PaddleHub Serving 可以部署在线服务。 + - ```python + import paddlehub as hub + import soundfile as sf -### 第一步:启动PaddleHub Serving + # Load deepvoice3_ljspeech module. 
+ module = hub.Module(name="deepvoice3_ljspeech") -运行启动命令: -```shell -$ hub serving start -m deepvoice3_ljspeech -``` + # Predict sentiment label + test_texts = ['Simple as this proposition is, it is necessary to be stated', + 'Parakeet stands for Paddle PARAllel text-to-speech toolkit'] + wavs, sample_rate = module.synthesize(texts=test_texts) + for index, wav in enumerate(wavs): + sf.write(f"{index}.wav", wav, sample_rate) + ``` -这样就完成了一个服务化API的部署,默认端口号为8866。 +- ### 3、API -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + - ```python + def synthesize(texts, use_gpu=False, vocoder="griffin-lim"): + ``` -### 第二步:发送预测请求 + - 预测API,由输入文本合成对应音频波形。 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + - **参数** + - texts (list\[str\]): 待预测文本; + - use\_gpu (bool): 是否使用 GPU;**若使用GPU,请先设置CUDA\_VISIBLE\_DEVICES环境变量**; + - vocoder: 指定声码器,可选 "griffin-lim"或"waveflow" -```python -import requests -import json + - **返回** + - wavs (list): 语音合成结果列表,列表中每一个元素为对应输入文本的音频波形,可使用`soundfile.write`进一步处理或保存。 + - sample\_rate (int): 合成音频的采样率。 -import soundfile as sf -# 发送HTTP请求 +## 四、服务部署 -data = {'texts':['Simple as this proposition is, it is necessary to be stated', - 'Parakeet stands for Paddle PARAllel text-to-speech toolkit'], - 'use_gpu':False} -headers = {"Content-type": "application/json"} -url = "http://127.0.0.1:8866/predict/deepvoice3_ljspeech" -r = requests.post(url=url, headers=headers, data=json.dumps(data)) +- PaddleHub Serving可以部署一个在线语音合成服务,可以将此接口用于在线web应用。 -# 保存结果 -result = r.json()["results"] -wavs = result["wavs"] -sample_rate = result["sample_rate"] -for index, wav in enumerate(wavs): - sf.write(f"{index}.wav", wav, sample_rate) -``` +- ### 第一步:启动PaddleHub Serving -## 查看代码 + - 运行启动命令 + - ```shell + $ hub serving start -m deepvoice3_ljspeech + ``` + - 这样就完成了服务化API的部署,默认端口号为8866。 + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 -https://github.com/PaddlePaddle/Parakeet +- ### 第二步:发送预测请求 -### 依赖 + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 -paddlepaddle >= 1.8.2 + - ```python + import requests + import json -paddlehub >= 1.7.0 + import soundfile as sf -**NOTE:** 除了python依赖外还必须安装libsndfile库 + # 发送HTTP请求 -对于Ubuntu用户,请执行: -``` -sudo apt-get install libsndfile1 -``` -对于Centos用户,请执行: -``` -sudo yum install libsndfile -``` + data = {'texts':['Simple as this proposition is, it is necessary to be stated', + 'Parakeet stands for Paddle PARAllel text-to-speech toolkit'], + 'use_gpu':False} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/deepvoice3_ljspeech" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) -## 更新历史 + # 保存结果 + result = r.json()["results"] + wavs = result["wavs"] + sample_rate = result["sample_rate"] + for index, wav in enumerate(wavs): + sf.write(f"{index}.wav", wav, sample_rate) + ``` + + +## 五、更新历史 * 1.0.0 初始发布 + + ```shell + $ hub install deepvoice3_ljspeech + ``` diff --git a/modules/audio/tts/fastspeech2_baker/README.md b/modules/audio/tts/fastspeech2_baker/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1ec244d616108ab3781af885d31197ddcc5b31b3 --- /dev/null +++ b/modules/audio/tts/fastspeech2_baker/README.md @@ -0,0 +1,156 @@ +# fastspeech2_baker + +|模型名称|fastspeech2_baker| +| :--- | :---: | +|类别|语音-语音合成| +|网络|FastSpeech2| +|数据集|Chinese Standard Mandarin Speech Copus| +|是否支持Fine-tuning|否| +|模型大小|621MB| +|最新更新日期|2021-10-20| +|数据指标|-| + +## 一、模型基本信息 + +### 模型介绍 + +FastSpeech2是微软亚洲研究院和微软Azure语音团队联合浙江大学于2020年提出的语音合成(Text to Speech, 
TTS)模型。FastSpeech2是FastSpeech的改进版,解决了FastSpeech依赖Teacher-Student知识蒸馏框架、训练流程比较复杂,以及训练目标相比真实语音存在信息损失的问题。 + +FastSpeech2的模型架构如下图所示,它沿用FastSpeech中提出的Feed-Forward Transformer(FFT)架构,但在音素编码器和梅尔频谱解码器之间加入了一个可变信息适配器(Variance Adaptor),从而支持在FastSpeech2中引入更多语音中变化的信息,例如时长、音高、音量(频谱能量)等,来解决语音合成中的一对多映射问题。 + +
+（图:FastSpeech2 模型结构示意图）
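+
+下面用一段仅作示意的 NumPy 片段帮助理解可变信息适配器的基本思路:先按预测的音素时长把编码器输出展开到帧级,再叠加音高、能量等变化信息作为解码器输入。其中的数组形状与数值均为假设,与 Parakeet 的实际实现无关:
+
+```python
+import numpy as np
+
+# 假设:4 个音素,隐状态维度取 8(实际配置中 adim=384)
+hidden = np.random.randn(4, 8)            # 音素级编码器输出 [T_phone, D]
+durations = np.array([2, 3, 1, 4])        # 时长预测器输出的帧数(假设值)
+pitch = np.array([0.5, 0.1, -0.3, 0.2])   # 音高预测器输出(假设值)
+energy = np.array([1.0, 0.8, 0.6, 0.9])   # 能量预测器输出(假设值)
+
+# 1) 长度调节:按时长把每个音素的隐状态重复到帧级
+frame_hidden = np.repeat(hidden, durations, axis=0)    # [T_frame, D]
+
+# 2) 叠加变化信息:实际模型使用卷积嵌入层,这里简化为广播相加
+frame_pitch = np.repeat(pitch, durations)[:, None]
+frame_energy = np.repeat(energy, durations)[:, None]
+decoder_input = frame_hidden + frame_pitch + frame_energy
+
+print(decoder_input.shape)   # (10, 8),帧数等于各音素时长之和
+```
+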
+ +Parallel WaveGAN是一种基于生成对抗网络、无需知识蒸馏的波形生成方法,生成速度快且模型占用空间小。该方法通过联合优化多分辨率STFT损失(谱图损失)和对抗损失来训练非自回归WaveNet声码器,可以有效捕获真实语音波形的时频分布。Parallel WaveGAN的结构如下图所示: + +
+（图:Parallel WaveGAN 结构示意图）
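+
+上一段提到的"联合优化多分辨率谱图损失(即多分辨率 STFT 损失)和对抗损失"中,STFT 损失部分可以用下面这段仅作示意的 NumPy 代码来理解:在多组 FFT 大小/帧移/窗长(取自下文 pwg_default.yaml 的 stft_loss_params)下分别计算谱收敛损失和对数幅度谱损失再求和。这只是示意实现,与 Parakeet 的实际代码无关:
+
+```python
+import numpy as np
+
+def stft_mag(x, n_fft, hop, win_len):
+    """简化版 STFT 幅度谱(示意用,不含 center padding)。"""
+    window = np.hanning(win_len)
+    frames = []
+    for start in range(0, len(x) - win_len + 1, hop):
+        frame = x[start:start + win_len] * window
+        frames.append(np.abs(np.fft.rfft(frame, n=n_fft)))
+    return np.stack(frames)                     # [帧数, n_fft//2 + 1]
+
+def multi_resolution_stft_loss(y_hat, y):
+    loss = 0.0
+    # 三组分辨率与下文 stft_loss_params 中的 fft_sizes/hop_sizes/win_lengths 对应
+    for n_fft, hop, win_len in [(1024, 120, 600), (2048, 240, 1200), (512, 50, 240)]:
+        s_hat, s = stft_mag(y_hat, n_fft, hop, win_len), stft_mag(y, n_fft, hop, win_len)
+        sc = np.linalg.norm(s - s_hat) / np.linalg.norm(s)                 # 谱收敛损失
+        mag = np.mean(np.abs(np.log(s + 1e-7) - np.log(s_hat + 1e-7)))     # 对数幅度谱损失
+        loss += sc + mag
+    return loss / 3
+
+# 用随机波形演示调用方式(真实训练中 y_hat 为生成器输出,此处仅为假设数据)
+y = np.random.randn(24000).astype(np.float32)
+y_hat = y + 0.01 * np.random.randn(24000).astype(np.float32)
+print(multi_resolution_stft_loss(y_hat, y))
+```
+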
+ +fastspeech2_baker使用了FastSpeech2作为声学模型,使用Parallel WaveGAN作为声码器,并在[中文标准女声音库(Chinese Standard Mandarin Speech Copus)](https://www.data-baker.com/open_source.html)数据集上进行了预训练,可直接用于预测合成音频。 + +更多详情请参考: +- [FastSpeech 2: Fast and High-Quality End-to-End Text-to-Speech](https://arxiv.org/abs/2006.04558) +- [FastSpeech语音合成系统技术升级,微软联合浙大提出FastSpeech2](https://www.msra.cn/zh-cn/news/features/fastspeech2) +- [Parallel WaveGAN: A fast waveform generation model based on generative adversarial networks with multi-resolution spectrogram](https://arxiv.org/abs/1910.11480) + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.1.0 + + - paddlehub >= 2.1.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) + +- ### 2、安装 + + - ```shell + $ hub install fastspeech2_baker + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + + +## 三、模型API预测 + +- ### 1、预测代码示例 + + ```python + import paddlehub as hub + + # 需要合成语音的文本 + sentences = ['这是一段测试语音合成的音频。'] + + model = hub.Module( + name='fastspeech2_baker', + version='1.0.0') + wav_files = model.generate(sentences) + + # 打印合成的音频文件的路径 + print(wav_files) + ``` + + 详情可参考PaddleHub示例: + - [语音合成](../../../../demo/text_to_speech) + + +- ### 2、API + - ```python + def __init__(output_dir) + ``` + + - 创建Module对象(动态图组网版本) + + - **参数** + + - `output_dir`: 合成音频文件的输出目录。 + + - ```python + def generate( + sentences, + device='cpu', + ) + ``` + - 将输入的文本合成为音频文件并保存到输出目录。 + + - **参数** + + - `sentences`:合成音频的文本列表,类型为`List[str]`。 + - `device`:预测时使用的设备,默认为`cpu`,如需使用gpu预测,请设置为`gpu`。 + + - **返回** + + - `wav_files`:`List[str]`类型,返回合成音频的存放路径。 + + +## 四、服务部署 + +- PaddleHub Serving可以部署一个在线的语音识别服务。 + +- ### 第一步:启动PaddleHub Serving + + - ```shell + $ hub serving start -m fastspeech2_baker + ``` + + - 这样就完成了一个语音识别服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 需要合成语音的文本 + sentences = [ + '这是第一段测试语音合成的音频。', + '这是第二段测试语音合成的音频。', + ] + + # 以key的方式指定text传入预测方法的时的参数,此例中为"sentences" + data = {"sentences": sentences} + + # 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip + url = "http://127.0.0.1:8866/predict/fastspeech2_baker" + + # 指定post请求的headers为application/json方式 + headers = {"Content-Type": "application/json"} + + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + ``` + +## 五、更新历史 + +* 1.0.0 + + 初始发布 + + ```shell + $ hub install fastspeech2_baker + ``` diff --git a/modules/thirdparty/image/semantic_segmentation/Extract_Line_Draft/__init__.py b/modules/audio/tts/fastspeech2_baker/__init__.py similarity index 100% rename from modules/thirdparty/image/semantic_segmentation/Extract_Line_Draft/__init__.py rename to modules/audio/tts/fastspeech2_baker/__init__.py diff --git a/modules/audio/tts/fastspeech2_baker/assets/fastspeech2_nosil_baker_ckpt_0.4/default.yaml b/modules/audio/tts/fastspeech2_baker/assets/fastspeech2_nosil_baker_ckpt_0.4/default.yaml new file mode 100644 index 0000000000000000000000000000000000000000..63eaef16d118e3e9a0a14b028b750d0fce426e2f --- /dev/null +++ b/modules/audio/tts/fastspeech2_baker/assets/fastspeech2_nosil_baker_ckpt_0.4/default.yaml @@ -0,0 +1,104 @@ +########################################################### +# FEATURE EXTRACTION SETTING # 
+########################################################### + +fs: 24000 # sr +n_fft: 2048 # FFT size. +n_shift: 300 # Hop size. +win_length: 1200 # Window length. + # If set to null, it will be the same as fft_size. +window: "hann" # Window function. + +# Only used for feats_type != raw + +fmin: 80 # Minimum frequency of Mel basis. +fmax: 7600 # Maximum frequency of Mel basis. +n_mels: 80 # The number of mel basis. + +# Only used for the model using pitch features (e.g. FastSpeech2) +f0min: 80 # Maximum f0 for pitch extraction. +f0max: 400 # Minimum f0 for pitch extraction. + + +########################################################### +# DATA SETTING # +########################################################### +batch_size: 64 +num_workers: 4 + + +########################################################### +# MODEL SETTING # +########################################################### +model: + adim: 384 # attention dimension + aheads: 2 # number of attention heads + elayers: 4 # number of encoder layers + eunits: 1536 # number of encoder ff units + dlayers: 4 # number of decoder layers + dunits: 1536 # number of decoder ff units + positionwise_layer_type: conv1d # type of position-wise layer + positionwise_conv_kernel_size: 3 # kernel size of position wise conv layer + duration_predictor_layers: 2 # number of layers of duration predictor + duration_predictor_chans: 256 # number of channels of duration predictor + duration_predictor_kernel_size: 3 # filter size of duration predictor + postnet_layers: 5 # number of layers of postnset + postnet_filts: 5 # filter size of conv layers in postnet + postnet_chans: 256 # number of channels of conv layers in postnet + use_masking: True # whether to apply masking for padded part in loss calculation + use_scaled_pos_enc: True # whether to use scaled positional encoding + encoder_normalize_before: True # whether to perform layer normalization before the input + decoder_normalize_before: True # whether to perform layer normalization before the input + reduction_factor: 1 # reduction factor + init_type: xavier_uniform # initialization type + init_enc_alpha: 1.0 # initial value of alpha of encoder scaled position encoding + init_dec_alpha: 1.0 # initial value of alpha of decoder scaled position encoding + transformer_enc_dropout_rate: 0.2 # dropout rate for transformer encoder layer + transformer_enc_positional_dropout_rate: 0.2 # dropout rate for transformer encoder positional encoding + transformer_enc_attn_dropout_rate: 0.2 # dropout rate for transformer encoder attention layer + transformer_dec_dropout_rate: 0.2 # dropout rate for transformer decoder layer + transformer_dec_positional_dropout_rate: 0.2 # dropout rate for transformer decoder positional encoding + transformer_dec_attn_dropout_rate: 0.2 # dropout rate for transformer decoder attention layer + pitch_predictor_layers: 5 # number of conv layers in pitch predictor + pitch_predictor_chans: 256 # number of channels of conv layers in pitch predictor + pitch_predictor_kernel_size: 5 # kernel size of conv leyers in pitch predictor + pitch_predictor_dropout: 0.5 # dropout rate in pitch predictor + pitch_embed_kernel_size: 1 # kernel size of conv embedding layer for pitch + pitch_embed_dropout: 0.0 # dropout rate after conv embedding layer for pitch + stop_gradient_from_pitch_predictor: true # whether to stop the gradient from pitch predictor to encoder + energy_predictor_layers: 2 # number of conv layers in energy predictor + energy_predictor_chans: 256 # number of channels of conv layers 
in energy predictor + energy_predictor_kernel_size: 3 # kernel size of conv leyers in energy predictor + energy_predictor_dropout: 0.5 # dropout rate in energy predictor + energy_embed_kernel_size: 1 # kernel size of conv embedding layer for energy + energy_embed_dropout: 0.0 # dropout rate after conv embedding layer for energy + stop_gradient_from_energy_predictor: false # whether to stop the gradient from energy predictor to encoder + + + +########################################################### +# UPDATER SETTING # +########################################################### +updater: + use_masking: True # whether to apply masking for padded part in loss calculation + + + +########################################################### +# OPTIMIZER SETTING # +########################################################### +optimizer: + optim: adam # optimizer type + learning_rate: 0.001 # learning rate + +########################################################### +# TRAINING SETTING # +########################################################### +max_epoch: 1000 +num_snapshots: 5 + + +########################################################### +# OTHER SETTING # +########################################################### +seed: 10086 diff --git a/modules/audio/tts/fastspeech2_baker/assets/fastspeech2_nosil_baker_ckpt_0.4/phone_id_map.txt b/modules/audio/tts/fastspeech2_baker/assets/fastspeech2_nosil_baker_ckpt_0.4/phone_id_map.txt new file mode 100644 index 0000000000000000000000000000000000000000..a7ca340266028818c683329ab1885ae986c44233 --- /dev/null +++ b/modules/audio/tts/fastspeech2_baker/assets/fastspeech2_nosil_baker_ckpt_0.4/phone_id_map.txt @@ -0,0 +1,268 @@ + 0 + 1 +a1 2 +a2 3 +a3 4 +a4 5 +a5 6 +ai1 7 +ai2 8 +ai3 9 +ai4 10 +ai5 11 +air2 12 +air4 13 +an1 14 +an2 15 +an3 16 +an4 17 +an5 18 +ang1 19 +ang2 20 +ang3 21 +ang4 22 +ang5 23 +anr1 24 +anr3 25 +anr4 26 +ao1 27 +ao2 28 +ao3 29 +ao4 30 +ao5 31 +aor3 32 +aor4 33 +ar2 34 +ar3 35 +ar4 36 +b 37 +c 38 +ch 39 +d 40 +e1 41 +e2 42 +e3 43 +e4 44 +e5 45 +ei1 46 +ei2 47 +ei3 48 +ei4 49 +ei5 50 +en1 51 +en2 52 +en3 53 +en4 54 +en5 55 +eng1 56 +eng2 57 +eng3 58 +eng4 59 +eng5 60 +enr1 61 +enr2 62 +enr4 63 +enr5 64 +er2 65 +er3 66 +er4 67 +er5 68 +f 69 +g 70 +h 71 +i1 72 +i2 73 +i3 74 +i4 75 +i5 76 +ia1 77 +ia2 78 +ia3 79 +ia4 80 +ia5 81 +ian1 82 +ian2 83 +ian3 84 +ian4 85 +ian5 86 +iang1 87 +iang2 88 +iang3 89 +iang4 90 +iang5 91 +iangr4 92 +ianr1 93 +ianr2 94 +ianr3 95 +iao1 96 +iao2 97 +iao3 98 +iao4 99 +iao5 100 +iar1 101 +iar3 102 +ie1 103 +ie2 104 +ie3 105 +ie4 106 +ie5 107 +ii1 108 +ii2 109 +ii3 110 +ii4 111 +ii5 112 +iii1 113 +iii2 114 +iii3 115 +iii4 116 +iii5 117 +iiir4 118 +iir2 119 +in1 120 +in2 121 +in3 122 +in4 123 +in5 124 +ing1 125 +ing2 126 +ing3 127 +ing4 128 +ing5 129 +ingr2 130 +ingr3 131 +inr4 132 +io1 133 +io5 134 +iong1 135 +iong2 136 +iong3 137 +iong4 138 +iong5 139 +iou1 140 +iou2 141 +iou3 142 +iou4 143 +iou5 144 +iour1 145 +ir1 146 +ir2 147 +ir3 148 +ir4 149 +ir5 150 +j 151 +k 152 +l 153 +m 154 +n 155 +o1 156 +o2 157 +o3 158 +o4 159 +o5 160 +ong1 161 +ong2 162 +ong3 163 +ong4 164 +ong5 165 +ongr4 166 +ou1 167 +ou2 168 +ou3 169 +ou4 170 +ou5 171 +our2 172 +p 173 +q 174 +r 175 +s 176 +sh 177 +sil 178 +sp 179 +spl 180 +spn 181 +t 182 +u1 183 +u2 184 +u3 185 +u4 186 +u5 187 +ua1 188 +ua2 189 +ua3 190 +ua4 191 +ua5 192 +uai1 193 +uai2 194 +uai3 195 +uai4 196 +uai5 197 +uair4 198 +uan1 199 +uan2 200 +uan3 201 +uan4 202 +uan5 203 +uang1 204 +uang2 205 +uang3 206 +uang4 207 +uang5 208 +uanr1 209 +uanr2 210 +uei1 211 +uei2 
212 +uei3 213 +uei4 214 +uei5 215 +ueir1 216 +ueir3 217 +ueir4 218 +uen1 219 +uen2 220 +uen3 221 +uen4 222 +uen5 223 +ueng1 224 +ueng2 225 +ueng3 226 +ueng4 227 +uenr3 228 +uenr4 229 +uo1 230 +uo2 231 +uo3 232 +uo4 233 +uo5 234 +uor2 235 +uor3 236 +ur3 237 +ur4 238 +v1 239 +v2 240 +v3 241 +v4 242 +v5 243 +van1 244 +van2 245 +van3 246 +van4 247 +van5 248 +vanr4 249 +ve1 250 +ve2 251 +ve3 252 +ve4 253 +ve5 254 +vn1 255 +vn2 256 +vn3 257 +vn4 258 +vn5 259 +x 260 +z 261 +zh 262 +, 263 +。 264 +? 265 +! 266 + 267 diff --git a/modules/audio/tts/fastspeech2_baker/assets/pwg_baker_ckpt_0.4/pwg_default.yaml b/modules/audio/tts/fastspeech2_baker/assets/pwg_baker_ckpt_0.4/pwg_default.yaml new file mode 100644 index 0000000000000000000000000000000000000000..17edbc25515b58b77fbd3cc19c4b24234ff47083 --- /dev/null +++ b/modules/audio/tts/fastspeech2_baker/assets/pwg_baker_ckpt_0.4/pwg_default.yaml @@ -0,0 +1,128 @@ +# This is the hyperparameter configuration file for Parallel WaveGAN. +# Please make sure this is adjusted for the CSMSC dataset. If you want to +# apply to the other dataset, you might need to carefully change some parameters. +# This configuration requires 12 GB GPU memory and takes ~3 days on RTX TITAN. + +########################################################### +# FEATURE EXTRACTION SETTING # +########################################################### +fs: 24000 # Sampling rate. +n_fft: 2048 # FFT size. (in samples) +n_shift: 300 # Hop size. (in samples) +win_length: 1200 # Window length. (in samples) + # If set to null, it will be the same as fft_size. +window: "hann" # Window function. +n_mels: 80 # Number of mel basis. +fmin: 80 # Minimum freq in mel basis calculation. +fmax: 7600 # Maximum frequency in mel basis calculation. +# global_gain_scale: 1.0 # Will be multiplied to all of waveform. +trim_silence: false # Whether to trim the start and end of silence. +top_db: 60 # Need to tune carefully if the recording is not good. +trim_frame_length: 2048 # Frame size in trimming.(in samples) +trim_hop_length: 512 # Hop size in trimming.(in samples) + +########################################################### +# GENERATOR NETWORK ARCHITECTURE SETTING # +########################################################### +generator_params: + in_channels: 1 # Number of input channels. + out_channels: 1 # Number of output channels. + kernel_size: 3 # Kernel size of dilated convolution. + layers: 30 # Number of residual block layers. + stacks: 3 # Number of stacks i.e., dilation cycles. + residual_channels: 64 # Number of channels in residual conv. + gate_channels: 128 # Number of channels in gated conv. + skip_channels: 64 # Number of channels in skip conv. + aux_channels: 80 # Number of channels for auxiliary feature conv. + # Must be the same as num_mels. + aux_context_window: 2 # Context window size for auxiliary feature. + # If set to 2, previous 2 and future 2 frames will be considered. + dropout: 0.0 # Dropout rate. 0.0 means no dropout applied. + bias: true # use bias in residual blocks + use_weight_norm: true # Whether to use weight norm. + # If set to true, it will be applied to all of the conv layers. + use_causal_conv: false # use causal conv in residual blocks and upsample layers + # upsample_net: "ConvInUpsampleNetwork" # Upsampling network architecture. + upsample_scales: [4, 5, 3, 5] # Upsampling scales. Prodcut of these must be the same as hop size. 
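+    # 补充说明: 4 * 5 * 3 * 5 = 300,与上文 n_shift(帧移)一致,
+    # 即声码器把每帧梅尔频谱上采样回 300 个波形采样点。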
+ interpolate_mode: "nearest" # upsample net interpolate mode + freq_axis_kernel_size: 1 # upsamling net: convolution kernel size in frequencey axis + nonlinear_activation: null + nonlinear_activation_params: {} + +########################################################### +# DISCRIMINATOR NETWORK ARCHITECTURE SETTING # +########################################################### +discriminator_params: + in_channels: 1 # Number of input channels. + out_channels: 1 # Number of output channels. + kernel_size: 3 # Number of output channels. + layers: 10 # Number of conv layers. + conv_channels: 64 # Number of chnn layers. + bias: true # Whether to use bias parameter in conv. + use_weight_norm: true # Whether to use weight norm. + # If set to true, it will be applied to all of the conv layers. + nonlinear_activation: "LeakyReLU" # Nonlinear function after each conv. + nonlinear_activation_params: # Nonlinear function parameters + negative_slope: 0.2 # Alpha in LeakyReLU. + +########################################################### +# STFT LOSS SETTING # +########################################################### +stft_loss_params: + fft_sizes: [1024, 2048, 512] # List of FFT size for STFT-based loss. + hop_sizes: [120, 240, 50] # List of hop size for STFT-based loss + win_lengths: [600, 1200, 240] # List of window length for STFT-based loss. + window: "hann" # Window function for STFT-based loss + +########################################################### +# ADVERSARIAL LOSS SETTING # +########################################################### +lambda_adv: 4.0 # Loss balancing coefficient. + +########################################################### +# DATA LOADER SETTING # +########################################################### +batch_size: 6 # Batch size. +batch_max_steps: 25500 # Length of each audio in batch. Make sure dividable by hop_size. +pin_memory: true # Whether to pin memory in Pytorch DataLoader. +num_workers: 4 # Number of workers in Pytorch DataLoader. +remove_short_samples: true # Whether to remove samples the length of which are less than batch_max_steps. +allow_cache: true # Whether to allow cache in dataset. If true, it requires cpu memory. + +########################################################### +# OPTIMIZER & SCHEDULER SETTING # +########################################################### +generator_optimizer_params: + epsilon: 1.0e-6 # Generator's epsilon. + weight_decay: 0.0 # Generator's weight decay coefficient. +generator_scheduler_params: + learning_rate: 0.0001 # Generator's learning rate. + step_size: 200000 # Generator's scheduler step size. + gamma: 0.5 # Generator's scheduler gamma. + # At each step size, lr will be multiplied by this parameter. +generator_grad_norm: 10 # Generator's gradient norm. +discriminator_optimizer_params: + epsilon: 1.0e-6 # Discriminator's epsilon. + weight_decay: 0.0 # Discriminator's weight decay coefficient. +discriminator_scheduler_params: + learning_rate: 0.00005 # Discriminator's learning rate. + step_size: 200000 # Discriminator's scheduler step size. + gamma: 0.5 # Discriminator's scheduler gamma. + # At each step size, lr will be multiplied by this parameter. +discriminator_grad_norm: 1 # Discriminator's gradient norm. + +########################################################### +# INTERVAL SETTING # +########################################################### +discriminator_train_start_steps: 100000 # Number of steps to start to train discriminator. +train_max_steps: 400000 # Number of training steps. 
+save_interval_steps: 5000 # Interval steps to save checkpoint. +eval_interval_steps: 1000 # Interval steps to evaluate the network. + + +########################################################### +# OTHER SETTING # +########################################################### +num_save_intermediate_results: 4 # Number of results to be saved as intermediate results. +num_snapshots: 10 # max number of snapshots to keep while training +seed: 42 # random seed for paddle, random, and np.random diff --git a/modules/audio/tts/fastspeech2_baker/module.py b/modules/audio/tts/fastspeech2_baker/module.py new file mode 100644 index 0000000000000000000000000000000000000000..03d150c9d989285ea9cf7eaceff469566e1a84ad --- /dev/null +++ b/modules/audio/tts/fastspeech2_baker/module.py @@ -0,0 +1,125 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +from pathlib import Path +from typing import List + +import numpy as np +import paddle +from paddlehub.env import MODULE_HOME +from paddlehub.module.module import moduleinfo, serving +from paddlehub.utils.log import logger +from parakeet.frontend.zh_frontend import Frontend +from parakeet.models.fastspeech2 import FastSpeech2 +from parakeet.models.fastspeech2 import FastSpeech2Inference +from parakeet.models.parallel_wavegan import PWGGenerator +from parakeet.models.parallel_wavegan import PWGInference +from parakeet.modules.normalizer import ZScore +import soundfile as sf +from yacs.config import CfgNode +import yaml + + +@moduleinfo(name="fastspeech2_baker", version="1.0.0", summary="", author="Baidu", author_email="", type="audio/tts") +class FastSpeech(paddle.nn.Layer): + def __init__(self, output_dir='./wavs'): + super(FastSpeech, self).__init__() + fastspeech2_res_dir = os.path.join(MODULE_HOME, 'fastspeech2_baker', 'assets/fastspeech2_nosil_baker_ckpt_0.4') + pwg_res_dir = os.path.join(MODULE_HOME, 'fastspeech2_baker', 'assets/pwg_baker_ckpt_0.4') + + phones_dict = os.path.join(fastspeech2_res_dir, 'phone_id_map.txt') + with open(phones_dict, "r") as f: + phn_id = [line.strip().split() for line in f.readlines()] + vocab_size = len(phn_id) + + # fastspeech2 + fastspeech2_config = os.path.join(fastspeech2_res_dir, 'default.yaml') + with open(fastspeech2_config) as f: + fastspeech2_config = CfgNode(yaml.safe_load(f)) + self.samplerate = fastspeech2_config.fs + + fastspeech2_checkpoint = os.path.join(fastspeech2_res_dir, 'snapshot_iter_76000.pdz') + model = FastSpeech2(idim=vocab_size, odim=fastspeech2_config.n_mels, **fastspeech2_config["model"]) + model.set_state_dict(paddle.load(fastspeech2_checkpoint)["main_params"]) + logger.info('Load fastspeech2 params from %s' % os.path.abspath(fastspeech2_checkpoint)) + model.eval() + + # vocoder + pwg_config = os.path.join(pwg_res_dir, 'pwg_default.yaml') + with open(pwg_config) as f: + pwg_config = CfgNode(yaml.safe_load(f)) + + pwg_checkpoint = os.path.join(pwg_res_dir, 'pwg_snapshot_iter_400000.pdz') + vocoder = 
PWGGenerator(**pwg_config["generator_params"]) + vocoder.set_state_dict(paddle.load(pwg_checkpoint)["generator_params"]) + logger.info('Load vocoder params from %s' % os.path.abspath(pwg_checkpoint)) + vocoder.remove_weight_norm() + vocoder.eval() + + # frontend + self.frontend = Frontend(phone_vocab_path=phones_dict) + + # stat + fastspeech2_stat = os.path.join(fastspeech2_res_dir, 'speech_stats.npy') + stat = np.load(fastspeech2_stat) + mu, std = stat + mu = paddle.to_tensor(mu) + std = paddle.to_tensor(std) + fastspeech2_normalizer = ZScore(mu, std) + + pwg_stat = os.path.join(pwg_res_dir, 'pwg_stats.npy') + stat = np.load(pwg_stat) + mu, std = stat + mu = paddle.to_tensor(mu) + std = paddle.to_tensor(std) + pwg_normalizer = ZScore(mu, std) + + # inference + self.fastspeech2_inference = FastSpeech2Inference(fastspeech2_normalizer, model) + self.pwg_inference = PWGInference(pwg_normalizer, vocoder) + + self.output_dir = Path(output_dir) + self.output_dir.mkdir(parents=True, exist_ok=True) + + def forward(self, text: str): + wav = None + input_ids = self.frontend.get_input_ids(text, merge_sentences=True) + phone_ids = input_ids["phone_ids"] + for part_phone_ids in phone_ids: + with paddle.no_grad(): + mel = self.fastspeech2_inference(part_phone_ids) + temp_wav = self.pwg_inference(mel) + if wav is None: + wav = temp_wav + else: + wav = paddle.concat([wav, temp_wav]) + + return wav + + @serving + def generate(self, sentences: List[str], device='cpu'): + assert isinstance(sentences, list) and isinstance(sentences[0], str), \ + 'Input data should be List[str], but got {}'.format(type(sentences)) + + paddle.set_device(device) + wav_files = [] + for i, sentence in enumerate(sentences): + wav = self(sentence) + wav_file = str(self.output_dir.absolute() / (str(i + 1) + ".wav")) + sf.write(wav_file, wav.numpy(), samplerate=self.samplerate) + wav_files.append(wav_file) + + logger.info('{} wave files have been generated in {}'.format(len(sentences), self.output_dir.absolute())) + return wav_files diff --git a/modules/audio/tts/fastspeech2_baker/requirements.txt b/modules/audio/tts/fastspeech2_baker/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..f410f4f4238ed0017d04fb708edb3725c34784ac --- /dev/null +++ b/modules/audio/tts/fastspeech2_baker/requirements.txt @@ -0,0 +1 @@ +git+https://github.com/PaddlePaddle/Parakeet@8040cb0#egg=paddle-parakeet diff --git a/modules/audio/tts/fastspeech2_ljspeech/README.md b/modules/audio/tts/fastspeech2_ljspeech/README.md new file mode 100644 index 0000000000000000000000000000000000000000..54329460f30b073dc4a551c4dd09b94a56e8e1f3 --- /dev/null +++ b/modules/audio/tts/fastspeech2_ljspeech/README.md @@ -0,0 +1,156 @@ +# fastspeech2_ljspeech + +|模型名称|fastspeech2_ljspeech| +| :--- | :---: | +|类别|语音-语音合成| +|网络|FastSpeech2| +|数据集|LJSpeech-1.1| +|是否支持Fine-tuning|否| +|模型大小|425MB| +|最新更新日期|2021-10-20| +|数据指标|-| + +## 一、模型基本信息 + +### 模型介绍 + +FastSpeech2是微软亚洲研究院和微软Azure语音团队联合浙江大学于2020年提出的语音合成(Text to Speech, TTS)模型。FastSpeech2是FastSpeech的改进版,解决了FastSpeech依赖Teacher-Student的知识蒸馏框架,训练流程比较复杂和训练目标相比真实语音存在信息损失的问题。 + +FastSpeech2的模型架构如下图所示,它沿用FastSpeech中提出的Feed-Forward Transformer(FFT)架构,但在音素编码器和梅尔频谱解码器中加入了一个可变信息适配器(Variance Adaptor),从而支持在FastSpeech2中引入更多语音中变化的信息,例如时长、音高、音量(频谱能量)等,来解决语音合成中的一对多映射问题。 + +
+（图:FastSpeech2 模型结构示意图）
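+
+FastSpeech2 的输入是音素 ID 序列。下面给出一段仅作示意的 Python 片段,演示如何用本模块 assets 中的 phone_id_map.txt(每行格式为"音素 编号")把一串 ARPABET 音素映射为 ID,词表外符号与标点退化为 "sp",与 module.py 中的做法一致;文件路径与示例音素串均为假设:
+
+```python
+# 仅作示意:把 ARPABET 音素序列映射为模型输入的音素 ID
+phone_id_map = {}
+with open('phone_id_map.txt', 'r') as f:     # 路径为假设,实际文件位于模块的 assets 目录
+    for line in f:
+        parts = line.strip().split()
+        if len(parts) == 2:
+            phone_id_map[parts[0]] = int(parts[1])
+
+# 单词 "printing" 对应的音素(假设的例子)
+phones = ['P', 'R', 'IH1', 'N', 'T', 'IH0', 'NG']
+phone_ids = [phone_id_map.get(p, phone_id_map['sp']) for p in phones]
+print(phone_ids)   # 按本目录音素表,预期为 [54, 55, 37, 46, 58, 36, 47]
+```
+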
+ +Parallel WaveGAN是一种基于生成对抗网络、无需知识蒸馏的波形生成方法,生成速度快且模型占用空间小。该方法通过联合优化多分辨率STFT损失(谱图损失)和对抗损失来训练非自回归WaveNet声码器,可以有效捕获真实语音波形的时频分布。Parallel WaveGAN的结构如下图所示: + +
+（图:Parallel WaveGAN 结构示意图）
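+
+在声学模型与声码器的衔接上,梅尔频谱帧数与合成语音时长的换算由采样率和帧移决定。下面是一段仅作示意的 Python 算术,其中 fs、n_shift 取自下文的配置文件,语音时长为假设值:
+
+```python
+fs = 22050      # 采样率,见本目录 default.yaml / pwg_default.yaml
+n_shift = 256   # 帧移:相邻两帧梅尔频谱之间的波形采样点数
+
+frames_per_second = fs / n_shift           # 约 86.1 帧/秒
+duration = 3.0                             # 假设要合成约 3 秒的语音
+n_frames = int(duration * frames_per_second)
+n_samples = n_frames * n_shift             # 声码器把每帧还原成 n_shift 个采样点
+
+print(n_frames, n_samples)                 # 258 帧 -> 66048 个采样点,约 3.0 秒
+```
+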
+ +fastspeech2_ljspeech使用了FastSpeech2作为声学模型,使用Parallel WaveGAN作为声码器,并在[The LJ Speech Dataset](https://keithito.com/LJ-Speech-Dataset/)数据集上进行了预训练,可直接用于预测合成音频。 + +更多详情请参考: +- [FastSpeech 2: Fast and High-Quality End-to-End Text-to-Speech](https://arxiv.org/abs/2006.04558) +- [FastSpeech语音合成系统技术升级,微软联合浙大提出FastSpeech2](https://www.msra.cn/zh-cn/news/features/fastspeech2) +- [Parallel WaveGAN: A fast waveform generation model based on generative adversarial networks with multi-resolution spectrogram](https://arxiv.org/abs/1910.11480) + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.1.0 + + - paddlehub >= 2.1.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) + +- ### 2、安装 + + - ```shell + $ hub install fastspeech2_ljspeech + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + + +## 三、模型API预测 + +- ### 1、预测代码示例 + + ```python + import paddlehub as hub + + # 需要合成语音的文本 + sentences = ['The quick brown fox jumps over a lazy dog.'] + + model = hub.Module( + name='fastspeech2_ljspeech', + version='1.0.0') + wav_files = model.generate(sentences) + + # 打印合成的音频文件的路径 + print(wav_files) + ``` + + 详情可参考PaddleHub示例: + - [语音合成](../../../../demo/text_to_speech) + + +- ### 2、API + - ```python + def __init__(output_dir) + ``` + + - 创建Module对象(动态图组网版本) + + - **参数** + + - `output_dir`: 合成音频文件的输出目录。 + + - ```python + def generate( + sentences, + device='cpu', + ) + ``` + - 将输入的文本合成为音频文件并保存到输出目录。 + + - **参数** + + - `sentences`:合成音频的文本列表,类型为`List[str]`。 + - `device`:预测时使用的设备,默认为`cpu`,如需使用gpu预测,请设置为`gpu`。 + + - **返回** + + - `wav_files`:`List[str]`类型,返回合成音频的存放路径。 + + +## 四、服务部署 + +- PaddleHub Serving可以部署一个在线的语音识别服务。 + +- ### 第一步:启动PaddleHub Serving + + - ```shell + $ hub serving start -m fastspeech2_ljspeech + ``` + + - 这样就完成了一个语音识别服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 需要合成语音的文本 + sentences = [ + 'The quick brown fox jumps over a lazy dog.', + 'Today is a good day!', + ] + + # 以key的方式指定text传入预测方法的时的参数,此例中为"sentences" + data = {"sentences": sentences} + + # 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip + url = "http://127.0.0.1:8866/predict/fastspeech2_ljspeech" + + # 指定post请求的headers为application/json方式 + headers = {"Content-Type": "application/json"} + + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + ``` + +## 五、更新历史 + +* 1.0.0 + + 初始发布 + + ```shell + $ hub install fastspeech2_ljspeech + ``` diff --git a/modules/thirdparty/image/semantic_segmentation/WatermeterSegmentation/__init__.py b/modules/audio/tts/fastspeech2_ljspeech/__init__.py similarity index 100% rename from modules/thirdparty/image/semantic_segmentation/WatermeterSegmentation/__init__.py rename to modules/audio/tts/fastspeech2_ljspeech/__init__.py diff --git a/modules/audio/tts/fastspeech2_ljspeech/assets/fastspeech2_nosil_ljspeech_ckpt_0.5/default.yaml b/modules/audio/tts/fastspeech2_ljspeech/assets/fastspeech2_nosil_ljspeech_ckpt_0.5/default.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cabcca80ba5552ac3d300616b44572f3c297656e --- /dev/null +++ b/modules/audio/tts/fastspeech2_ljspeech/assets/fastspeech2_nosil_ljspeech_ckpt_0.5/default.yaml @@ -0,0 +1,104 @@ 
+########################################################### +# FEATURE EXTRACTION SETTING # +########################################################### + +fs: 22050 # sr +n_fft: 1024 # FFT size. +n_shift: 256 # Hop size. +win_length: null # Window length. + # If set to null, it will be the same as fft_size. +window: "hann" # Window function. + +# Only used for feats_type != raw + +fmin: 80 # Minimum frequency of Mel basis. +fmax: 7600 # Maximum frequency of Mel basis. +n_mels: 80 # The number of mel basis. + +# Only used for the model using pitch features (e.g. FastSpeech2) +f0min: 80 # Maximum f0 for pitch extraction. +f0max: 400 # Minimum f0 for pitch extraction. + + +########################################################### +# DATA SETTING # +########################################################### +batch_size: 64 +num_workers: 4 + + +########################################################### +# MODEL SETTING # +########################################################### +model: + adim: 384 # attention dimension + aheads: 2 # number of attention heads + elayers: 4 # number of encoder layers + eunits: 1536 # number of encoder ff units + dlayers: 4 # number of decoder layers + dunits: 1536 # number of decoder ff units + positionwise_layer_type: conv1d # type of position-wise layer + positionwise_conv_kernel_size: 3 # kernel size of position wise conv layer + duration_predictor_layers: 2 # number of layers of duration predictor + duration_predictor_chans: 256 # number of channels of duration predictor + duration_predictor_kernel_size: 3 # filter size of duration predictor + postnet_layers: 5 # number of layers of postnset + postnet_filts: 5 # filter size of conv layers in postnet + postnet_chans: 256 # number of channels of conv layers in postnet + use_masking: True # whether to apply masking for padded part in loss calculation + use_scaled_pos_enc: True # whether to use scaled positional encoding + encoder_normalize_before: True # whether to perform layer normalization before the input + decoder_normalize_before: True # whether to perform layer normalization before the input + reduction_factor: 1 # reduction factor + init_type: xavier_uniform # initialization type + init_enc_alpha: 1.0 # initial value of alpha of encoder scaled position encoding + init_dec_alpha: 1.0 # initial value of alpha of decoder scaled position encoding + transformer_enc_dropout_rate: 0.2 # dropout rate for transformer encoder layer + transformer_enc_positional_dropout_rate: 0.2 # dropout rate for transformer encoder positional encoding + transformer_enc_attn_dropout_rate: 0.2 # dropout rate for transformer encoder attention layer + transformer_dec_dropout_rate: 0.2 # dropout rate for transformer decoder layer + transformer_dec_positional_dropout_rate: 0.2 # dropout rate for transformer decoder positional encoding + transformer_dec_attn_dropout_rate: 0.2 # dropout rate for transformer decoder attention layer + pitch_predictor_layers: 5 # number of conv layers in pitch predictor + pitch_predictor_chans: 256 # number of channels of conv layers in pitch predictor + pitch_predictor_kernel_size: 5 # kernel size of conv leyers in pitch predictor + pitch_predictor_dropout: 0.5 # dropout rate in pitch predictor + pitch_embed_kernel_size: 1 # kernel size of conv embedding layer for pitch + pitch_embed_dropout: 0.0 # dropout rate after conv embedding layer for pitch + stop_gradient_from_pitch_predictor: true # whether to stop the gradient from pitch predictor to encoder + energy_predictor_layers: 2 # number of conv 
layers in energy predictor + energy_predictor_chans: 256 # number of channels of conv layers in energy predictor + energy_predictor_kernel_size: 3 # kernel size of conv leyers in energy predictor + energy_predictor_dropout: 0.5 # dropout rate in energy predictor + energy_embed_kernel_size: 1 # kernel size of conv embedding layer for energy + energy_embed_dropout: 0.0 # dropout rate after conv embedding layer for energy + stop_gradient_from_energy_predictor: false # whether to stop the gradient from energy predictor to encoder + + + +########################################################### +# UPDATER SETTING # +########################################################### +updater: + use_masking: True # whether to apply masking for padded part in loss calculation + + + +########################################################### +# OPTIMIZER SETTING # +########################################################### +optimizer: + optim: adam # optimizer type + learning_rate: 0.001 # learning rate + +########################################################### +# TRAINING SETTING # +########################################################### +max_epoch: 1000 +num_snapshots: 5 + + +########################################################### +# OTHER SETTING # +########################################################### +seed: 10086 diff --git a/modules/audio/tts/fastspeech2_ljspeech/assets/fastspeech2_nosil_ljspeech_ckpt_0.5/phone_id_map.txt b/modules/audio/tts/fastspeech2_ljspeech/assets/fastspeech2_nosil_ljspeech_ckpt_0.5/phone_id_map.txt new file mode 100644 index 0000000000000000000000000000000000000000..c840e98e2cb6a73acefbf08408d3e24a2b066cd1 --- /dev/null +++ b/modules/audio/tts/fastspeech2_ljspeech/assets/fastspeech2_nosil_ljspeech_ckpt_0.5/phone_id_map.txt @@ -0,0 +1,80 @@ + 0 + 1 +AA0 2 +AA1 3 +AA2 4 +AE0 5 +AE1 6 +AE2 7 +AH0 8 +AH1 9 +AH2 10 +AO0 11 +AO1 12 +AO2 13 +AW0 14 +AW1 15 +AW2 16 +AY0 17 +AY1 18 +AY2 19 +B 20 +CH 21 +D 22 +DH 23 +EH0 24 +EH1 25 +EH2 26 +ER0 27 +ER1 28 +ER2 29 +EY0 30 +EY1 31 +EY2 32 +F 33 +G 34 +HH 35 +IH0 36 +IH1 37 +IH2 38 +IY0 39 +IY1 40 +IY2 41 +JH 42 +K 43 +L 44 +M 45 +N 46 +NG 47 +OW0 48 +OW1 49 +OW2 50 +OY0 51 +OY1 52 +OY2 53 +P 54 +R 55 +S 56 +SH 57 +T 58 +TH 59 +UH0 60 +UH1 61 +UH2 62 +UW0 63 +UW1 64 +UW2 65 +V 66 +W 67 +Y 68 +Z 69 +ZH 70 +sil 71 +sp 72 +spl 73 +spn 74 +, 75 +. 76 +? 77 +! 78 + 79 diff --git a/modules/audio/tts/fastspeech2_ljspeech/assets/pwg_ljspeech_ckpt_0.5/pwg_default.yaml b/modules/audio/tts/fastspeech2_ljspeech/assets/pwg_ljspeech_ckpt_0.5/pwg_default.yaml new file mode 100644 index 0000000000000000000000000000000000000000..049ab93df16eb8a281950ce2ebab694f62a8fb2f --- /dev/null +++ b/modules/audio/tts/fastspeech2_ljspeech/assets/pwg_ljspeech_ckpt_0.5/pwg_default.yaml @@ -0,0 +1,119 @@ +# This is the hyperparameter configuration file for Parallel WaveGAN. +# Please make sure this is adjusted for the LJSpeech dataset. If you want to +# apply to the other dataset, you might need to carefully change some parameters. +# This configuration requires 12 GB GPU memory and takes ~3 days on TITAN V. + +########################################################### +# FEATURE EXTRACTION SETTING # +########################################################### +fs: 22050 # Sampling rate. +n_fft: 1024 # FFT size. (in samples) +n_shift: 256 # Hop size. (in samples) +win_length: null # Window length. (in samples) + # If set to null, it will be the same as fft_size. +window: "hann" # Window function. +n_mels: 80 # Number of mel basis. 
+fmin: 80 # Minimum freq in mel basis calculation. (Hz) +fmax: 7600 # Maximum frequency in mel basis calculation. (Hz) +trim_silence: false # Whether to trim the start and end of silence. +top_db: 60 # Need to tune carefully if the recording is not good. +trim_frame_length: 2048 # Frame size in trimming. (in samples) +trim_hop_length: 512 # Hop size in trimming. (in samples) + +########################################################### +# GENERATOR NETWORK ARCHITECTURE SETTING # +########################################################### +generator_params: + in_channels: 1 # Number of input channels. + out_channels: 1 # Number of output channels. + kernel_size: 3 # Kernel size of dilated convolution. + layers: 30 # Number of residual block layers. + stacks: 3 # Number of stacks i.e., dilation cycles. + residual_channels: 64 # Number of channels in residual conv. + gate_channels: 128 # Number of channels in gated conv. + skip_channels: 64 # Number of channels in skip conv. + aux_channels: 80 # Number of channels for auxiliary feature conv. + # Must be the same as num_mels. + aux_context_window: 2 # Context window size for auxiliary feature. + # If set to 2, previous 2 and future 2 frames will be considered. + dropout: 0.0 # Dropout rate. 0.0 means no dropout applied. + use_weight_norm: true # Whether to use weight norm. + # If set to true, it will be applied to all of the conv layers. + upsample_scales: [4, 4, 4, 4] # Upsampling scales. Prodcut of these must be the same as hop size. + +########################################################### +# DISCRIMINATOR NETWORK ARCHITECTURE SETTING # +########################################################### +discriminator_params: + in_channels: 1 # Number of input channels. + out_channels: 1 # Number of output channels. + kernel_size: 3 # Number of output channels. + layers: 10 # Number of conv layers. + conv_channels: 64 # Number of chnn layers. + bias: true # Whether to use bias parameter in conv. + use_weight_norm: true # Whether to use weight norm. + # If set to true, it will be applied to all of the conv layers. + nonlinear_activation: "LeakyReLU" # Nonlinear function after each conv. + nonlinear_activation_params: # Nonlinear function parameters + negative_slope: 0.2 # Alpha in LeakyReLU. + +########################################################### +# STFT LOSS SETTING # +########################################################### +stft_loss_params: + fft_sizes: [1024, 2048, 512] # List of FFT size for STFT-based loss. + hop_sizes: [120, 240, 50] # List of hop size for STFT-based loss + win_lengths: [600, 1200, 240] # List of window length for STFT-based loss. + window: "hann" # Window function for STFT-based loss + +########################################################### +# ADVERSARIAL LOSS SETTING # +########################################################### +lambda_adv: 4.0 # Loss balancing coefficient. + +########################################################### +# DATA LOADER SETTING # +########################################################### +batch_size: 8 # Batch size. +batch_max_steps: 25600 # Length of each audio in batch. Make sure dividable by hop_size. +pin_memory: true # Whether to pin memory in Pytorch DataLoader. +num_workers: 4 # Number of workers in Pytorch DataLoader. +remove_short_samples: true # Whether to remove samples the length of which are less than batch_max_steps. +allow_cache: true # Whether to allow cache in dataset. If true, it requires cpu memory. 
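+# 注: batch_max_steps(25600)为 n_shift(256)的 100 倍,满足上面"可被 hop_size 整除"的要求,
+# 即每条训练片段对应 100 帧梅尔频谱。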
+ +########################################################### +# OPTIMIZER & SCHEDULER SETTING # +########################################################### +generator_optimizer_params: + epsilon: 1.0e-6 # Generator's epsilon. + weight_decay: 0.0 # Generator's weight decay coefficient. +generator_scheduler_params: + learning_rate: 0.0001 # Generator's learning rate. + step_size: 200000 # Generator's scheduler step size. + gamma: 0.5 # Generator's scheduler gamma. + # At each step size, lr will be multiplied by this parameter. +generator_grad_norm: 10 # Generator's gradient norm. +discriminator_optimizer_params: + epsilon: 1.0e-6 # Discriminator's epsilon. + weight_decay: 0.0 # Discriminator's weight decay coefficient. +discriminator_scheduler_params: + learning_rate: 0.00005 # Discriminator's learning rate. + step_size: 200000 # Discriminator's scheduler step size. + gamma: 0.5 # Discriminator's scheduler gamma. + # At each step size, lr will be multiplied by this parameter. +discriminator_grad_norm: 1 # Discriminator's gradient norm. + +########################################################### +# INTERVAL SETTING # +########################################################### +discriminator_train_start_steps: 100000 # Number of steps to start to train discriminator. +train_max_steps: 400000 # Number of training steps. +save_interval_steps: 5000 # Interval steps to save checkpoint. +eval_interval_steps: 1000 # Interval steps to evaluate the network. + +########################################################### +# OTHER SETTING # +########################################################### +num_save_intermediate_results: 4 # Number of results to be saved as intermediate results. +num_snapshots: 10 # max number of snapshots to keep while training +seed: 42 # random seed for paddle, random, and np.random diff --git a/modules/audio/tts/fastspeech2_ljspeech/module.py b/modules/audio/tts/fastspeech2_ljspeech/module.py new file mode 100644 index 0000000000000000000000000000000000000000..7281e1817b296323b41fb6879a4d11903c97f994 --- /dev/null +++ b/modules/audio/tts/fastspeech2_ljspeech/module.py @@ -0,0 +1,130 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +from pathlib import Path +from typing import List + +import numpy as np +import paddle +from paddlehub.env import MODULE_HOME +from paddlehub.module.module import moduleinfo, serving +from paddlehub.utils.log import logger +from parakeet.frontend import English +from parakeet.models.fastspeech2 import FastSpeech2 +from parakeet.models.fastspeech2 import FastSpeech2Inference +from parakeet.models.parallel_wavegan import PWGGenerator +from parakeet.models.parallel_wavegan import PWGInference +from parakeet.modules.normalizer import ZScore +import soundfile as sf +from yacs.config import CfgNode +import yaml + + +@moduleinfo(name="fastspeech2_ljspeech", version="1.0.0", summary="", author="Baidu", author_email="", type="audio/tts") +class FastSpeech(paddle.nn.Layer): + def __init__(self, output_dir='./wavs'): + super(FastSpeech, self).__init__() + fastspeech2_res_dir = os.path.join(MODULE_HOME, 'fastspeech2_ljspeech', + 'assets/fastspeech2_nosil_ljspeech_ckpt_0.5') + pwg_res_dir = os.path.join(MODULE_HOME, 'fastspeech2_ljspeech', 'assets/pwg_ljspeech_ckpt_0.5') + + phones_dict = os.path.join(fastspeech2_res_dir, 'phone_id_map.txt') + with open(phones_dict, "r") as f: + phn_id = [line.strip().split() for line in f.readlines()] + vocab_size = len(phn_id) + self.phone_id_map = {} + for phn, _id in phn_id: + self.phone_id_map[phn] = int(_id) + + # fastspeech2 + fastspeech2_config = os.path.join(fastspeech2_res_dir, 'default.yaml') + with open(fastspeech2_config) as f: + fastspeech2_config = CfgNode(yaml.safe_load(f)) + self.samplerate = fastspeech2_config.fs + + fastspeech2_checkpoint = os.path.join(fastspeech2_res_dir, 'snapshot_iter_100000.pdz') + model = FastSpeech2(idim=vocab_size, odim=fastspeech2_config.n_mels, **fastspeech2_config["model"]) + model.set_state_dict(paddle.load(fastspeech2_checkpoint)["main_params"]) + logger.info('Load fastspeech2 params from %s' % os.path.abspath(fastspeech2_checkpoint)) + model.eval() + + # vocoder + pwg_config = os.path.join(pwg_res_dir, 'pwg_default.yaml') + with open(pwg_config) as f: + pwg_config = CfgNode(yaml.safe_load(f)) + + pwg_checkpoint = os.path.join(pwg_res_dir, 'pwg_snapshot_iter_400000.pdz') + vocoder = PWGGenerator(**pwg_config["generator_params"]) + vocoder.set_state_dict(paddle.load(pwg_checkpoint)["generator_params"]) + logger.info('Load vocoder params from %s' % os.path.abspath(pwg_checkpoint)) + vocoder.remove_weight_norm() + vocoder.eval() + + # frontend + self.frontend = English() + self.punc = ":,;。?!“”‘’':,;.?!" 
+ + # stat + fastspeech2_stat = os.path.join(fastspeech2_res_dir, 'speech_stats.npy') + stat = np.load(fastspeech2_stat) + mu, std = stat + mu = paddle.to_tensor(mu) + std = paddle.to_tensor(std) + fastspeech2_normalizer = ZScore(mu, std) + + pwg_stat = os.path.join(pwg_res_dir, 'pwg_stats.npy') + stat = np.load(pwg_stat) + mu, std = stat + mu = paddle.to_tensor(mu) + std = paddle.to_tensor(std) + pwg_normalizer = ZScore(mu, std) + + # inference + self.fastspeech2_inference = FastSpeech2Inference(fastspeech2_normalizer, model) + self.pwg_inference = PWGInference(pwg_normalizer, vocoder) + + self.output_dir = Path(output_dir) + self.output_dir.mkdir(parents=True, exist_ok=True) + + def forward(self, text: str): + phones = self.frontend.phoneticize(text) + # remove start_symbol and end_symbol + phones = phones[1:-1] + phones = [phn for phn in phones if not phn.isspace()] + phones = [phn if (phn in self.phone_id_map and phn not in self.punc) else "sp" for phn in phones] + phone_ids = [self.phone_id_map[phn] for phn in phones] + phone_ids = paddle.to_tensor(phone_ids) + + with paddle.no_grad(): + mel = self.fastspeech2_inference(phone_ids) + wav = self.pwg_inference(mel) + + return wav + + @serving + def generate(self, sentences: List[str], device='cpu'): + assert isinstance(sentences, list) and isinstance(sentences[0], str), \ + 'Input data should be List[str], but got {}'.format(type(sentences)) + + paddle.set_device(device) + wav_files = [] + for i, sentence in enumerate(sentences): + wav = self(sentence) + wav_file = str(self.output_dir.absolute() / (str(i + 1) + ".wav")) + sf.write(wav_file, wav.numpy(), samplerate=self.samplerate) + wav_files.append(wav_file) + + logger.info('{} wave files have been generated in {}'.format(len(sentences), self.output_dir.absolute())) + return wav_files diff --git a/modules/audio/tts/fastspeech2_ljspeech/requirements.txt b/modules/audio/tts/fastspeech2_ljspeech/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..f410f4f4238ed0017d04fb708edb3725c34784ac --- /dev/null +++ b/modules/audio/tts/fastspeech2_ljspeech/requirements.txt @@ -0,0 +1 @@ +git+https://github.com/PaddlePaddle/Parakeet@8040cb0#egg=paddle-parakeet diff --git a/modules/audio/tts/fastspeech_ljspeech/README.md b/modules/audio/tts/fastspeech_ljspeech/README.md index a2be971d3c301bb8c591d381ce43ab27e5beb65a..93dbe77c2b81059b0e52bb2935307c08c0372b2f 100644 --- a/modules/audio/tts/fastspeech_ljspeech/README.md +++ b/modules/audio/tts/fastspeech_ljspeech/README.md @@ -1,121 +1,148 @@ -## 概述 +# fastspeech_ljspeech + +|模型名称|fastspeech_ljspeech| +| :--- | :---: | +|类别|语音-语音合成| +|网络|FastSpeech| +|数据集|LJSpeech-1.1| +|是否支持Fine-tuning|否| +|模型大小|320MB| +|最新更新日期|2020-10-27| +|数据指标|-| + +## 一、模型基本信息 + +### 模型介绍 FastSpeech是基于Transformer的前馈神经网络,作者从encoder-decoder结构的teacher model中提取attention对角线来做发音持续时间预测,即使用长度调节器对文本序列进行扩展来匹配目标梅尔频谱的长度,以便并行生成梅尔频谱。该模型基本上消除了复杂情况下的跳词和重复的问题,并且可以平滑地调整语音速度,更重要的是,该模型大幅度提升了梅尔频谱的生成速度。fastspeech_ljspeech是基于ljspeech英文语音数据集预训练得到的英文TTS模型,仅支持预测。
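+
+上文提到的长度调节器同时也是调整语速的手段:把预测出的音素时长按速度系数缩放后再展开,即可得到更快或更慢的语音,对应本文档后面 synthesize 接口中的 speed 参数。下面是一段仅作示意的 NumPy 片段,数值均为假设,与模块内部实现无关:
+
+```python
+import numpy as np
+
+durations = np.array([3, 5, 2, 6], dtype=float)   # 时长预测器输出的帧数(假设值)
+hidden = np.random.randn(4, 8)                    # 音素级隐状态 [T_phone, D](假设维度)
+
+def length_regulate(hidden, durations, speed=1.0):
+    # speed > 1 时每个音素占的帧数变少,语速变快;speed < 1 则变慢
+    frames = np.maximum(np.round(durations / speed), 1).astype(int)
+    return np.repeat(hidden, frames, axis=0)
+
+print(length_regulate(hidden, durations, speed=1.0).shape)   # (16, 8)
+print(length_regulate(hidden, durations, speed=2.0).shape)   # (8, 8)
+```
+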
更多详情参考论文[FastSpeech: Fast, Robust and Controllable Text to Speech](https://arxiv.org/abs/1905.09263) -## 命令行预测 -```shell -$ hub run fastspeech_ljspeech --input_text='Simple as this proposition is, it is necessary to be stated' --use_gpu True --vocoder griffin-lim -``` +## 二、安装 -## API +- ### 1、系统依赖 -```python -def synthesize(texts, use_gpu=False, speed=1.0, vocoder="griffin-lim"): -``` + 对于Ubuntu用户,请执行: + ``` + sudo apt-get install libsndfile1 + ``` + 对于Centos用户,请执行: + ``` + sudo yum install libsndfile + ``` -预测API,由输入文本合成对应音频波形。 +- ### 2、环境依赖 -**参数** + - 2.0.0 > paddlepaddle >= 1.8.2 -* texts (list\[str\]): 待预测文本; -* use\_gpu (bool): 是否使用 GPU;**若使用GPU,请先设置CUDA\_VISIBLE\_DEVICES环境变量**; -* speed(float): 音频速度,1.0表示以原速输出。 -* vocoder: 指定声码器,可选 "griffin-lim"或"waveflow" + - 2.0.0 > paddlehub >= 1.7.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) -**返回** +- ### 3、安装 -* wavs (list): 语音合成结果列表,列表中每一个元素为对应输入文本的音频波形,可使用`soundfile.write`进一步处理或保存。 -* sample\_rate (int): 合成音频的采样率。 + - ```shell + $ hub install fastspeech_ljspeech + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) -**代码示例** -```python -import paddlehub as hub -import soundfile as sf +## 三、模型API预测 -# Load fastspeech_ljspeech module. -module = hub.Module(name="fastspeech_ljspeech") +- ### 1、命令行预测 -# Predict sentiment label -test_texts = ['Simple as this proposition is, it is necessary to be stated', - 'Parakeet stands for Paddle PARAllel text-to-speech toolkit'] -wavs, sample_rate = module.synthesize(texts=test_texts) -for index, wav in enumerate(wavs): - sf.write(f"{index}.wav", wav, sample_rate) -``` + - ```shell + $ hub run fastspeech_ljspeech --input_text='Simple as this proposition is, it is necessary to be stated' --use_gpu True --vocoder griffin-lim + ``` + - 通过命令行方式实现语音合成模型的调用,更多请见[PaddleHub命令行指令](https://github.com/shinichiye/PaddleHub/blob/release/v2.1/docs/docs_ch/tutorial/cmd_usage.rst) -## 服务部署 +- ### 2、预测代码示例 -PaddleHub Serving 可以部署在线服务。 + - ```python + import paddlehub as hub + import soundfile as sf -### 第一步:启动PaddleHub Serving + # Load fastspeech_ljspeech module. 
+ module = hub.Module(name="fastspeech_ljspeech") -运行启动命令: -```shell -$ hub serving start -m fastspeech_ljspeech -``` + # Predict sentiment label + test_texts = ['Simple as this proposition is, it is necessary to be stated', + 'Parakeet stands for Paddle PARAllel text-to-speech toolkit'] + wavs, sample_rate = module.synthesize(texts=test_texts) + for index, wav in enumerate(wavs): + sf.write(f"{index}.wav", wav, sample_rate) + ``` -这样就完成了一个服务化API的部署,默认端口号为8866。 +- ### 3、API -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + - ```python + def synthesize(texts, use_gpu=False, speed=1.0, vocoder="griffin-lim"): + ``` -### 第二步:发送预测请求 + - 预测API,由输入文本合成对应音频波形。 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + - **参数** + - texts (list\[str\]): 待预测文本; + - use\_gpu (bool): 是否使用 GPU;**若使用GPU,请先设置CUDA\_VISIBLE\_DEVICES环境变量**; + - speed(float): 音频速度,1.0表示以原速输出。 + - vocoder: 指定声码器,可选 "griffin-lim"或"waveflow" -```python -import requests -import json + - **返回** + - wavs (list): 语音合成结果列表,列表中每一个元素为对应输入文本的音频波形,可使用`soundfile.write`进一步处理或保存。 + - sample\_rate (int): 合成音频的采样率。 -import soundfile as sf -# 发送HTTP请求 +## 四、服务部署 -data = {'texts':['Simple as this proposition is, it is necessary to be stated', - 'Parakeet stands for Paddle PARAllel text-to-speech toolkit'], - 'use_gpu':False} -headers = {"Content-type": "application/json"} -url = "http://127.0.0.1:8866/predict/fastspeech_ljspeech" -r = requests.post(url=url, headers=headers, data=json.dumps(data)) +- PaddleHub Serving可以部署一个在线语音合成服务,可以将此接口用于在线web应用。 -# 保存结果 -result = r.json()["results"] -wavs = result["wavs"] -sample_rate = result["sample_rate"] -for index, wav in enumerate(wavs): - sf.write(f"{index}.wav", wav, sample_rate) -``` +- ### 第一步:启动PaddleHub Serving -## 查看代码 + - 运行启动命令 + - ```shell + $ hub serving start -m fastspeech_ljspeech + ``` + - 这样就完成了服务化API的部署,默认端口号为8866。 + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 -https://github.com/PaddlePaddle/Parakeet +- ### 第二步:发送预测请求 -### 依赖 + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 -paddlepaddle >= 1.8.2 + - ```python + import requests + import json -paddlehub >= 1.7.0 + import soundfile as sf -**NOTE:** 除了python依赖外还必须安装libsndfile库 + # 发送HTTP请求 -对于Ubuntu用户,请执行: -``` -sudo apt-get install libsndfile1 -``` -对于Centos用户,请执行: -``` -sudo yum install libsndfile -``` + data = {'texts':['Simple as this proposition is, it is necessary to be stated', + 'Parakeet stands for Paddle PARAllel text-to-speech toolkit'], + 'use_gpu':False} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/fastspeech_ljspeech" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) -## 更新历史 + # 保存结果 + result = r.json()["results"] + wavs = result["wavs"] + sample_rate = result["sample_rate"] + for index, wav in enumerate(wavs): + sf.write(f"{index}.wav", wav, sample_rate) + ``` + + +## 五、更新历史 * 1.0.0 初始发布 + + ```shell + $ hub install fastspeech_ljspeech + ``` diff --git a/modules/audio/tts/transformer_tts_ljspeech/README.md b/modules/audio/tts/transformer_tts_ljspeech/README.md index 2be5603ed13006f6ab2d6f5ab2d21c6381b943a7..58d1bf569fe7e637a50bfe766bb95059f0486c3e 100644 --- a/modules/audio/tts/transformer_tts_ljspeech/README.md +++ b/modules/audio/tts/transformer_tts_ljspeech/README.md @@ -1,119 +1,147 @@ -## 概述 +# transformer_tts_ljspeech + +|模型名称|transformer_tts_ljspeech| +| :--- | :---: | +|类别|语音-语音合成| +|网络|Transformer| +|数据集|LJSpeech-1.1| +|是否支持Fine-tuning|否| +|模型大小|54MB| +|最新更新日期|2020-10-27| +|数据指标|-| + +## 一、模型基本信息 + +### 模型介绍 TansformerTTS 是使用了 Transformer 
结构的端到端语音合成模型,它将 Transformer 与 Tacotron2 相融合,取得了令人满意的效果。由于去除了 RNN 的循环连接,训练时可以并行地提供 decoder 的输入、进行并行训练,大大提升了模型的训练速度。transformer_tts_ljspeech是基于ljspeech英文语音数据集预训练得到的英文TTS模型,仅支持预测。
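+
+上文所说的并行训练通常这样实现:训练时把真实梅尔频谱整体右移一帧作为 decoder 输入(teacher forcing),并用因果掩码保证每一帧只能关注它之前的帧。下面是一段仅作示意的 NumPy 片段,演示因果掩码的构造方式,数值均为假设:
+
+```python
+import numpy as np
+
+T = 5  # 假设解码器一次并行处理 5 帧
+# 下三角矩阵:第 i 帧只允许关注第 0..i 帧,未来帧被屏蔽
+causal_mask = np.tril(np.ones((T, T), dtype=bool))
+print(causal_mask.astype(int))
+
+# 注意力打分中,被屏蔽位置在 softmax 前置为 -inf
+scores = np.random.randn(T, T)
+scores = np.where(causal_mask, scores, -np.inf)
+weights = np.exp(scores - scores.max(axis=-1, keepdims=True))
+weights /= weights.sum(axis=-1, keepdims=True)
+print(weights.round(2))   # 每行只在允许的位置上有非零权重
+```
+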
更多详情参考论文[Neural Speech Synthesis with Transformer Network](https://arxiv.org/abs/1809.08895) -## 命令行预测 -```shell -$ hub run transformer_tts_ljspeech --input_text="Life was like a box of chocolates, you never know what you're gonna get." --use_gpu True --vocoder griffin-lim -``` +## 二、安装 + +- ### 1、系统依赖 -## API + 对于Ubuntu用户,请执行: + ``` + sudo apt-get install libsndfile1 + ``` + 对于Centos用户,请执行: + ``` + sudo yum install libsndfile + ``` -```python -def synthesize(texts, use_gpu=False, vocoder="griffin-lim"): -``` +- ### 2、环境依赖 -预测API,由输入文本合成对应音频波形。 + - 2.0.0 > paddlepaddle >= 1.8.2 -**参数** + - 2.0.0 > paddlehub >= 1.7.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) -* texts (list\[str\]): 待预测文本; -* use\_gpu (bool): 是否使用 GPU;**若使用GPU,请先设置CUDA\_VISIBLE\_DEVICES环境变量**; -* vocoder: 指定声码器,可选 "griffin-lim"或"waveflow" +- ### 3、安装 -**返回** + - ```shell + $ hub install transformer_tts_ljspeech + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) -* wavs (list): 语音合成结果列表,列表中每一个元素为对应输入文本的音频波形,可使用`soundfile.write`进一步处理或保存。 -* sample\_rate (int): 合成音频的采样率。 -**代码示例** +## 三、模型API预测 -```python -import paddlehub as hub -import soundfile as sf +- ### 1、命令行预测 -# Load transformer_tts_ljspeech module. -module = hub.Module(name="transformer_tts_ljspeech") + - ```shell + $ hub run transformer_tts_ljspeech --input_text="Life was like a box of chocolates, you never know what you're gonna get." --use_gpu True --vocoder griffin-lim + ``` + - 通过命令行方式实现语音合成模型的调用,更多请见[PaddleHub命令行指令](https://github.com/shinichiye/PaddleHub/blob/release/v2.1/docs/docs_ch/tutorial/cmd_usage.rst) -# Predict sentiment label -test_texts = ["Life was like a box of chocolates, you never know what you're gonna get."] -wavs, sample_rate = module.synthesize(texts=test_texts, use_gpu=True, vocoder="waveflow") -for index, wav in enumerate(wavs): - sf.write(f"{index}.wav", wav, sample_rate) -``` +- ### 2、预测代码示例 -## 服务部署 + - ```python + import paddlehub as hub + import soundfile as sf -PaddleHub Serving 可以部署在线服务。 + # Load transformer_tts_ljspeech module. 
+ module = hub.Module(name="transformer_tts_ljspeech") -### 第一步:启动PaddleHub Serving + # Predict sentiment label + test_texts = ["Life was like a box of chocolates, you never know what you're gonna get."] + wavs, sample_rate = module.synthesize(texts=test_texts, use_gpu=True, vocoder="waveflow") + for index, wav in enumerate(wavs): + sf.write(f"{index}.wav", wav, sample_rate) + ``` -运行启动命令: -```shell -$ hub serving start -m transformer_tts_ljspeech -``` +- ### 3、API -这样就完成了一个服务化API的部署,默认端口号为8866。 + - ```python + def synthesize(texts, use_gpu=False, vocoder="griffin-lim"): + ``` -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + - 预测API,由输入文本合成对应音频波形。 -### 第二步:发送预测请求 + - **参数** + - texts (list\[str\]): 待预测文本; + - use\_gpu (bool): 是否使用 GPU;**若使用GPU,请先设置CUDA\_VISIBLE\_DEVICES环境变量**; + - vocoder: 指定声码器,可选 "griffin-lim"或"waveflow" -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + - **返回** + - wavs (list): 语音合成结果列表,列表中每一个元素为对应输入文本的音频波形,可使用`soundfile.write`进一步处理或保存。 + - sample\_rate (int): 合成音频的采样率。 -```python -import requests -import json -import soundfile as sf +## 四、服务部署 -# 发送HTTP请求 +- PaddleHub Serving可以部署一个在线语音合成服务,可以将此接口用于在线web应用。 -data = {'texts':['Simple as this proposition is, it is necessary to be stated', - 'Parakeet stands for Paddle PARAllel text-to-speech toolkit'], - 'use_gpu':False} -headers = {"Content-type": "application/json"} -url = "http://127.0.0.1:8866/predict/transformer_tts_ljspeech" -r = requests.post(url=url, headers=headers, data=json.dumps(data)) +- ### 第一步:启动PaddleHub Serving -# 保存结果 -result = r.json()["results"] -wavs = result["wavs"] -sample_rate = result["sample_rate"] -for index, wav in enumerate(wavs): - sf.write(f"{index}.wav", wav, sample_rate) -``` + - 运行启动命令 -## 查看代码 + - ```shell + $ hub serving start -m transformer_tts_ljspeech + ``` + - 这样就完成了服务化API的部署,默认端口号为8866。 + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 -https://github.com/PaddlePaddle/Parakeet +- ### 第二步:发送预测请求 -### 依赖 + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 -paddlepaddle >= 1.8.2 + - ```python + import requests + import json -paddlehub >= 1.7.0 + import soundfile as sf -**NOTE:** 除了python依赖外还必须安装libsndfile库 + # 发送HTTP请求 -对于Ubuntu用户,请执行: -``` -sudo apt-get install libsndfile1 -``` -对于Centos用户,请执行: -``` -sudo yum install libsndfile -``` + data = {'texts':['Simple as this proposition is, it is necessary to be stated', + 'Parakeet stands for Paddle PARAllel text-to-speech toolkit'], + 'use_gpu':False} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/transformer_tts_ljspeech" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) -## 更新历史 + # 保存结果 + result = r.json()["results"] + wavs = result["wavs"] + sample_rate = result["sample_rate"] + for index, wav in enumerate(wavs): + sf.write(f"{index}.wav", wav, sample_rate) + ``` + + +## 五、更新历史 * 1.0.0 初始发布 + + ```shell + $ hub install transformer_tts_ljspeech + ``` diff --git a/modules/audio/voice_cloning/ge2e_fastspeech2_pwgan/README.md b/modules/audio/voice_cloning/ge2e_fastspeech2_pwgan/README.md new file mode 100644 index 0000000000000000000000000000000000000000..a4f9ac8a29269f31ea653db70d5ff92f36718672 --- /dev/null +++ b/modules/audio/voice_cloning/ge2e_fastspeech2_pwgan/README.md @@ -0,0 +1,111 @@ +# ge2e_fastspeech2_pwgan + +|模型名称|ge2e_fastspeech2_pwgan| +| :--- | :---: | +|类别|语音-声音克隆| +|网络|FastSpeech2| +|数据集|AISHELL-3| +|是否支持Fine-tuning|否| +|模型大小|462MB| +|最新更新日期|2021-12-17| +|数据指标|-| + +## 一、模型基本信息 + +### 模型介绍 + +声音克隆是指使用特定的音色,结合文字的读音合成音频,使得合成后的音频具有目标说话人的特征,从而达到克隆的目的。 + 
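+
+在阅读下面的流程之前,可以先把"音色"理解为一个固定维度的向量(即 Speaker Embedding,本模块中为 256 维,见后文 get_speaker_embedding 接口);两段语音的音色是否接近,可以用向量的余弦相似度来粗略衡量。下面是一段仅作示意的 NumPy 片段,向量为随机生成的假设值:
+
+```python
+import numpy as np
+
+def cosine_similarity(a, b):
+    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))
+
+# 假设分别从目标说话人录音和克隆合成的音频中提取出的 256 维说话人特征
+target_embedding = np.random.randn(256)
+cloned_embedding = target_embedding + 0.1 * np.random.randn(256)
+
+print(cosine_similarity(target_embedding, cloned_embedding))   # 越接近 1 表示音色越接近
+```
+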
+在训练语音克隆模型时,目标音色作为Speaker Encoder的输入,模型会提取这段语音的说话人特征(音色)作为Speaker Embedding。接着,在训练模型重新合成此类音色的语音时,除了输入的目标文本外,说话人的特征也将成为额外条件加入模型的训练。 + +在预测时,选取一段新的目标音色作为Speaker Encoder的输入,并提取其说话人特征,最终实现输入为一段文本和一段目标音色,模型生成目标音色说出此段文本的语音片段。 + +![](https://ai-studio-static-online.cdn.bcebos.com/982ab955b87244d3bae3b003aff8e28d9ec159ff0d6246a79757339076dfe7d4) + +`ge2e_fastspeech2_pwgan`是一个支持中文的语音克隆模型,分别使用了LSTMSpeakerEncoder、FastSpeech2和PWGan模型分别用于语音特征提取、目标音频特征合成和语音波形转换。 + +关于模型的详请可参考[PaddleSpeech](https://github.com/PaddlePaddle/PaddleSpeech)。 + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.2.0 + + - paddlehub >= 2.1.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) + +- ### 2、安装 + + - ```shell + $ hub install ge2e_fastspeech2_pwgan + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + + +## 三、模型API预测 + +- ### 1、预测代码示例 + + - ```python + import paddlehub as hub + + model = hub.Module(name='ge2e_fastspeech2_pwgan', output_dir='./', speaker_audio='/data/man.wav') # 指定目标音色音频文件 + texts = [ + '语音的表现形式在未来将变得越来越重要$', + '今天的天气怎么样$', ] + wavs = model.generate(texts, use_gpu=True) + + for text, wav in zip(texts, wavs): + print('='*30) + print(f'Text: {text}') + print(f'Wav: {wav}') + ``` + +- ### 2、API + - ```python + def __init__(speaker_audio: str = None, + output_dir: str = './') + ``` + - 初始化module,可配置模型的目标音色的音频文件和输出的路径。 + + - **参数** + - `speaker_audio`(str): 目标说话人语音音频文件(*.wav)的路径,默认为None(使用默认的女声作为目标音色)。 + - `output_dir`(str): 合成音频的输出文件,默认为当前目录。 + + + - ```python + def get_speaker_embedding() + ``` + - 获取模型的目标说话人特征。 + + - **返回** + - `results`(numpy.ndarray): 长度为256的numpy数组,代表目标说话人的特征。 + + - ```python + def set_speaker_embedding(speaker_audio: str) + ``` + - 设置模型的目标说话人特征。 + + - **参数** + - `speaker_audio`(str): 必填,目标说话人语音音频文件(*.wav)的路径。 + + - ```python + def generate(data: Union[str, List[str]], use_gpu: bool = False): + ``` + - 根据输入文字,合成目标说话人的语音音频文件。 + + - **参数** + - `data`(Union[str, List[str]]): 必填,目标音频的内容文本列表,目前只支持中文,不支持添加标点符号。 + - `use_gpu`(bool): 是否使用gpu执行计算,默认为False。 + + +## 四、更新历史 + +* 1.0.0 + + 初始发布。 + + ```shell + $ hub install ge2e_fastspeech2_pwgan + ``` diff --git a/modules/thirdparty/text/text_generation/reading_pictures_writing_poems/__init__.py b/modules/audio/voice_cloning/ge2e_fastspeech2_pwgan/__init__.py similarity index 100% rename from modules/thirdparty/text/text_generation/reading_pictures_writing_poems/__init__.py rename to modules/audio/voice_cloning/ge2e_fastspeech2_pwgan/__init__.py diff --git a/modules/audio/voice_cloning/ge2e_fastspeech2_pwgan/module.py b/modules/audio/voice_cloning/ge2e_fastspeech2_pwgan/module.py new file mode 100644 index 0000000000000000000000000000000000000000..9bea0832b9d67319a9ecf318ca1f3df9128df305 --- /dev/null +++ b/modules/audio/voice_cloning/ge2e_fastspeech2_pwgan/module.py @@ -0,0 +1,160 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import os +from typing import List, Union + +import numpy as np +import paddle +import soundfile as sf +import yaml +from yacs.config import CfgNode + +from paddlehub.env import MODULE_HOME +from paddlehub.module.module import moduleinfo, serving +from paddlehub.utils.log import logger +from paddlespeech.t2s.frontend.zh_frontend import Frontend +from paddlespeech.t2s.models.fastspeech2 import FastSpeech2 +from paddlespeech.t2s.models.fastspeech2 import FastSpeech2Inference +from paddlespeech.t2s.models.parallel_wavegan import PWGGenerator +from paddlespeech.t2s.models.parallel_wavegan import PWGInference +from paddlespeech.t2s.modules.normalizer import ZScore +from paddlespeech.vector.exps.ge2e.audio_processor import SpeakerVerificationPreprocessor +from paddlespeech.vector.models.lstm_speaker_encoder import LSTMSpeakerEncoder + + +@moduleinfo( + name="ge2e_fastspeech2_pwgan", + version="1.0.0", + summary="", + author="paddlepaddle", + author_email="", + type="audio/voice_cloning", +) +class VoiceCloner(paddle.nn.Layer): + def __init__(self, speaker_audio: str = None, output_dir: str = './'): + super(VoiceCloner, self).__init__() + + speaker_encoder_ckpt = os.path.join(MODULE_HOME, 'ge2e_fastspeech2_pwgan', 'assets', + 'ge2e_ckpt_0.3/step-3000000.pdparams') + synthesizer_res_dir = os.path.join(MODULE_HOME, 'ge2e_fastspeech2_pwgan', 'assets', + 'fastspeech2_nosil_aishell3_vc1_ckpt_0.5') + vocoder_res_dir = os.path.join(MODULE_HOME, 'ge2e_fastspeech2_pwgan', 'assets', 'pwg_aishell3_ckpt_0.5') + + # Speaker encoder + self.speaker_processor = SpeakerVerificationPreprocessor( + sampling_rate=16000, + audio_norm_target_dBFS=-30, + vad_window_length=30, + vad_moving_average_width=8, + vad_max_silence_length=6, + mel_window_length=25, + mel_window_step=10, + n_mels=40, + partial_n_frames=160, + min_pad_coverage=0.75, + partial_overlap_ratio=0.5) + self.speaker_encoder = LSTMSpeakerEncoder(n_mels=40, num_layers=3, hidden_size=256, output_size=256) + self.speaker_encoder.set_state_dict(paddle.load(speaker_encoder_ckpt)) + self.speaker_encoder.eval() + + # Voice synthesizer + with open(os.path.join(synthesizer_res_dir, 'default.yaml'), 'r') as f: + fastspeech2_config = CfgNode(yaml.safe_load(f)) + with open(os.path.join(synthesizer_res_dir, 'phone_id_map.txt'), 'r') as f: + phn_id = [line.strip().split() for line in f.readlines()] + + model = FastSpeech2(idim=len(phn_id), odim=fastspeech2_config.n_mels, **fastspeech2_config["model"]) + model.set_state_dict(paddle.load(os.path.join(synthesizer_res_dir, 'snapshot_iter_96400.pdz'))["main_params"]) + model.eval() + + stat = np.load(os.path.join(synthesizer_res_dir, 'speech_stats.npy')) + mu, std = stat + mu = paddle.to_tensor(mu) + std = paddle.to_tensor(std) + fastspeech2_normalizer = ZScore(mu, std) + self.sample_rate = fastspeech2_config.fs + + self.fastspeech2_inference = FastSpeech2Inference(fastspeech2_normalizer, model) + self.fastspeech2_inference.eval() + + # Vocoder + with open(os.path.join(vocoder_res_dir, 'default.yaml')) as f: + pwg_config = CfgNode(yaml.safe_load(f)) + + vocoder = PWGGenerator(**pwg_config["generator_params"]) + vocoder.set_state_dict( + paddle.load(os.path.join(vocoder_res_dir, 'snapshot_iter_1000000.pdz'))["generator_params"]) + vocoder.remove_weight_norm() + vocoder.eval() + + stat = np.load(os.path.join(vocoder_res_dir, 'feats_stats.npy')) + mu, std = stat + mu = paddle.to_tensor(mu) + std = paddle.to_tensor(std) 
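+ # Build the feature normalizer for the vocoder from the statistics loaded above (z-score over feats_stats.npy)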
+ pwg_normalizer = ZScore(mu, std) + + self.pwg_inference = PWGInference(pwg_normalizer, vocoder) + self.pwg_inference.eval() + + # Text frontend + self.frontend = Frontend(phone_vocab_path=os.path.join(synthesizer_res_dir, 'phone_id_map.txt')) + + # Speaking embedding + self._speaker_embedding = None + if speaker_audio is None or not os.path.isfile(speaker_audio): + speaker_audio = os.path.join(MODULE_HOME, 'ge2e_fastspeech2_pwgan', 'assets', 'voice_cloning.wav') + logger.warning(f'Due to no speaker audio is specified, speaker encoder will use defult ' + f'waveform({speaker_audio}) to extract speaker embedding. You can use ' + '"set_speaker_embedding()" method to reset a speaker audio for voice cloning.') + self.set_speaker_embedding(speaker_audio) + + self.output_dir = os.path.abspath(output_dir) + if not os.path.exists(self.output_dir): + os.makedirs(self.output_dir) + + def get_speaker_embedding(self): + return self._speaker_embedding.numpy() + + @paddle.no_grad() + def set_speaker_embedding(self, speaker_audio: str): + assert os.path.exists(speaker_audio), f'Speaker audio file: {speaker_audio} does not exists.' + mel_sequences = self.speaker_processor.extract_mel_partials( + self.speaker_processor.preprocess_wav(speaker_audio)) + self._speaker_embedding = self.speaker_encoder.embed_utterance(paddle.to_tensor(mel_sequences)) + + logger.info(f'Speaker embedding has been set from file: {speaker_audio}') + + @paddle.no_grad() + def generate(self, data: Union[str, List[str]], use_gpu: bool = False): + assert self._speaker_embedding is not None, f'Set speaker embedding before voice cloning.' + + if isinstance(data, str): + data = [data] + elif isinstance(data, list): + assert len(data) > 0 and isinstance(data[0], + str) and len(data[0]) > 0, f'Input data should be str of List[str].' 
+ else: + raise Exception(f'Input data should be str of List[str].') + + paddle.set_device('gpu') if use_gpu else paddle.set_device('cpu') + files = [] + for idx, text in enumerate(data): + phone_ids = self.frontend.get_input_ids(text, merge_sentences=True)["phone_ids"][0] + wav = self.pwg_inference(self.fastspeech2_inference(phone_ids, spk_emb=self._speaker_embedding)) + output_wav = os.path.join(self.output_dir, f'{idx+1}.wav') + sf.write(output_wav, wav.numpy(), samplerate=self.sample_rate) + files.append(output_wav) + + return files diff --git a/modules/audio/voice_cloning/ge2e_fastspeech2_pwgan/requirements.txt b/modules/audio/voice_cloning/ge2e_fastspeech2_pwgan/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..120598fd26d619a674601ca3de0a9f7c1609ca99 --- /dev/null +++ b/modules/audio/voice_cloning/ge2e_fastspeech2_pwgan/requirements.txt @@ -0,0 +1 @@ +paddlespeech==0.1.0a13 diff --git a/modules/audio/voice_cloning/lstm_tacotron2/README.md b/modules/audio/voice_cloning/lstm_tacotron2/README.md index 58d6e846a25ddded31a10d6632aaaf6d7563f723..dedd5017324c10bc7a1f466d4a7367d80237ae53 100644 --- a/modules/audio/voice_cloning/lstm_tacotron2/README.md +++ b/modules/audio/voice_cloning/lstm_tacotron2/README.md @@ -1,8 +1,18 @@ -```shell -$ hub install lstm_tacotron2==1.0.0 -``` +# lstm_tacotron2 + +|模型名称|lstm_tacotron2| +| :--- | :---: | +|类别|语音-语音合成| +|网络|LSTM、Tacotron2、WaveFlow| +|数据集|AISHELL-3| +|是否支持Fine-tuning|否| +|模型大小|327MB| +|最新更新日期|2021-06-15| +|数据指标|-| + +## 一、模型基本信息 -## 概述 +### 模型介绍 声音克隆是指使用特定的音色,结合文字的读音合成音频,使得合成后的音频具有目标说话人的特征,从而达到克隆的目的。 @@ -10,93 +20,107 @@ $ hub install lstm_tacotron2==1.0.0 在预测时,选取一段新的目标音色作为Speaker Encoder的输入,并提取其说话人特征,最终实现输入为一段文本和一段目标音色,模型生成目标音色说出此段文本的语音片段。 -![](https://ai-studio-static-online.cdn.bcebos.com/982ab955b87244d3bae3b003aff8e28d9ec159ff0d6246a79757339076dfe7d4) +

+
+

`lstm_tacotron2`是一个支持中文的语音克隆模型,分别使用了LSTMSpeakerEncoder、Tacotron2和WaveFlow模型分别用于语音特征提取、目标音频特征合成和语音波形转换。 -关于模型的详请可参考[Parakeet](https://github.com/PaddlePaddle/Parakeet/tree/release/v0.3/parakeet/models)。 +更多详情请参考: +- [Transfer Learning from Speaker Verification to Multispeaker Text-To-Speech Synthesis](https://arxiv.org/pdf/1806.04558.pdf) +- [Parakeet](https://github.com/PaddlePaddle/Parakeet/tree/release/v0.3/parakeet/models) -## API +## 二、安装 -```python -def __init__(speaker_audio: str = None, - output_dir: str = './') -``` -初始化module,可配置模型的目标音色的音频文件和输出的路径。 +- ### 1、环境依赖 -**参数** -- `speaker_audio`(str): 目标说话人语音音频文件(*.wav)的路径,默认为None(使用默认的女声作为目标音色)。 -- `output_dir`(str): 合成音频的输出文件,默认为当前目录。 + - paddlepaddle >= 2.0.0 + - paddlehub >= 2.1.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) -```python -def get_speaker_embedding() -``` -获取模型的目标说话人特征。 +- ### 2、安装 -**返回** -* `results`(numpy.ndarray): 长度为256的numpy数组,代表目标说话人的特征。 + - ```shell + $ hub install lstm_tacotron2 + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) -```python -def set_speaker_embedding(speaker_audio: str) -``` -设置模型的目标说话人特征。 -**参数** -- `speaker_audio`(str): 必填,目标说话人语音音频文件(*.wav)的路径。 +## 三、模型API预测 -```python -def generate(data: List[str], batch_size: int = 1, use_gpu: bool = False): -``` -根据输入文字,合成目标说话人的语音音频文件。 +- ### 1、预测代码示例 -**参数** -- `data`(List[str]): 必填,目标音频的内容文本列表,目前只支持中文,不支持添加标点符号。 -- `batch_size`(int): 可选,模型合成语音时的batch_size,默认为1。 -- `use_gpu`(bool): 是否使用gpu执行计算,默认为False。 + - ```python + import paddlehub as hub + model = hub.Module(name='lstm_tacotron2', output_dir='/data', speaker_audio='/data/man.wav') # 指定目标音色音频文件 + texts = [ + '语音的表现形式在未来将变得越来越重要$', + '今天的天气怎么样$', ] + wavs = model.generate(texts, use_gpu=True) -**代码示例** + for text, wav in zip(texts, wavs): + print('='*30) + print(f'Text: {text}') + print(f'Wav: {wav}') + ``` + ``` + ============================== + Text: 语音的表现形式在未来将变得越来越重要$ + Wav: /data/1.wav + ============================== + Text: 今天的天气怎么样$ + Wav: /data/2.wav + ``` -```python -import paddlehub as hub +- ### 2、API -model = hub.Module(name='lstm_tacotron2', output_dir='./', speaker_audio='/data/man.wav') # 指定目标音色音频文件 -texts = [ - '语音的表现形式在未来将变得越来越重要$', - '今天的天气怎么样$', ] -wavs = model.generate(texts, use_gpu=True) + - ```python + def __init__(speaker_audio: str = None, + output_dir: str = './') + ``` + - 初始化module,可配置模型的目标音色的音频文件和输出的路径。 -for text, wav in zip(texts, wavs): - print('='*30) - print(f'Text: {text}') - print(f'Wav: {wav}') -``` + - **参数** + - `speaker_audio`(str): 目标说话人语音音频文件(*.wav)的路径,默认为None(使用默认的女声作为目标音色)。 + - `output_dir`(str): 合成音频的输出文件,默认为当前目录。 -输出 -``` -============================== -Text: 语音的表现形式在未来将变得越来越重要$ -Wav: /data/1.wav -============================== -Text: 今天的天气怎么样$ -Wav: /data/2.wav -``` + - ```python + def get_speaker_embedding() + ``` + - 获取模型的目标说话人特征。 + + - **返回** + - `results`(numpy.ndarray): 长度为256的numpy数组,代表目标说话人的特征。 -## 查看代码 + - ```python + def set_speaker_embedding(speaker_audio: str) + ``` + - 设置模型的目标说话人特征。 -https://github.com/PaddlePaddle/Parakeet + - **参数** + - `speaker_audio`(str): 必填,目标说话人语音音频文件(*.wav)的路径。 -## 依赖 + - ```python + def generate(data: List[str], batch_size: int = 1, use_gpu: bool = False): + ``` + - 根据输入文字,合成目标说话人的语音音频文件。 -paddlepaddle >= 2.0.0 + - **参数** + - `data`(List[str]): 必填,目标音频的内容文本列表,目前只支持中文,不支持添加标点符号。 + - `batch_size`(int): 
可选,模型合成语音时的batch_size,默认为1。 + - `use_gpu`(bool): 是否使用gpu执行计算,默认为False。 -paddlehub >= 2.1.0 -## 更新历史 +## 四、更新历史 * 1.0.0 初始发布 + +```shell +$ hub install lstm_tacotron2==1.0.0 +``` diff --git a/modules/image/Image_editing/colorization/deoldify/README.md b/modules/image/Image_editing/colorization/deoldify/README.md index 21bfe6dabe92a0fd9c054a3cb447c1d166cca1e6..a181b89bdcc802fa5c6129d5d466472e80bfb258 100644 --- a/modules/image/Image_editing/colorization/deoldify/README.md +++ b/modules/image/Image_editing/colorization/deoldify/README.md @@ -1,121 +1,172 @@ +# deoldify -## 模型概述 -deoldify是用于图像和视频的着色渲染模型,该模型能够实现给黑白照片和视频恢复原彩。 +|模型名称|deoldify| +| :--- | :---: | +|类别|图像-图像编辑| +|网络|NoGAN| +|数据集|ILSVRC 2012| +|是否支持Fine-tuning|否| +|模型大小|834MB| +|指标|-| +|最新更新日期|2021-04-13| -## API 说明 -```python -def predict(self, input): -``` +## 一、模型基本信息 -着色变换API,得到着色后的图片或者视频。 +- ### 应用效果展示 + + - 样例结果示例(左为原图,右为效果图): +

+ +

+- ### 模型介绍 -**参数** + - deoldify是用于图像和视频的着色渲染模型,该模型能够实现给黑白照片和视频恢复原彩。 -* input(str): 图片或者视频的路径; + - 更多详情请参考:[deoldify](https://github.com/jantic/DeOldify) -**返回** +## 二、安装 -若输入是图片,返回值为: -* pred_img(np.ndarray): BGR图片数据; -* out_path(str): 保存图片路径。 +- ### 1、环境依赖 -若输入是视频,返回值为: -* frame_pattern_combined(str): 视频着色后单帧数据保存路径; -* vid_out_path(str): 视频保存路径。 + - paddlepaddle >= 2.0.0 -```python -def run_image(self, img): -``` -图像着色API, 得到着色后的图片。 + - paddlehub >= 2.0.0 -**参数** + - NOTE: 使用该模型需要自行安装ffmpeg,若您使用conda环境,推荐使用如下语句进行安装。 -* img (str|np.ndarray): 图片路径或则BGR格式图片。 + ```shell + $ conda install x264=='1!152.20180717' ffmpeg=4.0.2 -c conda-forge + ``` -**返回** -* pred_img(np.ndarray): BGR图片数据; +- ### 2、安装 + - ```shell + $ hub install deoldify + ``` + + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) -```python -def run_video(self, video): -``` -视频着色API, 得到着色后的视频。 -**参数** -* video (str): 待处理视频路径。 -**返回** +## 三、模型API预测 + - ### 1、代码示例 -* frame_pattern_combined(str): 视频着色后单帧数据保存路径; -* vid_out_path(str): 视频保存路径。 + ```python + import paddlehub as hub -## 预测代码示例 + model = hub.Module(name='deoldify') + model.predict('/PATH/TO/IMAGE/OR/VIDEO') + ``` -```python -import paddlehub as hub + - ### 2、API -model = hub.Module(name='deoldify') -model.predict('/PATH/TO/IMAGE/OR/VIDEO') -``` + - ```python + def predict(self, input): + ``` -## 服务部署 + - 着色变换API,得到着色后的图片或者视频。 -PaddleHub Serving可以部署一个在线照片着色服务。 + - **参数** -## 第一步:启动PaddleHub Serving + - input(str): 图片或者视频的路径; -运行启动命令: -```shell -$ hub serving start -m deoldify -``` + - **返回** -这样就完成了一个图像着色的在线服务API的部署,默认端口号为8866。 + - 若输入是图片,返回值为: + - pred_img(np.ndarray): BGR图片数据; + - out_path(str): 保存图片路径。 -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + - 若输入是视频,返回值为: + - frame_pattern_combined(str): 视频着色后单帧数据保存路径; + - vid_out_path(str): 视频保存路径。 -## 第二步:发送预测请求 + - ```python + def run_image(self, img): + ``` + - 图像着色API, 得到着色后的图片。 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + - **参数** -```python -import requests -import json -import base64 + - img (str|np.ndarray): 图片路径或则BGR格式图片。 -import cv2 -import numpy as np + - **返回** -def cv2_to_base64(image): - data = cv2.imencode('.jpg', image)[1] - return base64.b64encode(data.tostring()).decode('utf8') -def base64_to_cv2(b64str): - data = base64.b64decode(b64str.encode('utf8')) - data = np.fromstring(data, np.uint8) - data = cv2.imdecode(data, cv2.IMREAD_COLOR) - return data + - pred_img(np.ndarray): BGR图片数据; -# 发送HTTP请求 -org_im = cv2.imread('/PATH/TO/ORIGIN/IMAGE') -data = {'images':cv2_to_base64(org_im)} -headers = {"Content-type": "application/json"} -url = "http://127.0.0.1:8866/predict/deoldify" -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -img = base64_to_cv2(r.json()["results"]) -cv2.imwrite('/PATH/TO/SAVE/IMAGE', img) -``` + - ```python + def run_video(self, video): + ``` + - 视频着色API, 得到着色后的视频。 -## 模型相关信息 + - **参数** -### 模型代码 + - video (str): 待处理视频路径。 -https://github.com/jantic/DeOldify + - **返回** -### 依赖 + - frame_pattern_combined(str): 视频着色后单帧数据保存路径; + - vid_out_path(str): 视频保存路径。 -paddlepaddle >= 2.0.0rc +## 四、服务部署 -paddlehub >= 1.8.3 +- PaddleHub Serving可以部署一个在线照片着色服务 + + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + + - ```shell + $ hub serving start -m deoldify + ``` + + - 这样就完成了一个图像着色的在线服务API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 
配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + ```python + import requests + import json + import base64 + + import cv2 + import numpy as np + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + # 发送HTTP请求 + org_im = cv2.imread('/PATH/TO/ORIGIN/IMAGE') + data = {'images':cv2_to_base64(org_im)} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/deoldify" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + img = base64_to_cv2(r.json()["results"]) + cv2.imwrite('/PATH/TO/SAVE/IMAGE', img) + ``` + + +## 五、更新历史 + +* 1.0.0 + + 初始发布 + +* 1.0.1 + + 适配paddlehub2.0版本 diff --git a/modules/image/Image_editing/colorization/photo_restoration/README.md b/modules/image/Image_editing/colorization/photo_restoration/README.md index 653b313cf51b7c8b023bda0ec61238d591cdc85e..e3a2d5fd3459e07a4045ccfb3f20b5774826e773 100644 --- a/modules/image/Image_editing/colorization/photo_restoration/README.md +++ b/modules/image/Image_editing/colorization/photo_restoration/README.md @@ -1,98 +1,148 @@ -## 模型概述 +# photo_restoration -photo_restoration 是针对老照片修复的模型。它主要由两个部分组成:着色和超分。着色模型基于deoldify -,超分模型基于realsr. 用户可以根据自己的需求选择对图像进行着色或超分操作。因此在使用该模型时,请预先安装deoldify和realsr两个模型。 +|模型名称|photo_restoration| +| :--- | :---: | +|类别|图像-图像编辑| +|网络|基于deoldify和realsr模型| +|是否支持Fine-tuning|否| +|模型大小|64MB+834MB| +|指标|-| +|最新更新日期|2021-08-19| -## API -```python -def run_image(self, - input, - model_select= ['Colorization', 'SuperResolution'], - save_path = 'photo_restoration'): -``` +## 一、模型基本信息 -预测API,用于图片修复。 +- ### 应用效果展示 + - 样例结果示例(左为原图,右为效果图): +

+ +

-**参数** -* input (numpy.ndarray|str): 图片数据,numpy.ndarray 或者 str形式。ndarray.shape 为 \[H, W, C\],BGR格式; str为图片的路径。 -* model_select (list\[str\]): 选择对图片对操作,\['Colorization'\]对图像只进行着色操作, \['SuperResolution'\]对图像只进行超分操作; -默认值为\['Colorization', 'SuperResolution'\]。 +- ### 模型介绍 -* save_path (str): 保存图片的路径, 默认为'photo_restoration'。 + - photo_restoration 是针对老照片修复的模型。它主要由两个部分组成:着色和超分。着色模型基于deoldify + ,超分模型基于realsr. 用户可以根据自己的需求选择对图像进行着色或超分操作。因此在使用该模型时,请预先安装deoldify和realsr两个模型。 -**返回** +## 二、安装 -* output (numpy.ndarray): 照片修复结果,ndarray.shape 为 \[H, W, C\],BGR格式。 +- ### 1、环境依赖 + - paddlepaddle >= 2.0.0 + - paddlehub >= 2.0.0 -## 代码示例 + - NOTE: 使用该模型需要自行安装ffmpeg,若您使用conda环境,推荐使用如下语句进行安装。 -图片修复代码示例: + ```shell + $ conda install x264=='1!152.20180717' ffmpeg=4.0.2 -c conda-forge + ``` + +- ### 2、安装 + - ```shell + $ hub install photo_restoration + ``` + + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) -```python -import cv2 -import paddlehub as hub -model = hub.Module(name='photo_restoration', visualization=True) -im = cv2.imread('/PATH/TO/IMAGE') -res = model.run_image(im) -``` +## 三、模型API预测 + - ### 1、代码示例 -## 服务部署 + ```python + import cv2 + import paddlehub as hub -PaddleHub Serving可以部署一个照片修复的在线服务。 + model = hub.Module(name='photo_restoration', visualization=True) + im = cv2.imread('/PATH/TO/IMAGE') + res = model.run_image(im) -## 第一步:启动PaddleHub Serving + ``` +- ### 2、API -运行启动命令: -```shell -$ hub serving start -m photo_restoration -``` + ```python + def run_image(self, + input, + model_select= ['Colorization', 'SuperResolution'], + save_path = 'photo_restoration'): + ``` -这样就完成了一个照片修复的服务化API的部署,默认端口号为8866。 + - 预测API,用于图片修复。 -**NOTE:** 如使用GPU预测,则需要在启动服务之前,设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + - **参数** -## 第二步:发送预测请求 + - input (numpy.ndarray|str): 图片数据,numpy.ndarray 或者 str形式。ndarray.shape 为 \[H, W, C\],BGR格式; str为图片的路径。 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + - model_select (list\[str\]): 选择对图片对操作,\['Colorization'\]对图像只进行着色操作, \['SuperResolution'\]对图像只进行超分操作; + 默认值为\['Colorization', 'SuperResolution'\]。 -```python -import requests -import json -import base64 + - save_path (str): 保存图片的路径, 默认为'photo_restoration'。 -import cv2 -import numpy as np + - **返回** -def cv2_to_base64(image): - data = cv2.imencode('.jpg', image)[1] - return base64.b64encode(data.tostring()).decode('utf8') -def base64_to_cv2(b64str): - data = base64.b64decode(b64str.encode('utf8')) - data = np.fromstring(data, np.uint8) - data = cv2.imdecode(data, cv2.IMREAD_COLOR) - return data + - output (numpy.ndarray): 照片修复结果,ndarray.shape 为 \[H, W, C\],BGR格式。 -# 发送HTTP请求 -org_im = cv2.imread('PATH/TO/IMAGE') -data = {'images':cv2_to_base64(org_im), 'model_select': ['Colorization', 'SuperResolution']} -headers = {"Content-type": "application/json"} -url = "http://127.0.0.1:8866/predict/photo_restoration" -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -img = base64_to_cv2(r.json()["results"]) -cv2.imwrite('PATH/TO/SAVE/IMAGE', img) -``` -### 依赖 -paddlepaddle >= 2.0.0rc +## 四、服务部署 -paddlehub >= 1.8.2 +- PaddleHub Serving可以部署一个照片修复的在线服务。 + +- ## 第一步:启动PaddleHub Serving + + - 运行启动命令: + + - ```shell + $ hub serving start -m photo_restoration + ``` + + - 这样就完成了一个照片修复的服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + ```python + import requests + import 
json + import base64 + + import cv2 + import numpy as np + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + # 发送HTTP请求 + org_im = cv2.imread('PATH/TO/IMAGE') + data = {'images':cv2_to_base64(org_im), 'model_select': ['Colorization', 'SuperResolution']} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/photo_restoration" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + img = base64_to_cv2(r.json()["results"]) + cv2.imwrite('PATH/TO/SAVE/IMAGE', img) + ``` + +## 五、更新历史 + + +* 1.0.0 + + 初始发布 + +* 1.0.1 + + 适配paddlehub2.0版本 diff --git a/modules/image/Image_editing/colorization/user_guided_colorization/README.md b/modules/image/Image_editing/colorization/user_guided_colorization/README.md new file mode 100644 index 0000000000000000000000000000000000000000..390f04e1500e1d3d0ae1215f798bb9f7902f1fdc --- /dev/null +++ b/modules/image/Image_editing/colorization/user_guided_colorization/README.md @@ -0,0 +1,204 @@ +# user_guided_colorization + +|模型名称|user_guided_colorization| +| :--- | :---: | +|类别|图像-图像编辑| +|网络| Local and Global Hints Network | +|数据集|ILSVRC 2012| +|是否支持Fine-tuning|是| +|模型大小|131MB| +|指标|-| +|最新更新日期|2021-02-26| + + +## 一、模型基本信息 + +- ### 模型介绍 + +- ### 应用效果展示 + + - 样例结果示例(左为原图,右为效果图): +

+ +

+ + - user_guided_colorization 是基于''Real-Time User-Guided Image Colorization with Learned Deep Priors"的着色模型,该模型利用预先提供的着色块对图像进行着色。 + + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 + +- ### 2、安装 + - ```shell + $ hub install user_guided_colorization + ``` + + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1.命令行预测 + + ```shell + $ hub run user_guided_colorization --input_path "/PATH/TO/IMAGE" + ``` +- ### 2.预测代码示例 + + ```python + import paddle + import paddlehub as hub + + if __name__ == '__main__': + + model = hub.Module(name='user_guided_colorization') + model.set_config(prob=0.1) + result = model.predict(images=['/PATH/TO/IMAGE']) + ``` +- ### 3.如何开始Fine-tune + + - 在完成安装PaddlePaddle与PaddleHub后,通过执行`python train.py`即可开始使用user_guided_colorization模型对[Canvas](../../docs/reference/datasets.md#class-hubdatasetsCanvas)等数据集进行Fine-tune。 + + - 代码步骤 + + - Step1: 定义数据预处理方式 + - ```python + import paddlehub.vision.transforms as T + + transform = T.Compose([T.Resize((256, 256), interpolation='NEAREST'), + T.RandomPaddingCrop(crop_size=176), + T.RGB2LAB()], to_rgb=True) + ``` + + - `transforms` 数据增强模块定义了丰富的数据预处理方式,用户可按照需求替换自己需要的数据预处理方式。 + + - Step2: 下载数据集并使用 + - ```python + from paddlehub.datasets import Canvas + + color_set = Canvas(transform=transform, mode='train') + ``` + + * `transforms`: 数据预处理方式。 + * `mode`: 选择数据模式,可选项有 `train`, `test`, `val`, 默认为`train`。 + + * `hub.datasets.Canvas()` 会自动从网络下载数据集并解压到用户目录下`$HOME/.paddlehub/dataset`目录。 + + + - Step3: 加载预训练模型 + + - ```python + model = hub.Module(name='user_guided_colorization', load_checkpoint=None) + model.set_config(classification=True, prob=1) + ``` + * `name`:加载模型的名字。 + * `load_checkpoint`: 是否加载自己训练的模型,若为None,则加载提供的模型默认参数。 + * `classification`: 着色模型分两部分训练,开始阶段`classification`设置为True, 用于浅层网络训练。训练后期将`classification`设置为False, 用于训练网络的输出层。 + * `prob`: 每张输入图不加一个先验彩色块的概率,默认为1,即不加入先验彩色块。例如,当`prob`设定为0.9时,一张图上有两个先验彩色块的概率为(1-0.9)*(1-0.9)*0.9=0.009. 
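+
+ - 说明:以上为第一阶段(浅层网络)训练的配置。若要进入第二阶段训练网络输出层,可参考如下示意代码加载第一阶段的checkpoint并将`classification`设为False(其中checkpoint路径与`prob`取值仅为占位示例,请按实际情况调整),随后按下方Step4的方式继续训练:
+ - ```python
+ # 示意代码:第二阶段(输出层)训练的模型加载方式,checkpoint路径为占位符
+ model = hub.Module(name='user_guided_colorization', load_checkpoint='/PATH/TO/STAGE1/CHECKPOINT')
+ model.set_config(classification=False, prob=1)
+ ```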
+ + - Step4: 选择优化策略和运行配置 + + ```python + optimizer = paddle.optimizer.Adam(learning_rate=0.0001, parameters=model.parameters()) + trainer = Trainer(model, optimizer, checkpoint_dir='img_colorization_ckpt_cls_1') + trainer.train(color_set, epochs=201, batch_size=25, eval_dataset=color_set, log_interval=10, save_interval=10) + ``` + + + - 运行配置 + + - `Trainer` 主要控制Fine-tune的训练,包含以下可控制的参数: + + * `model`: 被优化模型; + * `optimizer`: 优化器选择; + * `use_vdl`: 是否使用vdl可视化训练过程; + * `checkpoint_dir`: 保存模型参数的地址; + * `compare_metrics`: 保存最优模型的衡量指标; + + - `trainer.train` 主要控制具体的训练过程,包含以下可控制的参数: + + * `train_dataset`: 训练时所用的数据集; + * `epochs`: 训练轮数; + * `batch_size`: 训练的批大小,如果使用GPU,请根据实际情况调整batch_size; + * `num_workers`: works的数量,默认为0; + * `eval_dataset`: 验证集; + * `log_interval`: 打印日志的间隔, 单位为执行批训练的次数。 + * `save_interval`: 保存模型的间隔频次,单位为执行训练的轮数。 + + - 模型预测 + + - 当完成Fine-tune后,Fine-tune过程在验证集上表现最优的模型会被保存在`${CHECKPOINT_DIR}/best_model`目录下,其中`${CHECKPOINT_DIR}`目录为Fine-tune时所选择的保存checkpoint的目录。 我们使用该模型来进行预测。predict.py脚本如下: + + - ```python + import paddle + import paddlehub as hub + + if __name__ == '__main__': + model = hub.Module(name='user_guided_colorization', load_checkpoint='/PATH/TO/CHECKPOINT') + model.set_config(prob=0.1) + result = model.predict(images=['house.png']) + ``` + + + - **NOTE:** 进行预测时,所选择的module,checkpoint_dir,dataset必须和Fine-tune所用的一样。若想获取油画风着色效果,请下载参数文件[油画着色](https://paddlehub.bj.bcebos.com/dygraph/models/canvas_rc.pdparams) + +## 四、服务部署 + +- PaddleHub Serving可以部署一个在线着色任务服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + + - ```shell + $ hub serving start -m user_guided_colorization + ``` + + - 这样就完成了一个着色任务服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + ```python + import requests + import json + import cv2 + import base64 + import numpy as np + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + # 发送HTTP请求 + org_im = cv2.imread('/PATH/TO/IMAGE') + data = {'images':[cv2_to_base64(org_im)]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/user_guided_colorization" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + data = base64_to_cv2(r.json()["results"]['data'][0]['fake_reg']) + cv2.imwrite('color.png', data) + ``` + + +## 五、更新历史 + +* 1.0.0 + + 初始发布 + + diff --git a/modules/image/Image_editing/super_resolution/dcscn/README.md b/modules/image/Image_editing/super_resolution/dcscn/README.md index da9bfa44b9fdc496e52ac60f2c810c959fbf52eb..15722b2f2e03999f33597fc8f224d22b9a3d6334 100644 --- a/modules/image/Image_editing/super_resolution/dcscn/README.md +++ b/modules/image/Image_editing/super_resolution/dcscn/README.md @@ -1,134 +1,173 @@ -## 模型概述 +# dcscn -DCSCN是基于Fast and Accurate Image Super Resolution by Deep CNN with Skip Connection and Network in Network设计的轻量化超分辨模型。该模型使用残差结构和跳连的方式构建网络来提取局部和全局特征,同时使用并行1*1的卷积网络学习细节特征提升模型性能。该模型提供的超分倍数为2倍。 -## 命令行预测 +|模型名称|dcscn| +| :--- | :---: | +|类别|图像-图像编辑| +|网络|dcscn| +|数据集|DIV2k| +|是否支持Fine-tuning|否| +|模型大小|260KB| +|指标|PSNR37.63| +|最新更新日期|2021-02-26| -``` -$ hub run dcscn --input_path "/PATH/TO/IMAGE" -``` +## 一、模型基本信息 -## API +- ### 应用效果展示 + + - 样例结果示例(左为原图,右为效果图): +

+ +

-```python -def reconstruct(self, - images=None, - paths=None, - use_gpu=False, - visualization=False, - output_dir="dcscn_output") -``` -预测API,用于图像超分辨率。 +- ### 模型介绍 -**参数** + - DCSCN是基于Fast and Accurate Image Super Resolution by Deep CNN with Skip Connection and Network in Network设计的轻量化超分辨模型。该模型使用残差结构和跳连的方式构建网络来提取局部和全局特征,同时使用并行1*1的卷积网络学习细节特征提升模型性能。该模型提供的超分倍数为2倍。 -* images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\],BGR格式; -* paths (list\[str\]): 图片的路径; -* use\_gpu (bool): 是否使用 GPU预测,如果使用GPU预测,则在预测之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置; -* visualization (bool): 是否将识别结果保存为图片文件; -* output\_dir (str): 图片的保存路径。 + - 更多详情请参考:[dcscn](https://github.com/jiny2001/dcscn-super-resolution) -**返回** +## 二、安装 -* res (list\[dict\]): 识别结果的列表,列表中每一个元素为 dict,关键字有 'save\_path', 'data',对应的取值为: - * save\_path (str, optional): 可视化图片的保存路径(仅当visualization=True时存在); - * data (numpy.ndarray): 超分辨后图像。 +- ### 1、环境依赖 -```python -def save_inference_model(self, - dirname='dcscn_save_model', - model_filename=None, - params_filename=None, - combined=False) -``` + - paddlepaddle >= 2.0.0 -将模型保存到指定路径。 + - paddlehub >= 2.0.0 -**参数** -* dirname: 存在模型的目录名称 -* model\_filename: 模型文件名称,默认为\_\_model\_\_ -* params\_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效) -* combined: 是否将参数保存到统一的一个文件中 +- ### 2、安装 + - ```shell + $ hub install dcscn + ``` -## 代码示例 + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) -```python -import cv2 -import paddlehub as hub +## 三、模型API预测 +- ### 1、命令行预测 -sr_model = hub.Module(name='dcscn') -im = cv2.imread('/PATH/TO/IMAGE').astype('float32') -#visualization=True可以用于查看超分图片效果,可设置为False提升运行速度。 -res = sr_model.reconstruct(images=[im], visualization=True) -print(res[0]['data']) -sr_model.save_inference_model() -``` + - ``` + $ hub run dcscn --input_path "/PATH/TO/IMAGE" + ``` +- ### 2、预测代码示例 -## 服务部署 + ```python + import cv2 + import paddlehub as hub -PaddleHub Serving可以部署一个图像超分的在线服务。 + sr_model = hub.Module(name='dcscn') + im = cv2.imread('/PATH/TO/IMAGE').astype('float32') + #visualization=True可以用于查看超分图片效果,可设置为False提升运行速度。 + res = sr_model.reconstruct(images=[im], visualization=True) + print(res[0]['data']) + sr_model.save_inference_model() + ``` -## 第一步:启动PaddleHub Serving +- ### 3、API -运行启动命令: + - ```python + def reconstruct(self, + images=None, + paths=None, + use_gpu=False, + visualization=False, + output_dir="dcscn_output") + ``` -```shell -$ hub serving start -m dcscn -``` + - 预测API,用于图像超分辨率。 -这样就完成了一个超分任务的服务化API的部署,默认端口号为8866。 + - **参数** -**NOTE:** 如使用GPU预测,则需要在启动服务之前,设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + * images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\],BGR格式; + * paths (list\[str\]): 图片的路径; + * use\_gpu (bool): 是否使用 GPU预测,如果使用GPU预测,则在预测之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置; + * visualization (bool): 是否将识别结果保存为图片文件; + * output\_dir (str): 图片的保存路径。 -## 第二步:发送预测请求 + - **返回** -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + * res (list\[dict\]): 识别结果的列表,列表中每一个元素为 dict,关键字有 'save\_path', 'data',对应的取值为: + * save\_path (str, optional): 可视化图片的保存路径(仅当visualization=True时存在); + * data (numpy.ndarray): 超分辨后图像。 -```python -import requests -import json -import base64 + - ```python + def save_inference_model(self, + dirname='dcscn_save_model', + model_filename=None, + params_filename=None, + combined=False) + ``` -import cv2 -import numpy as np + - 将模型保存到指定路径。 -def cv2_to_base64(image): - data = cv2.imencode('.jpg', 
image)[1] - return base64.b64encode(data.tostring()).decode('utf8') -def base64_to_cv2(b64str): - data = base64.b64decode(b64str.encode('utf8')) - data = np.fromstring(data, np.uint8) - data = cv2.imdecode(data, cv2.IMREAD_COLOR) - return data + - **参数** -# 发送HTTP请求 + * dirname: 存在模型的目录名称 + * model\_filename: 模型文件名称,默认为\_\_model\_\_ + * params\_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效) + * combined: 是否将参数保存到统一的一个文件中 -org_im = cv2.imread('/PATH/TO/IMAGE') -data = {'images':[cv2_to_base64(org_im)]} -headers = {"Content-type": "application/json"} -url = "http://127.0.0.1:8866/predict/dcscn" -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -sr = np.expand_dims(cv2.cvtColor(base64_to_cv2(r.json()["results"][0]['data']), cv2.COLOR_BGR2GRAY), axis=2) -shape =sr.shape -org_im = cv2.cvtColor(org_im, cv2.COLOR_BGR2YUV) -uv = cv2.resize(org_im[...,1:], (shape[1], shape[0]), interpolation=cv2.INTER_CUBIC) -combine_im = cv2.cvtColor(np.concatenate((sr, uv), axis=2), cv2.COLOR_YUV2BGR) -cv2.imwrite('dcscn_X2.png', combine_im) -print("save image as dcscn_X2.png") -``` -### 查看代码 +## 四、服务部署 -https://github.com/jiny2001/dcscn-super-resolution +- PaddleHub Serving可以部署一个图像超分的在线服务。 +- ### 第一步:启动PaddleHub Serving + - 运行启动命令: -### 依赖 + - ```shell + $ hub serving start -m dcscn + ``` -paddlepaddle >= 1.8.0 + - 这样就完成了一个超分任务的服务化API的部署,默认端口号为8866。 -paddlehub >= 1.7.1 + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + + - ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + ```python + import requests + import json + import base64 + + import cv2 + import numpy as np + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + # 发送HTTP请求 + + org_im = cv2.imread('/PATH/TO/IMAGE') + data = {'images':[cv2_to_base64(org_im)]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/dcscn" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + sr = np.expand_dims(cv2.cvtColor(base64_to_cv2(r.json()["results"][0]['data']), cv2.COLOR_BGR2GRAY), axis=2) + shape =sr.shape + org_im = cv2.cvtColor(org_im, cv2.COLOR_BGR2YUV) + uv = cv2.resize(org_im[...,1:], (shape[1], shape[0]), interpolation=cv2.INTER_CUBIC) + combine_im = cv2.cvtColor(np.concatenate((sr, uv), axis=2), cv2.COLOR_YUV2BGR) + cv2.imwrite('dcscn_X2.png', combine_im) + print("save image as dcscn_X2.png") + ``` + + +## 五、更新历史 + + +* 1.0.0 + + 初始发布 diff --git a/modules/image/Image_editing/super_resolution/falsr_a/README.md b/modules/image/Image_editing/super_resolution/falsr_a/README.md index 2981753ca3512962fc3a05c60df8ef2203e78323..f1b98a651387342bffb3397a3f4ada31cc61411d 100644 --- a/modules/image/Image_editing/super_resolution/falsr_a/README.md +++ b/modules/image/Image_editing/super_resolution/falsr_a/README.md @@ -1,126 +1,169 @@ -## 模型概述 +# falsr_a -falsr_a是基于Fast, Accurate and Lightweight Super-Resolution with Neural Architecture Search设计的轻量化超分辨模型。该模型使用多目标方法处理超分问题,同时使用基于混合控制器的弹性搜索策略来提升模型性能。该模型提供的超分倍数为2倍。 -## 命令行预测 +|模型名称|falsr_a| +| :--- | :---: | +|类别|图像-图像编辑| +|网络|falsr_a| +|数据集|DIV2k| +|是否支持Fine-tuning|否| +|模型大小|8.9MB| +|指标|PSNR37.82| +|最新更新日期|2021-02-26| -``` -$ hub run falsr_a --input_path "/PATH/TO/IMAGE" -``` +## 一、模型基本信息 -## API +- ### 应用效果展示 + + - 样例结果示例(左为原图,右为效果图): +

+ +

-```python -def reconstruct(self, - images=None, - paths=None, - use_gpu=False, - visualization=False, - output_dir="falsr_a_output") -``` -预测API,用于图像超分辨率。 +- ### 模型介绍 -**参数** + - falsr_a是基于Fast, Accurate and Lightweight Super-Resolution with Neural Architecture Search设计的轻量化超分辨模型。该模型使用多目标方法处理超分问题,同时使用基于混合控制器的弹性搜索策略来提升模型性能。该模型提供的超分倍数为2倍。 -* images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\],BGR格式; -* paths (list\[str\]): 图片的路径; -* use\_gpu (bool): 是否使用 GPU预测,如果使用GPU预测,则在预测之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置; -* visualization (bool): 是否将识别结果保存为图片文件; -* output\_dir (str): 图片的保存路径。 + - 更多详情请参考:[falsr_a](https://github.com/xiaomi-automl/FALSR) -**返回** +## 二、安装 -* res (list\[dict\]): 识别结果的列表,列表中每一个元素为 dict,关键字有 'save\_path', 'data',对应的取值为: - * save\_path (str, optional): 可视化图片的保存路径(仅当visualization=True时存在); - * data (numpy.ndarray): 超分辨后图像。 +- ### 1、环境依赖 -```python -def save_inference_model(self, - dirname='falsr_a_save_model', - model_filename=None, - params_filename=None, - combined=False) -``` + - paddlepaddle >= 2.0.0 -将模型保存到指定路径。 + - paddlehub >= 2.0.0 -**参数** -* dirname: 存在模型的目录名称 -* model\_filename: 模型文件名称,默认为\_\_model\_\_ -* params\_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效) -* combined: 是否将参数保存到统一的一个文件中 +- ### 2、安装 + - ```shell + $ hub install falsr_a + ``` -## 代码示例 + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) -```python -import cv2 -import paddlehub as hub +## 三、模型API预测 +- ### 1、命令行预测 -sr_model = hub.Module(name='falsr_a') -im = cv2.imread('/PATH/TO/IMAGE').astype('float32') -#visualization=True可以用于查看超分图片效果,可设置为False提升运行速度。 -res = sr_model.reconstruct(images=[im], visualization=True) -print(res[0]['data']) -sr_model.save_inference_model() -``` + - ``` + $ hub run falsr_a --input_path "/PATH/TO/IMAGE" + ``` +- ### 2、预测代码示例 -## 服务部署 + ```python + import cv2 + import paddlehub as hub -PaddleHub Serving可以部署一个图像超分的在线服务。 + sr_model = hub.Module(name='falsr_a') + im = cv2.imread('/PATH/TO/IMAGE').astype('float32') + #visualization=True可以用于查看超分图片效果,可设置为False提升运行速度。 + res = sr_model.reconstruct(images=[im], visualization=True) + print(res[0]['data']) + sr_model.save_inference_model() + ``` -## 第一步:启动PaddleHub Serving +- ### 3、API -运行启动命令: + - ```python + def reconstruct(self, + images=None, + paths=None, + use_gpu=False, + visualization=False, + output_dir="falsr_a_output") + ``` -```shell -$ hub serving start -m falsr_a -``` + - 预测API,用于图像超分辨率。 -这样就完成了一个超分任务的服务化API的部署,默认端口号为8866。 + - **参数** -**NOTE:** 如使用GPU预测,则需要在启动服务之前,设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + * images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\],BGR格式; + * paths (list\[str\]): 图片的路径; + * use\_gpu (bool): 是否使用 GPU预测,如果使用GPU预测,则在预测之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置; + * visualization (bool): 是否将识别结果保存为图片文件; + * output\_dir (str): 图片的保存路径。 -## 第二步:发送预测请求 + - **返回** -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + * res (list\[dict\]): 识别结果的列表,列表中每一个元素为 dict,关键字有 'save\_path', 'data',对应的取值为: + * save\_path (str, optional): 可视化图片的保存路径(仅当visualization=True时存在); + * data (numpy.ndarray): 超分辨后图像。 -```python -import requests -import json -import base64 + - ```python + def save_inference_model(self, + dirname='falsr_a_save_model', + model_filename=None, + params_filename=None, + combined=False) + ``` -import cv2 -import numpy as np + - 将模型保存到指定路径。 -def cv2_to_base64(image): - data = cv2.imencode('.jpg', image)[1] - return 
base64.b64encode(data.tostring()).decode('utf8') -def base64_to_cv2(b64str): - data = base64.b64decode(b64str.encode('utf8')) - data = np.fromstring(data, np.uint8) - data = cv2.imdecode(data, cv2.IMREAD_COLOR) - return data + - **参数** -# 发送HTTP请求 -org_im = cv2.imread('/PATH/TO/IMAGE') -data = {'images':[cv2_to_base64(org_im)]} -headers = {"Content-type": "application/json"} -url = "http://127.0.0.1:8866/predict/falsr_a" -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -sr = base64_to_cv2(r.json()["results"][0]['data']) -cv2.imwrite('falsr_a_X2.png', sr) -print("save image as falsr_a_X2.png") -``` -### 查看代码 + * dirname: 存在模型的目录名称 + * model\_filename: 模型文件名称,默认为\_\_model\_\_ + * params\_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效) + * combined: 是否将参数保存到统一的一个文件中 -https://github.com/xiaomi-automl/FALSR -### 依赖 +## 四、服务部署 + +- PaddleHub Serving可以部署一个图像超分的在线服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + + - ```shell + $ hub serving start -m falsr_a + ``` + + - 这样就完成了一个超分任务的服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + + - ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + ```python + import requests + import json + import base64 + + import cv2 + import numpy as np + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + # 发送HTTP请求 + org_im = cv2.imread('/PATH/TO/IMAGE') + data = {'images':[cv2_to_base64(org_im)]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/falsr_a" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + sr = base64_to_cv2(r.json()["results"][0]['data']) + cv2.imwrite('falsr_a_X2.png', sr) + print("save image as falsr_a_X2.png") + ``` + + +## 五、更新历史 + + +* 1.0.0 + + 初始发布 -paddlepaddle >= 1.8.0 -paddlehub >= 1.7.1 diff --git a/modules/image/Image_editing/super_resolution/falsr_b/README.md b/modules/image/Image_editing/super_resolution/falsr_b/README.md index f54f159d57e81c98d3d503da9bc68afd877ee796..b74a5f894791719d8d0b61ca666b395f318076a4 100644 --- a/modules/image/Image_editing/super_resolution/falsr_b/README.md +++ b/modules/image/Image_editing/super_resolution/falsr_b/README.md @@ -1,126 +1,170 @@ -## 模型概述 +# falsr_b -falsr_b是基于Fast, Accurate and Lightweight Super-Resolution with Neural Architecture Search设计的轻量化超分辨模型。falsr_b较falsr_a更轻量化。该模型使用多目标方法处理超分问题,同时使用基于混合控制器的弹性搜索策略来提升模型性能。该模型提供的超分倍数为2倍。 -## 命令行预测 +|模型名称|falsr_b| +| :--- | :---: | +|类别|图像-图像编辑| +|网络|falsr_b| +|数据集|DIV2k| +|是否支持Fine-tuning|否| +|模型大小|4MB| +|指标|PSNR37.61| +|最新更新日期|2021-02-26| -``` -$ hub run falsr_b --input_path "/PATH/TO/IMAGE" -``` +## 一、模型基本信息 -## API +- ### 应用效果展示 + + - 样例结果示例(左为原图,右为效果图): +

+ +

-```python -def reconstruct(self, - images=None, - paths=None, - use_gpu=False, - visualization=True, - output_dir="falsr_b_output") -``` -预测API,用于图像超分辨率。 +- ### 模型介绍 -**参数** + - falsr_b是基于Fast, Accurate and Lightweight Super-Resolution with Neural Architecture Search设计的轻量化超分辨模型。该模型使用多目标方法处理超分问题,同时使用基于混合控制器的弹性搜索策略来提升模型性能。该模型提供的超分倍数为2倍。 -* images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\],BGR格式; -* paths (list\[str\]): 图片的路径; -* use\_gpu (bool): 是否使用 GPU预测,如果使用GPU预测,则在预测之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置; -* visualization (bool): 是否将识别结果保存为图片文件; -* output\_dir (str): 图片的保存路径。 + - 更多详情请参考:[falsr_b](https://github.com/xiaomi-automl/FALSR) -**返回** +## 二、安装 -* res (list\[dict\]): 识别结果的列表,列表中每一个元素为 dict,关键字有 'save\_path', 'data',对应的取值为: - * save\_path (str, optional): 可视化图片的保存路径(仅当visualization=True时存在); - * data (numpy.ndarray): 超分辨后图像。 +- ### 1、环境依赖 -```python -def save_inference_model(self, - dirname='falsr_b_save_model', - model_filename=None, - params_filename=None, - combined=False) -``` + - paddlepaddle >= 2.0.0 -将模型保存到指定路径。 + - paddlehub >= 2.0.0 -**参数** -* dirname: 存在模型的目录名称 -* model\_filename: 模型文件名称,默认为\_\_model\_\_ -* params\_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效) -* combined: 是否将参数保存到统一的一个文件中 +- ### 2、安装 + - ```shell + $ hub install falsr_b + ``` -## 代码示例 + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) -```python -import cv2 -import paddlehub as hub +## 三、模型API预测 +- ### 1、命令行预测 -sr_model = hub.Module(name='falsr_b') -im = cv2.imread('/PATH/TO/IMAGE').astype('float32') -#visualization=True可以用于查看超分图片效果,可设置为False提升运行速度。 -res = sr_model.reconstruct(images=[im], visualization=True) -print(res[0]['data']) -sr_model.save_inference_model() -``` + - ``` + $ hub run falsr_b --input_path "/PATH/TO/IMAGE" + ``` +- ### 2、预测代码示例 -## 服务部署 + ```python + import cv2 + import paddlehub as hub -PaddleHub Serving可以部署一个图像超分的在线服务。 + sr_model = hub.Module(name='falsr_b') + im = cv2.imread('/PATH/TO/IMAGE').astype('float32') + #visualization=True可以用于查看超分图片效果,可设置为False提升运行速度。 + res = sr_model.reconstruct(images=[im], visualization=True) + print(res[0]['data']) + sr_model.save_inference_model() + ``` -## 第一步:启动PaddleHub Serving +- ### 3、API -运行启动命令: + - ```python + def reconstruct(self, + images=None, + paths=None, + use_gpu=False, + visualization=False, + output_dir="falsr_b_output") + ``` -```shell -$ hub serving start -m falsr_b -``` + - 预测API,用于图像超分辨率。 -这样就完成了一个超分任务的服务化API的部署,默认端口号为8866。 + - **参数** -**NOTE:** 如使用GPU预测,则需要在启动服务之前,设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + * images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\],BGR格式; + * paths (list\[str\]): 图片的路径; + * use\_gpu (bool): 是否使用 GPU预测,如果使用GPU预测,则在预测之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置; + * visualization (bool): 是否将识别结果保存为图片文件; + * output\_dir (str): 图片的保存路径。 -## 第二步:发送预测请求 + - **返回** -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + * res (list\[dict\]): 识别结果的列表,列表中每一个元素为 dict,关键字有 'save\_path', 'data',对应的取值为: + * save\_path (str, optional): 可视化图片的保存路径(仅当visualization=True时存在); + * data (numpy.ndarray): 超分辨后图像。 -```python -import requests -import json -import base64 + - ```python + def save_inference_model(self, + dirname='falsr_b_save_model', + model_filename=None, + params_filename=None, + combined=False) + ``` -import cv2 -import numpy as np + - 将模型保存到指定路径。 -def cv2_to_base64(image): - data = cv2.imencode('.jpg', image)[1] - return 
base64.b64encode(data.tostring()).decode('utf8') -def base64_to_cv2(b64str): - data = base64.b64decode(b64str.encode('utf8')) - data = np.fromstring(data, np.uint8) - data = cv2.imdecode(data, cv2.IMREAD_COLOR) - return data + - **参数** -# 发送HTTP请求 -org_im = cv2.imread('/PATH/TO/IMAGE') -data = {'images':[cv2_to_base64(org_im)]} -headers = {"Content-type": "application/json"} -url = "http://127.0.0.1:8866/predict/falsr_b" -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -sr = base64_to_cv2(r.json()["results"][0]['data']) -cv2.imwrite('falsr_b_X2.png', sr) -print("save image as falsr_b_X2.png") -``` + * dirname: 存在模型的目录名称 + * model\_filename: 模型文件名称,默认为\_\_model\_\_ + * params\_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效) + * combined: 是否将参数保存到统一的一个文件中 -### 查看代码 -https://github.com/xiaomi-automl/FALSR -### 依赖 +## 四、服务部署 + +- PaddleHub Serving可以部署一个图像超分的在线服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + + - ```shell + $ hub serving start -m falsr_b + ``` + + - 这样就完成了一个超分任务的服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + + - ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + ```python + import requests + import json + import base64 + + import cv2 + import numpy as np + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + # 发送HTTP请求 + org_im = cv2.imread('/PATH/TO/IMAGE') + data = {'images':[cv2_to_base64(org_im)]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/falsr_b" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + sr = base64_to_cv2(r.json()["results"][0]['data']) + cv2.imwrite('falsr_b_X2.png', sr) + print("save image as falsr_b_X2.png") + ``` + + +## 五、更新历史 + + +* 1.0.0 + + 初始发布 + -paddlepaddle >= 1.8.0 -paddlehub >= 1.7.1 diff --git a/modules/image/Image_editing/super_resolution/falsr_c/README.md b/modules/image/Image_editing/super_resolution/falsr_c/README.md index c61b2ed416991f4bf70e12070b2499bef1af2bba..3227847494d5b34867aa7ee36e91ff789ad80574 100644 --- a/modules/image/Image_editing/super_resolution/falsr_c/README.md +++ b/modules/image/Image_editing/super_resolution/falsr_c/README.md @@ -1,127 +1,168 @@ -## 模型概述 +# falsr_c -falsr_c是基于Fast, Accurate and Lightweight Super-Resolution with Neural Architecture Search设计的轻量化超分辨模型。该模型使用多目标方法处理超分问题,同时使用基于混合控制器的弹性搜索策略来提升模型性能。该模型提供的超分倍数为2倍。 -## 命令行预测 +|模型名称|falsr_c| +| :--- | :---: | +|类别|图像-图像编辑| +|网络|falsr_c| +|数据集|DIV2k| +|是否支持Fine-tuning|否| +|模型大小|4.4MB| +|PSNR|37.66| +|最新更新日期|2021-02-26| -``` -$ hub run falsr_c --input_path "/PATH/TO/IMAGE" -``` +## 一、模型基本信息 -## API +- ### 应用效果展示 + + - 样例结果示例(左为原图,右为效果图): +

+ +

-```python -def reconstruct(self, - images=None, - paths=None, - use_gpu=False, - visualization=False, - output_dir="falsr_c_output") -``` -预测API,用于图像超分辨率。 +- ### 模型介绍 -**参数** + - falsr_c是基于Fast, Accurate and Lightweight Super-Resolution with Neural Architecture Search设计的轻量化超分辨模型。该模型使用多目标方法处理超分问题,同时使用基于混合控制器的弹性搜索策略来提升模型性能。该模型提供的超分倍数为2倍。 -* images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\],BGR格式; -* paths (list\[str\]): 图片的路径; -* use\_gpu (bool): 是否使用 GPU预测,如果使用GPU预测,则在预测之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置; -* visualization (bool): 是否将识别结果保存为图片文件; -* output\_dir (str): 图片的保存路径。 + - 更多详情请参考:[falsr_c](https://github.com/xiaomi-automl/FALSR) -**返回** +## 二、安装 -* res (list\[dict\]): 识别结果的列表,列表中每一个元素为 dict,关键字有 'save\_path', 'data',对应的取值为: - * save\_path (str, optional): 可视化图片的保存路径(仅当visualization=True时存在); - * data (numpy.ndarray): 超分辨后图像。 +- ### 1、环境依赖 -```python -def save_inference_model(self, - dirname='falsr_c_save_model', - model_filename=None, - params_filename=None, - combined=False) -``` + - paddlepaddle >= 2.0.0 -将模型保存到指定路径。 + - paddlehub >= 2.0.0 -**参数** -* dirname: 存在模型的目录名称 -* model\_filename: 模型文件名称,默认为\_\_model\_\_ -* params\_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效) -* combined: 是否将参数保存到统一的一个文件中 +- ### 2、安装 + - ```shell + $ hub install falsr_c + ``` -## 代码示例 + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) -```python -import cv2 -import paddlehub as hub +## 三、模型API预测 +- ### 1、命令行预测 -sr_model = hub.Module(name='falsr_c') -im = cv2.imread('/PATH/TO/IMAGE').astype('float32') -#visualization=True可以用于查看超分图片效果,可设置为False提升运行速度。 -res = sr_model.reconstruct(images=[im], visualization=True) -print(res[0]['data']) -sr_model.save_inference_model() -``` + - ``` + $ hub run falsr_c --input_path "/PATH/TO/IMAGE" + ``` +- ### 代码示例 -## 服务部署 + ```python + import cv2 + import paddlehub as hub -PaddleHub Serving可以部署一个图像超分的在线服务。 + sr_model = hub.Module(name='falsr_c') + im = cv2.imread('/PATH/TO/IMAGE').astype('float32') + #visualization=True可以用于查看超分图片效果,可设置为False提升运行速度。 + res = sr_model.reconstruct(images=[im], visualization=True) + print(res[0]['data']) + sr_model.save_inference_model() + ``` -## 第一步:启动PaddleHub Serving +- ### 2、API -运行启动命令: + - ```python + def reconstruct(self, + images=None, + paths=None, + use_gpu=False, + visualization=False, + output_dir="falsr_c_output") + ``` -```shell -$ hub serving start -m falsr_c -``` + - 预测API,用于图像超分辨率。 -这样就完成了一个超分任务的服务化API的部署,默认端口号为8866。 + - **参数** -**NOTE:** 如使用GPU预测,则需要在启动服务之前,设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + * images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\],BGR格式; + * paths (list\[str\]): 图片的路径; + * use\_gpu (bool): 是否使用 GPU预测,如果使用GPU预测,则在预测之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置; + * visualization (bool): 是否将识别结果保存为图片文件; + * output\_dir (str): 图片的保存路径。 -## 第二步:发送预测请求 + - **返回** -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + * res (list\[dict\]): 识别结果的列表,列表中每一个元素为 dict,关键字有 'save\_path', 'data',对应的取值为: + * save\_path (str, optional): 可视化图片的保存路径(仅当visualization=True时存在); + * data (numpy.ndarray): 超分辨后图像。 -```python -import requests -import json -import base64 + - ```python + def save_inference_model(self, + dirname='falsr_c_save_model', + model_filename=None, + params_filename=None, + combined=False) + ``` -import cv2 -import numpy as np + - 将模型保存到指定路径。 -def cv2_to_base64(image): - data = cv2.imencode('.jpg', image)[1] - return 
base64.b64encode(data.tostring()).decode('utf8') -def base64_to_cv2(b64str): - data = base64.b64decode(b64str.encode('utf8')) - data = np.fromstring(data, np.uint8) - data = cv2.imdecode(data, cv2.IMREAD_COLOR) - return data + - **参数** -# 发送HTTP请求 -org_im = cv2.imread('/PATH/TO/IMAGE') -data = {'images':[cv2_to_base64(org_im)]} -headers = {"Content-type": "application/json"} -url = "http://127.0.0.1:8866/predict/falsr_c" -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -sr = base64_to_cv2(r.json()["results"][0]['data']) -cv2.imwrite('falsr_c_X2.png', sr) -print("save image as falsr_c_X2.png") -``` + * dirname: 存在模型的目录名称 + * model\_filename: 模型文件名称,默认为\_\_model\_\_ + * params\_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效) + * combined: 是否将参数保存到统一的一个文件中 -### 查看代码 -https://github.com/xiaomi-automl/FALSR +## 四、服务部署 -### 依赖 +- PaddleHub Serving可以部署一个图像超分的在线服务。 -paddlepaddle >= 1.8.0 +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + + - ```shell + $ hub serving start -m falsr_c + ``` + + - 这样就完成了一个超分任务的服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + + - ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + ```python + import requests + import json + import base64 + + import cv2 + import numpy as np + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + # 发送HTTP请求 + org_im = cv2.imread('/PATH/TO/IMAGE') + data = {'images':[cv2_to_base64(org_im)]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/falsr_c" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + sr = base64_to_cv2(r.json()["results"][0]['data']) + cv2.imwrite('falsr_c_X2.png', sr) + print("save image as falsr_c_X2.png") + ``` + + +## 五、更新历史 + + +* 1.0.0 + + 初始发布 -paddlehub >= 1.7.1 diff --git a/modules/image/Image_editing/super_resolution/realsr/README.md b/modules/image/Image_editing/super_resolution/realsr/README.md index 0ca1f8795bced6f8880678b2739b69b450656570..02e66678c5926f6f9e54344d6f74a1bf91304b39 100644 --- a/modules/image/Image_editing/super_resolution/realsr/README.md +++ b/modules/image/Image_editing/super_resolution/realsr/README.md @@ -1,121 +1,176 @@ +# realsr -## 模型概述 -realsr是用于图像和视频超分模型,该模型基于Toward Real-World Single Image Super-Resolution: A New Benchmark and A New Mode,它能够将输入的图片和视频超分四倍。 +|模型名称|reasr| +| :--- | :---: | +|类别|图像-图像编辑| +|网络|LP-KPN| +|数据集|RealSR dataset| +|是否支持Fine-tuning|否| +|模型大小|64MB| +|PSNR|29.05| +|最新更新日期|2021-02-26| -## API 说明 -```python -def predict(self, input): -``` -超分API,得到超分后的图片或者视频。 +## 一、模型基本信息 +- ### 应用效果展示 -**参数** + - 样例结果示例(左为原图,右为效果图): +


-* input (str): 图片或者视频的路径; +- ### 模型介绍 -**返回** + - realsr是用于图像和视频超分模型,该模型基于Toward Real-World Single Image Super-Resolution: A New Benchmark and A New Mode,它能够将输入的图片和视频超分四倍。 + + - 更多详情请参考:[realsr](https://github.com/csjcai/RealSR) + -若输入是图片,返回值为: -* pred_img(np.ndarray): BGR图片数据; -* out_path(str): 保存图片路径。 +## 二、安装 -若输入是视频,返回值为: -* frame_pattern_combined(str): 视频超分后单帧数据保存路径; -* vid_out_path(str): 视频保存路径。 +- ### 1、环境依赖 -```python -def run_image(self, img): -``` -图像超分API, 得到超分后的图片。 + - paddlepaddle >= 2.0.0 -**参数** + - paddlehub >= 2.0.0 -* img (str|np.ndarray): 图片路径或则BGR格式图片。 + - NOTE: 使用该模型需要自行安装ffmpeg,若您使用conda环境,推荐使用如下语句进行安装。 -**返回** + ```shell + $ conda install x264=='1!152.20180717' ffmpeg=4.0.2 -c conda-forge + ``` -* pred_img(np.ndarray): BGR图片数据; -```python -def run_video(self, video): -``` -视频超分API, 得到超分后的视频。 +- ### 2、安装 -**参数** + - ```shell + $ hub install realsr + ``` + + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) -* video(str): 待处理视频路径。 + -**返回** -* frame_pattern_combined(str): 视频超分后单帧数据保存路径; -* vid_out_path(str): 视频保存路径。 +## 三、模型API预测 -## 预测代码示例 + - ### 1、代码示例 -```python -import paddlehub as hub + ```python + import paddlehub as hub -model = hub.Module(name='realsr') -model.predict('/PATH/TO/IMAGE/OR/VIDEO') -``` + model = hub.Module(name='realsr') + model.predict('/PATH/TO/IMAGE/OR/VIDEO') + ``` + - ### 2、API -## 服务部署 + - ```python + def predict(self, input): + ``` -PaddleHub Serving可以部署一个在线照片超分服务。 + - 超分API,得到超分后的图片或者视频。 -## 第一步:启动PaddleHub Serving -运行启动命令: -```shell -$ hub serving start -m realsr -``` + - **参数** -这样就完成了一个图像超分的在线服务API的部署,默认端口号为8866。 + - input (str): 图片或者视频的路径; -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + - **返回** -## 第二步:发送预测请求 + - 若输入是图片,返回值为: + - pred_img(np.ndarray): BGR图片数据; + - out_path(str): 保存图片路径。 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + - 若输入是视频,返回值为: + - frame_pattern_combined(str): 视频超分后单帧数据保存路径; + - vid_out_path(str): 视频保存路径。 -```python -import requests -import json -import base64 + - ```python + def run_image(self, img): + ``` + - 图像超分API, 得到超分后的图片。 -import cv2 -import numpy as np + - **参数** -def cv2_to_base64(image): - data = cv2.imencode('.jpg', image)[1] - return base64.b64encode(data.tostring()).decode('utf8') -def base64_to_cv2(b64str): - data = base64.b64decode(b64str.encode('utf8')) - data = np.fromstring(data, np.uint8) - data = cv2.imdecode(data, cv2.IMREAD_COLOR) - return data + - img (str|np.ndarray): 图片路径或则BGR格式图片。 -# 发送HTTP请求 -org_im = cv2.imread('/PATH/TO/IMAGE') -data = {'images':cv2_to_base64(org_im)} -headers = {"Content-type": "application/json"} -url = "http://127.0.0.1:8866/predict/realsr" -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -img = base64_to_cv2(r.json()["results"]) -cv2.imwrite('/PATH/TO/SAVE/IMAGE', img) + - **返回** -``` + - pred_img(np.ndarray): BGR图片数据; -## 模型相关信息 + - ```python + def run_video(self, video): + ``` + - 视频超分API, 得到超分后的视频。 -### 模型代码 + - **参数** -https://github.com/csjcai/RealSR + - video(str): 待处理视频路径。 -### 依赖 + - **返回** -paddlepaddle >= 2.0.0rc + - frame_pattern_combined(str): 视频超分后单帧数据保存路径; + - vid_out_path(str): 视频保存路径。 + +## 四、服务部署 + +- PaddleHub Serving可以部署一个在线照片超分服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + + - ```shell + $ hub serving start -m realsr + ``` + + - 这样就完成了一个图像超分的在线服务API的部署,默认端口号为8866。 + + - **NOTE:** 
如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + ```python + import requests + import json + import base64 + + import cv2 + import numpy as np + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + # 发送HTTP请求 + org_im = cv2.imread('/PATH/TO/IMAGE') + data = {'images':cv2_to_base64(org_im)} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/realsr" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + img = base64_to_cv2(r.json()["results"]) + cv2.imwrite('/PATH/TO/SAVE/IMAGE', img) + + ``` + + +## 五、更新历史 + +* 1.0.0 + + 初始发布 + +* 1.0.1 + + 适配paddlehub2.0版本 -paddlehub >= 1.8.3 diff --git a/modules/image/Image_gan/attgan_celeba/README.md b/modules/image/Image_gan/attgan_celeba/README.md new file mode 100644 index 0000000000000000000000000000000000000000..f9a7a211949a026093547542c845d4e182392f98 --- /dev/null +++ b/modules/image/Image_gan/attgan_celeba/README.md @@ -0,0 +1,105 @@ +# attgan_celeba + +|模型名称|attgan_celeba| +| :--- | :---: | +|类别|图像 - 图像生成| +|网络|AttGAN| +|数据集|Celeba| +|是否支持Fine-tuning|否| +|模型大小|167MB| +|最新更新日期|2021-02-26| +|数据指标|-| + + +## 一、模型基本信息 + +- ### 应用效果展示 + - 样例结果示例: + +

+  图1. AttGAN的效果图(图片属性分别为:original image, Bald, Bangs, Black_Hair, Blond_Hair, Brown_Hair, Bushy_Eyebrows, Eyeglasses, Gender, Mouth_Slightly_Open, Mustache, No_Beard, Pale_Skin, Aged)
+ + +- ### 模型介绍 + + - AttGAN 是一种生成对抗网络(Generative Adversarial Networks),它利用分类损失和重构损失来保证改变特定的属性。该 PaddleHub Module 使用 Celeba 数据集训练完成,目前支持 "Bald", "Bangs", "Black_Hair", "Blond_Hair", "Brown_Hair", "Bushy_Eyebrows", "Eyeglasses", "Gender", "Mouth_Slightly_Open", "Mustache", "No_Beard", "Pale_Skin", "Aged" 这十三种人脸属性转换。 + + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 1.5.2 + + - paddlehub >= 1.0.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + +- ### 2、安装 + + - ```shell + $ hub install attgan_celeba==1.0.0 + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run attgan_celeba --image "/PATH/TO/IMAGE" --style "target_attribute" + ``` + - **参数** + + - image :指定图片路径。 + + - style 指定拟转换的属性,可选择 "Bald", "Bangs", "Black_Hair", "Blond_Hair", "Brown_Hair", "Bushy_Eyebrows", "Eyeglasses", "Gender", "Mouth_Slightly_Open", "Mustache", "No_Beard", "Pale_Skin", "Aged" 中的一种。 + + + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + + attgan = hub.Module(name="attgan_celeba") + + test_img_path = ["/PATH/TO/IMAGE"] + trans_attr = ["Bangs"] + + # set input dict + input_dict = {"image": test_img_path, "style": trans_attr} + + # execute predict and print the result + results = attgan.generate(data=input_dict) + print(results) + ``` + +- ### 3、API + + - ```python + def generate(data) + ``` + + - 风格转换API,用于图像生成。 + + - **参数** + + - data: dict 类型,有以下字段 + - image (list\[str\]): list中每个元素为待转换的图片路径。 + - style (list\[str\]): list中每个元素为字符串,填写待转换的人脸属性。 + + - **返回** + - res (list\[str\]): 提示生成图片的保存路径。 + + + +## 四、更新历史 + +* 1.0.0 + + 初始发布 + + diff --git a/modules/image/Image_gan/cyclegan_cityscapes/README.md b/modules/image/Image_gan/cyclegan_cityscapes/README.md new file mode 100644 index 0000000000000000000000000000000000000000..a63efd9e92c93d8b65545dedac8c33a349549aec --- /dev/null +++ b/modules/image/Image_gan/cyclegan_cityscapes/README.md @@ -0,0 +1,108 @@ +# cyclegan_cityscapes + +|模型名称|cyclegan_cityscapes| +| :--- | :---: | +|类别|图像 - 图像生成| +|网络|CycleGAN| +|数据集|Cityscapes| +|是否支持Fine-tuning|否| +|模型大小|33MB| +|最新更新日期|2021-02-26| +|数据指标|-| + + +## 一、模型基本信息 + +- ### 应用效果展示 + - 样例结果示例: + +

+  输入图像
+
+  输出图像
+ + +- ### 模型介绍 + + - CycleGAN是生成对抗网络(Generative Adversarial Networks )的一种,与传统的GAN只能单向生成图片不同,CycleGAN可以同时完成两个domain的图片进行相互转换。该PaddleHub Module使用Cityscapes数据集训练完成,支持图片从实景图转换为语义分割结果,也支持从语义分割结果转换为实景图。 + + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.1.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + +- ### 2、安装 + + - ```shell + $ hub install cyclegan_cityscapes==1.0.0 + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run cyclegan_cityscapes --input_path "/PATH/TO/IMAGE" + ``` + - **参数** + + - input_path :指定图片路径。 + + + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + + cyclegan = hub.Module(name="cyclegan_cityscapes") + + test_img_path = "/PATH/TO/IMAGE" + + # set input dict + input_dict = {"image": [test_img_path]} + + # execute predict and print the result + results = cyclegan.generate(data=input_dict) + print(results) + ``` + +- ### 3、API + + - ```python + def generate(data) + ``` + + - 风格转换API,用于图像生成。 + + - **参数** + + - data: dict 类型,有以下字段: + - image (list\[str\]): list中每个元素为待转换的图片路径。 + + - **返回** + - res (list\[str\]): 每个元素为对应输入图片的预测结果。预测结果为dict类型,有以下字段: + - origin: 原输入图片路径. + - generated: 生成图片的路径。 + + + +## 四、更新历史 + +* 1.0.0 + + 初始发布 + diff --git a/modules/image/Image_gan/gan/first_order_motion/README.md b/modules/image/Image_gan/gan/first_order_motion/README.md new file mode 100644 index 0000000000000000000000000000000000000000..ffca34eb9f96a9037a0b95e23b2ae20ded537b16 --- /dev/null +++ b/modules/image/Image_gan/gan/first_order_motion/README.md @@ -0,0 +1,95 @@ +# first_order_motion + +|模型名称|first_order_motion| +| :--- | :---: | +|类别|图像 - 图像生成| +|网络|S3FD| +|数据集|-| +|是否支持Fine-tuning|否| +|模型大小|343MB| +|最新更新日期|2021-12-24| +|数据指标|-| + + +## 一、模型基本信息 + +- ### 应用效果展示 + - 样例结果示例: +

+  输入图像
+
+  输入视频
+
+  输出视频
+ +- ### 模型介绍 + + - First Order Motion的任务是图像动画/Image Animation,即输入为一张源图片和一个驱动视频,源图片中的人物则会做出驱动视频中的动作。 + + +## 二、安装 + +- ### 1、环境依赖 + - paddlepaddle >= 2.1.0 + - paddlehub >= 2.1.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) + +- ### 2、安装 + + - ```shell + $ hub install first_order_motion + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run first_order_motion --source_image "/PATH/TO/IMAGE" --driving_video "/PATH/TO/VIDEO" --use_gpu + ``` + - 通过命令行方式实现视频驱动生成模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + + module = hub.Module(name="first_order_motion") + module.generate(source_image="/PATH/TO/IMAGE", driving_video="/PATH/TO/VIDEO", ratio=0.4, image_size=256, output_dir='./motion_driving_result/', filename='result.mp4', use_gpu=False) + ``` + +- ### 3、API + + - ```python + generate(self, source_image=None, driving_video=None, ratio=0.4, image_size=256, output_dir='./motion_driving_result/', filename='result.mp4', use_gpu=False) + ``` + - 视频驱动生成API。 + + - **参数** + - source_image (str): 原始图片,支持单人图片和多人图片,视频中人物的表情动作将迁移到该原始图片中的人物上。 + - driving_video (str): 驱动视频,视频中人物的表情动作作为待迁移的对象。 + - ratio (float): 贴回驱动生成的人脸区域占原图的比例, 用户需要根据生成的效果调整该参数,尤其对于多人脸距离比较近的情况下需要调整改参数, 默认为0.4,调整范围是[0.4, 0.5]。 + - image_size (int): 图片人脸大小,默认为256,可设置为512。 + - output\_dir (str): 结果保存的文件夹名;
+ - filename (str): 结果保存的文件名。 + - use\_gpu (bool): 是否使用 GPU;
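+
+  - 下面是一段示意代码(非文档原有示例,`/PATH/TO/...` 为占位路径,`result_512.mp4` 为任取的文件名),演示按上述参数在 GPU 上以 512 分辨率进行驱动生成:
+
+  - ```python
+    import paddlehub as hub
+
+    module = hub.Module(name="first_order_motion")
+    # image_size 可设为 512 以获得更大的人脸分辨率;use_gpu=True 表示使用 GPU
+    module.generate(
+        source_image="/PATH/TO/IMAGE",
+        driving_video="/PATH/TO/VIDEO",
+        ratio=0.4,
+        image_size=512,
+        output_dir='./motion_driving_result/',
+        filename='result_512.mp4',
+        use_gpu=True)
+    ```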
+ + +## 四、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install first_order_motion==1.0.0 + ``` diff --git a/modules/image/Image_gan/gan/first_order_motion/model.py b/modules/image/Image_gan/gan/first_order_motion/model.py new file mode 100644 index 0000000000000000000000000000000000000000..35b180d4283f86644ab16d1170e99f6d8bb5d5cf --- /dev/null +++ b/modules/image/Image_gan/gan/first_order_motion/model.py @@ -0,0 +1,352 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. + +import os +import sys +import math +import pickle + +import yaml +import imageio +import numpy as np +from tqdm import tqdm +from scipy.spatial import ConvexHull +import cv2 +import paddle +from ppgan.utils.download import get_path_from_url +from ppgan.utils.animate import normalize_kp +from ppgan.modules.keypoint_detector import KPDetector +from ppgan.models.generators.occlusion_aware import OcclusionAwareGenerator +from ppgan.faceutils import face_detection + + +class FirstOrderPredictor: + def __init__(self, + weight_path=None, + config=None, + image_size=256, + relative=True, + adapt_scale=False, + find_best_frame=False, + best_frame=None, + face_detector='sfd', + multi_person=False, + face_enhancement=True, + batch_size=1, + mobile_net=False): + if config is not None and isinstance(config, str): + with open(config) as f: + self.cfg = yaml.load(f, Loader=yaml.SafeLoader) + elif isinstance(config, dict): + self.cfg = config + elif config is None: + self.cfg = { + 'model': { + 'common_params': { + 'num_kp': 10, + 'num_channels': 3, + 'estimate_jacobian': True + }, + 'generator': { + 'kp_detector_cfg': { + 'temperature': 0.1, + 'block_expansion': 32, + 'max_features': 1024, + 'scale_factor': 0.25, + 'num_blocks': 5 + }, + 'generator_cfg': { + 'block_expansion': 64, + 'max_features': 512, + 'num_down_blocks': 2, + 'num_bottleneck_blocks': 6, + 'estimate_occlusion_map': True, + 'dense_motion_params': { + 'block_expansion': 64, + 'max_features': 1024, + 'num_blocks': 5, + 'scale_factor': 0.25 + } + } + } + } + } + self.image_size = image_size + if weight_path is None: + if mobile_net: + vox_cpk_weight_url = 'https://paddlegan.bj.bcebos.com/applications/first_order_model/vox-mobile.pdparams' + + else: + if self.image_size == 512: + vox_cpk_weight_url = 'https://paddlegan.bj.bcebos.com/applications/first_order_model/vox-cpk-512.pdparams' + else: + vox_cpk_weight_url = 'https://paddlegan.bj.bcebos.com/applications/first_order_model/vox-cpk.pdparams' + weight_path = get_path_from_url(vox_cpk_weight_url) + + self.weight_path = weight_path + self.relative = relative + self.adapt_scale = adapt_scale + self.find_best_frame = find_best_frame + self.best_frame = best_frame + self.face_detector = face_detector + self.generator, self.kp_detector = self.load_checkpoints(self.cfg, self.weight_path) + self.multi_person = multi_person + self.face_enhancement = face_enhancement + self.batch_size = batch_size + if face_enhancement: + from ppgan.faceutils.face_enhancement import 
FaceEnhancement + self.faceenhancer = FaceEnhancement(batch_size=batch_size) + + def read_img(self, path): + img = imageio.imread(path) + if img.ndim == 2: + img = np.expand_dims(img, axis=2) + # som images have 4 channels + if img.shape[2] > 3: + img = img[:, :, :3] + return img + + def run(self, source_image, driving_video, ratio, image_size, output_dir, filename): + self.ratio = ratio + self.image_size = image_size + self.output = output_dir + self.filename = filename + if not os.path.exists(output_dir): + os.makedirs(output_dir) + + def get_prediction(face_image): + if self.find_best_frame or self.best_frame is not None: + i = self.best_frame if self.best_frame is not None else self.find_best_frame_func( + source_image, driving_video) + + print("Best frame: " + str(i)) + driving_forward = driving_video[i:] + driving_backward = driving_video[:(i + 1)][::-1] + predictions_forward = self.make_animation( + face_image, + driving_forward, + self.generator, + self.kp_detector, + relative=self.relative, + adapt_movement_scale=self.adapt_scale) + predictions_backward = self.make_animation( + face_image, + driving_backward, + self.generator, + self.kp_detector, + relative=self.relative, + adapt_movement_scale=self.adapt_scale) + predictions = predictions_backward[::-1] + predictions_forward[1:] + else: + predictions = self.make_animation( + face_image, + driving_video, + self.generator, + self.kp_detector, + relative=self.relative, + adapt_movement_scale=self.adapt_scale) + return predictions + + source_image = self.read_img(source_image) + reader = imageio.get_reader(driving_video) + fps = reader.get_meta_data()['fps'] + driving_video = [] + try: + for im in reader: + driving_video.append(im) + except RuntimeError: + print("Read driving video error!") + pass + reader.close() + + driving_video = [cv2.resize(frame, (self.image_size, self.image_size)) / 255.0 for frame in driving_video] + results = [] + + bboxes = self.extract_bbox(source_image.copy()) + print(str(len(bboxes)) + " persons have been detected") + + # for multi person + for rec in bboxes: + face_image = source_image.copy()[rec[1]:rec[3], rec[0]:rec[2]] + face_image = cv2.resize(face_image, (self.image_size, self.image_size)) / 255.0 + predictions = get_prediction(face_image) + results.append({'rec': rec, 'predict': [predictions[i] for i in range(predictions.shape[0])]}) + if len(bboxes) == 1 or not self.multi_person: + break + out_frame = [] + + for i in range(len(driving_video)): + frame = source_image.copy() + for result in results: + x1, y1, x2, y2, _ = result['rec'] + h = y2 - y1 + w = x2 - x1 + out = result['predict'][i] + out = cv2.resize(out.astype(np.uint8), (x2 - x1, y2 - y1)) + if len(results) == 1: + frame[y1:y2, x1:x2] = out + break + else: + patch = np.zeros(frame.shape).astype('uint8') + patch[y1:y2, x1:x2] = out + mask = np.zeros(frame.shape[:2]).astype('uint8') + cx = int((x1 + x2) / 2) + cy = int((y1 + y2) / 2) + cv2.circle(mask, (cx, cy), math.ceil(h * self.ratio), (255, 255, 255), -1, 8, 0) + frame = cv2.copyTo(patch, mask, frame) + + out_frame.append(frame) + imageio.mimsave(os.path.join(self.output, self.filename), [frame for frame in out_frame], fps=fps) + + def load_checkpoints(self, config, checkpoint_path): + + generator = OcclusionAwareGenerator( + **config['model']['generator']['generator_cfg'], **config['model']['common_params'], inference=True) + + kp_detector = KPDetector(**config['model']['generator']['kp_detector_cfg'], **config['model']['common_params']) + + checkpoint = paddle.load(self.weight_path) 
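+        # 检查点是一个 dict,'generator' 与 'kp_detector' 两个 key 分别保存对应子网络的权重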
+ generator.set_state_dict(checkpoint['generator']) + + kp_detector.set_state_dict(checkpoint['kp_detector']) + + generator.eval() + kp_detector.eval() + + return generator, kp_detector + + def make_animation(self, + source_image, + driving_video, + generator, + kp_detector, + relative=True, + adapt_movement_scale=True): + with paddle.no_grad(): + predictions = [] + source = paddle.to_tensor(source_image[np.newaxis].astype(np.float32)).transpose([0, 3, 1, 2]) + + driving = paddle.to_tensor(np.array(driving_video).astype(np.float32)).transpose([0, 3, 1, 2]) + kp_source = kp_detector(source) + kp_driving_initial = kp_detector(driving[0:1]) + kp_source_batch = {} + kp_source_batch["value"] = paddle.tile(kp_source["value"], repeat_times=[self.batch_size, 1, 1]) + kp_source_batch["jacobian"] = paddle.tile(kp_source["jacobian"], repeat_times=[self.batch_size, 1, 1, 1]) + source = paddle.tile(source, repeat_times=[self.batch_size, 1, 1, 1]) + begin_idx = 0 + for frame_idx in tqdm(range(int(np.ceil(float(driving.shape[0]) / self.batch_size)))): + frame_num = min(self.batch_size, driving.shape[0] - begin_idx) + driving_frame = driving[begin_idx:begin_idx + frame_num] + kp_driving = kp_detector(driving_frame) + kp_source_img = {} + kp_source_img["value"] = kp_source_batch["value"][0:frame_num] + kp_source_img["jacobian"] = kp_source_batch["jacobian"][0:frame_num] + + kp_norm = normalize_kp( + kp_source=kp_source, + kp_driving=kp_driving, + kp_driving_initial=kp_driving_initial, + use_relative_movement=relative, + use_relative_jacobian=relative, + adapt_movement_scale=adapt_movement_scale) + + out = generator(source[0:frame_num], kp_source=kp_source_img, kp_driving=kp_norm) + img = np.transpose(out['prediction'].numpy(), [0, 2, 3, 1]) * 255.0 + + if self.face_enhancement: + img = self.faceenhancer.enhance_from_batch(img) + + predictions.append(img) + begin_idx += frame_num + return np.concatenate(predictions) + + def find_best_frame_func(self, source, driving): + import face_alignment + + def normalize_kp(kp): + kp = kp - kp.mean(axis=0, keepdims=True) + area = ConvexHull(kp[:, :2]).volume + area = np.sqrt(area) + kp[:, :2] = kp[:, :2] / area + return kp + + fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D, flip_input=True) + + kp_source = fa.get_landmarks(255 * source)[0] + kp_source = normalize_kp(kp_source) + norm = float('inf') + frame_num = 0 + for i, image in tqdm(enumerate(driving)): + kp_driving = fa.get_landmarks(255 * image)[0] + kp_driving = normalize_kp(kp_driving) + new_norm = (np.abs(kp_source - kp_driving)**2).sum() + if new_norm < norm: + norm = new_norm + frame_num = i + return frame_num + + def extract_bbox(self, image): + detector = face_detection.FaceAlignment( + face_detection.LandmarksType._2D, flip_input=False, face_detector=self.face_detector) + + frame = [image] + predictions = detector.get_detections_for_image(np.array(frame)) + person_num = len(predictions) + if person_num == 0: + return np.array([]) + results = [] + face_boxs = [] + h, w, _ = image.shape + for rect in predictions: + bh = rect[3] - rect[1] + bw = rect[2] - rect[0] + cy = rect[1] + int(bh / 2) + cx = rect[0] + int(bw / 2) + margin = max(bh, bw) + y1 = max(0, cy - margin) + x1 = max(0, cx - int(0.8 * margin)) + y2 = min(h, cy + margin) + x2 = min(w, cx + int(0.8 * margin)) + area = (y2 - y1) * (x2 - x1) + results.append([x1, y1, x2, y2, area]) + # if a person has more than one bbox, keep the largest one + # maybe greedy will be better? 
+ sorted(results, key=lambda area: area[4], reverse=True) + results_box = [results[0]] + for i in range(1, person_num): + num = len(results_box) + add_person = True + for j in range(num): + pre_person = results_box[j] + iou = self.IOU(pre_person[0], pre_person[1], pre_person[2], pre_person[3], pre_person[4], results[i][0], + results[i][1], results[i][2], results[i][3], results[i][4]) + if iou > 0.5: + add_person = False + break + if add_person: + results_box.append(results[i]) + boxes = np.array(results_box) + return boxes + + def IOU(self, ax1, ay1, ax2, ay2, sa, bx1, by1, bx2, by2, sb): + #sa = abs((ax2 - ax1) * (ay2 - ay1)) + #sb = abs((bx2 - bx1) * (by2 - by1)) + x1, y1 = max(ax1, bx1), max(ay1, by1) + x2, y2 = min(ax2, bx2), min(ay2, by2) + w = x2 - x1 + h = y2 - y1 + if w < 0 or h < 0: + return 0.0 + else: + return 1.0 * w * h / (sa + sb - w * h) diff --git a/modules/image/Image_gan/gan/first_order_motion/module.py b/modules/image/Image_gan/gan/first_order_motion/module.py new file mode 100644 index 0000000000000000000000000000000000000000..b3d5ecb07b5756865d0e41678f2234520cbd46f6 --- /dev/null +++ b/modules/image/Image_gan/gan/first_order_motion/module.py @@ -0,0 +1,106 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import argparse +import copy + +import paddle +import paddlehub as hub +from paddlehub.module.module import moduleinfo, runnable, serving +import numpy as np +import cv2 +from skimage.io import imread +from skimage.transform import rescale, resize + +from .model import FirstOrderPredictor + + +@moduleinfo( + name="first_order_motion", type="CV/gan", author="paddlepaddle", author_email="", summary="", version="1.0.0") +class FirstOrderMotion: + def __init__(self): + self.pretrained_model = os.path.join(self.directory, "vox-cpk.pdparams") + self.network = FirstOrderPredictor(weight_path=self.pretrained_model, face_enhancement=True) + + def generate(self, + source_image=None, + driving_video=None, + ratio=0.4, + image_size=256, + output_dir='./motion_driving_result/', + filename='result.mp4', + use_gpu=False): + ''' + source_image (str): path to image
+        driving_video (str): path to driving_video
+ ratio: margin ratio + image_size: size of image + output_dir: the dir to save the results + filename: filename to save the results + use_gpu: if True, use gpu to perform the computation, otherwise cpu. + ''' + paddle.disable_static() + place = 'gpu:0' if use_gpu else 'cpu' + place = paddle.set_device(place) + if source_image == None or driving_video == None: + print('No image or driving video provided. Please input an image and a driving video.') + return + self.network.run(source_image, driving_video, ratio, image_size, output_dir, filename) + + @runnable + def run_cmd(self, argvs: list): + """ + Run as a command. + """ + self.parser = argparse.ArgumentParser( + description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + self.args = self.parser.parse_args(argvs) + self.generate( + source_image=self.args.source_image, + driving_video=self.args.driving_video, + ratio=self.args.ratio, + image_size=self.args.image_size, + output_dir=self.args.output_dir, + use_gpu=self.args.use_gpu) + return + + def add_module_config_arg(self): + """ + Add the command config options. + """ + self.arg_config_group.add_argument('--use_gpu', action='store_true', help="use GPU or not") + + self.arg_config_group.add_argument( + '--output_dir', type=str, default='motion_driving_result', help='output directory for saving result.') + self.arg_config_group.add_argument("--filename", default='result.mp4', help="filename to output") + + def add_module_input_arg(self): + """ + Add the command input options. + """ + self.arg_input_group.add_argument("--source_image", type=str, help="path to source image") + self.arg_input_group.add_argument("--driving_video", type=str, help="path to driving video") + self.arg_input_group.add_argument("--ratio", dest="ratio", type=float, default=0.4, help="margin ratio") + self.arg_input_group.add_argument( + "--image_size", dest="image_size", type=int, default=256, help="size of image") diff --git a/modules/image/Image_gan/gan/first_order_motion/requirements.txt b/modules/image/Image_gan/gan/first_order_motion/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..67e9bb6fa840355e9ed0d44b7134850f1fe22fe1 --- /dev/null +++ b/modules/image/Image_gan/gan/first_order_motion/requirements.txt @@ -0,0 +1 @@ +ppgan diff --git a/modules/image/Image_gan/gan/pixel2style2pixel/README.md b/modules/image/Image_gan/gan/pixel2style2pixel/README.md new file mode 100644 index 0000000000000000000000000000000000000000..fa0c3925e23e62f30d6c4b3635c62a0ba1dfb6dd --- /dev/null +++ b/modules/image/Image_gan/gan/pixel2style2pixel/README.md @@ -0,0 +1,133 @@ +# pixel2style2pixel + +|模型名称|pixel2style2pixel| +| :--- | :---: | +|类别|图像 - 图像生成| +|网络|Pixel2Style2Pixel| +|数据集|-| +|是否支持Fine-tuning|否| +|模型大小|1.7GB| +|最新更新日期|2021-12-14| +|数据指标|-| + + +## 一、模型基本信息 + +- ### 应用效果展示 + - 样例结果示例: +

+  输入图像
+
+  输出图像
+ +- ### 模型介绍 + + - Pixel2Style2Pixel使用相当大的模型对图像进行编码,将图像编码到StyleGAN V2的风格向量空间中,使编码前的图像和解码后的图像具有强关联性。该模块应用于人脸转正任务。 + + + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.1.0 + - paddlehub >= 2.1.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) +- ### 2、安装 + + - ```shell + $ hub install pixel2style2pixel + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + # Read from a file + $ hub run pixel2style2pixel --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现人脸转正模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + + module = hub.Module(name="pixel2style2pixel") + input_path = ["/PATH/TO/IMAGE"] + # Read from a file + module.style_transfer(paths=input_path, output_dir='./transfer_result/', use_gpu=True) + ``` + +- ### 3、API + + - ```python + style_transfer(images=None, paths=None, output_dir='./transfer_result/', use_gpu=False, visualization=True): + ``` + - 人脸转正生成API。 + + - **参数** + + - images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\];
+ - paths (list\[str\]): 图片的路径;
+ - output\_dir (str): 结果保存的路径;
+ - use\_gpu (bool): 是否使用 GPU;
+ - visualization(bool): 是否保存结果到本地文件夹 + + +## 四、服务部署 + +- PaddleHub Serving可以部署一个在线人脸转正服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + - ```shell + $ hub serving start -m pixel2style2pixel + ``` + + - 这样就完成了一个人脸转正的在线服务API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + import cv2 + import base64 + + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # 发送HTTP请求 + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/pixel2style2pixel" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # 打印预测结果 + print(r.json()["results"]) + +## 五、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install pixel2style2pixel==1.0.0 + ``` diff --git a/modules/image/Image_gan/gan/pixel2style2pixel/model.py b/modules/image/Image_gan/gan/pixel2style2pixel/model.py new file mode 100644 index 0000000000000000000000000000000000000000..e82fbc8ead5e2545628e59fff817b3a378d63560 --- /dev/null +++ b/modules/image/Image_gan/gan/pixel2style2pixel/model.py @@ -0,0 +1,205 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import cv2 +import scipy +import random +import numpy as np +import paddle +import paddle.vision.transforms as T +import ppgan.faceutils as futils +from ppgan.models.generators import Pixel2Style2Pixel +from ppgan.utils.download import get_path_from_url +from PIL import Image + +model_cfgs = { + 'ffhq-inversion': { + 'model_urls': + 'https://paddlegan.bj.bcebos.com/models/pSp-ffhq-inversion.pdparams', + 'transform': + T.Compose([T.Resize((256, 256)), + T.Transpose(), + T.Normalize([127.5, 127.5, 127.5], [127.5, 127.5, 127.5])]), + 'size': + 1024, + 'style_dim': + 512, + 'n_mlp': + 8, + 'channel_multiplier': + 2 + }, + 'ffhq-toonify': { + 'model_urls': + 'https://paddlegan.bj.bcebos.com/models/pSp-ffhq-toonify.pdparams', + 'transform': + T.Compose([T.Resize((256, 256)), + T.Transpose(), + T.Normalize([127.5, 127.5, 127.5], [127.5, 127.5, 127.5])]), + 'size': + 1024, + 'style_dim': + 512, + 'n_mlp': + 8, + 'channel_multiplier': + 2 + }, + 'default': { + 'transform': + T.Compose([T.Resize((256, 256)), + T.Transpose(), + T.Normalize([127.5, 127.5, 127.5], [127.5, 127.5, 127.5])]) + } +} + + +def run_alignment(image): + img = Image.fromarray(image).convert("RGB") + face = futils.dlib.detect(img) + if not face: + raise Exception('Could not find a face in the given image.') + face_on_image = face[0] + lm = futils.dlib.landmarks(img, face_on_image) + lm = np.array(lm)[:, ::-1] + lm_eye_left = lm[36:42] + lm_eye_right = lm[42:48] + lm_mouth_outer = lm[48:60] + + output_size = 1024 + transform_size = 4096 + enable_padding = True + + # Calculate auxiliary vectors. 
+ eye_left = np.mean(lm_eye_left, axis=0) + eye_right = np.mean(lm_eye_right, axis=0) + eye_avg = (eye_left + eye_right) * 0.5 + eye_to_eye = eye_right - eye_left + mouth_left = lm_mouth_outer[0] + mouth_right = lm_mouth_outer[6] + mouth_avg = (mouth_left + mouth_right) * 0.5 + eye_to_mouth = mouth_avg - eye_avg + + # Choose oriented crop rectangle. + x = eye_to_eye - np.flipud(eye_to_mouth) * [-1, 1] + x /= np.hypot(*x) + x *= max(np.hypot(*eye_to_eye) * 2.0, np.hypot(*eye_to_mouth) * 1.8) + y = np.flipud(x) * [-1, 1] + c = eye_avg + eye_to_mouth * 0.1 + quad = np.stack([c - x - y, c - x + y, c + x + y, c + x - y]) + qsize = np.hypot(*x) * 2 + + # Shrink. + shrink = int(np.floor(qsize / output_size * 0.5)) + if shrink > 1: + rsize = (int(np.rint(float(img.size[0]) / shrink)), int(np.rint(float(img.size[1]) / shrink))) + img = img.resize(rsize, Image.ANTIALIAS) + quad /= shrink + qsize /= shrink + + # Crop. + border = max(int(np.rint(qsize * 0.1)), 3) + crop = (int(np.floor(min(quad[:, 0]))), int(np.floor(min(quad[:, 1]))), int(np.ceil(max(quad[:, 0]))), + int(np.ceil(max(quad[:, 1])))) + crop = (max(crop[0] - border, 0), max(crop[1] - border, 0), min(crop[2] + border, img.size[0]), + min(crop[3] + border, img.size[1])) + if crop[2] - crop[0] < img.size[0] or crop[3] - crop[1] < img.size[1]: + img = img.crop(crop) + quad -= crop[0:2] + + # Pad. + pad = (int(np.floor(min(quad[:, 0]))), int(np.floor(min(quad[:, 1]))), int(np.ceil(max(quad[:, 0]))), + int(np.ceil(max(quad[:, 1])))) + pad = (max(-pad[0] + border, 0), max(-pad[1] + border, 0), max(pad[2] - img.size[0] + border, 0), + max(pad[3] - img.size[1] + border, 0)) + if enable_padding and max(pad) > border - 4: + pad = np.maximum(pad, int(np.rint(qsize * 0.3))) + img = np.pad(np.float32(img), ((pad[1], pad[3]), (pad[0], pad[2]), (0, 0)), 'reflect') + h, w, _ = img.shape + y, x, _ = np.ogrid[:h, :w, :1] + mask = np.maximum(1.0 - np.minimum(np.float32(x) / pad[0], + np.float32(w - 1 - x) / pad[2]), + 1.0 - np.minimum(np.float32(y) / pad[1], + np.float32(h - 1 - y) / pad[3])) + blur = qsize * 0.02 + img += (scipy.ndimage.gaussian_filter(img, [blur, blur, 0]) - img) * np.clip(mask * 3.0 + 1.0, 0.0, 1.0) + img += (np.median(img, axis=(0, 1)) - img) * np.clip(mask, 0.0, 1.0) + img = Image.fromarray(np.uint8(np.clip(np.rint(img), 0, 255)), 'RGB') + quad += pad[:2] + + # Transform. 
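+    # 以 QUAD 模式做透视变换,将 quad 所围的人脸四边形映射为 transform_size×transform_size 的对齐图像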
+ img = img.transform((transform_size, transform_size), Image.QUAD, (quad + 0.5).flatten(), Image.BILINEAR) + + return img + + +class AttrDict(dict): + def __init__(self, *args, **kwargs): + super(AttrDict, self).__init__(*args, **kwargs) + self.__dict__ = self + + +class Pixel2Style2PixelPredictor: + def __init__(self, + weight_path=None, + model_type=None, + seed=None, + size=1024, + style_dim=512, + n_mlp=8, + channel_multiplier=2): + + if weight_path is None and model_type != 'default': + if model_type in model_cfgs.keys(): + weight_path = get_path_from_url(model_cfgs[model_type]['model_urls']) + size = model_cfgs[model_type].get('size', size) + style_dim = model_cfgs[model_type].get('style_dim', style_dim) + n_mlp = model_cfgs[model_type].get('n_mlp', n_mlp) + channel_multiplier = model_cfgs[model_type].get('channel_multiplier', channel_multiplier) + checkpoint = paddle.load(weight_path) + else: + raise ValueError('Predictor need a weight path or a pretrained model type') + else: + checkpoint = paddle.load(weight_path) + + opts = checkpoint.pop('opts') + opts = AttrDict(opts) + opts['size'] = size + opts['style_dim'] = style_dim + opts['n_mlp'] = n_mlp + opts['channel_multiplier'] = channel_multiplier + + self.generator = Pixel2Style2Pixel(opts) + self.generator.set_state_dict(checkpoint) + self.generator.eval() + + if seed is not None: + paddle.seed(seed) + random.seed(seed) + np.random.seed(seed) + + self.model_type = 'default' if model_type is None else model_type + + def run(self, image): + src_img = run_alignment(image) + src_img = np.asarray(src_img) + transformed_image = model_cfgs[self.model_type]['transform'](src_img) + dst_img, latents = self.generator( + paddle.to_tensor(transformed_image[None, ...]), resize=False, return_latents=True) + dst_img = (dst_img * 0.5 + 0.5)[0].numpy() * 255 + dst_img = dst_img.transpose((1, 2, 0)) + dst_npy = latents[0].numpy() + + return dst_img, dst_npy diff --git a/modules/image/Image_gan/gan/pixel2style2pixel/module.py b/modules/image/Image_gan/gan/pixel2style2pixel/module.py new file mode 100644 index 0000000000000000000000000000000000000000..fb054a6f09becd52790df9437abb6de28f42118d --- /dev/null +++ b/modules/image/Image_gan/gan/pixel2style2pixel/module.py @@ -0,0 +1,137 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +import argparse +import copy + +import paddle +import paddlehub as hub +from paddlehub.module.module import moduleinfo, runnable, serving +import numpy as np +import cv2 +from skimage.io import imread +from skimage.transform import rescale, resize + +from .model import Pixel2Style2PixelPredictor +from .util import base64_to_cv2 + + +@moduleinfo( + name="pixel2style2pixel", + type="CV/style_transfer", + author="paddlepaddle", + author_email="", + summary="", + version="1.0.0") +class pixel2style2pixel: + def __init__(self): + self.pretrained_model = os.path.join(self.directory, "pSp-ffhq-inversion.pdparams") + + self.network = Pixel2Style2PixelPredictor(weight_path=self.pretrained_model, model_type='ffhq-inversion') + + def style_transfer(self, + images=None, + paths=None, + output_dir='./transfer_result/', + use_gpu=False, + visualization=True): + ''' + + + images (list[numpy.ndarray]): data of images, shape of each is [H, W, C], color space must be BGR(read by cv2). + paths (list[str]): paths to images + output_dir: the dir to save the results + use_gpu: if True, use gpu to perform the computation, otherwise cpu. + visualization: if True, save results in output_dir. + ''' + results = [] + paddle.disable_static() + place = 'gpu:0' if use_gpu else 'cpu' + place = paddle.set_device(place) + if images == None and paths == None: + print('No image provided. Please input an image or a image path.') + return + + if images != None: + for image in images: + image = image[:, :, ::-1] + out = self.network.run(image) + results.append(out) + + if paths != None: + for path in paths: + image = cv2.imread(path)[:, :, ::-1] + out = self.network.run(image) + results.append(out) + + if visualization == True: + if not os.path.exists(output_dir): + os.makedirs(output_dir, exist_ok=True) + for i, out in enumerate(results): + if out is not None: + cv2.imwrite(os.path.join(output_dir, 'output_{}.png'.format(i)), out[0][:, :, ::-1]) + np.save(os.path.join(output_dir, 'output_{}.npy'.format(i)), out[1]) + + return results + + @runnable + def run_cmd(self, argvs: list): + """ + Run as a command. + """ + self.parser = argparse.ArgumentParser( + description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + self.args = self.parser.parse_args(argvs) + results = self.style_transfer( + paths=[self.args.input_path], + output_dir=self.args.output_dir, + use_gpu=self.args.use_gpu, + visualization=self.args.visualization) + return results + + @serving + def serving_method(self, images, **kwargs): + """ + Run as a service. + """ + images_decode = [base64_to_cv2(image) for image in images] + results = self.style_transfer(images=images_decode, **kwargs) + tolist = [result.tolist() for result in results] + return tolist + + def add_module_config_arg(self): + """ + Add the command config options. 
+ """ + self.arg_config_group.add_argument('--use_gpu', action='store_true', help="use GPU or not") + + self.arg_config_group.add_argument( + '--output_dir', type=str, default='transfer_result', help='output directory for saving result.') + self.arg_config_group.add_argument('--visualization', type=bool, default=False, help='save results or not.') + + def add_module_input_arg(self): + """ + Add the command input options. + """ + self.arg_input_group.add_argument('--input_path', type=str, help="path to input image.") diff --git a/modules/image/Image_gan/gan/pixel2style2pixel/requirements.txt b/modules/image/Image_gan/gan/pixel2style2pixel/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..d9bfc85782a3ee323241fe7beb87a9f281c120fe --- /dev/null +++ b/modules/image/Image_gan/gan/pixel2style2pixel/requirements.txt @@ -0,0 +1,2 @@ +ppgan +dlib diff --git a/modules/image/Image_gan/gan/pixel2style2pixel/util.py b/modules/image/Image_gan/gan/pixel2style2pixel/util.py new file mode 100644 index 0000000000000000000000000000000000000000..b88ac3562b74cadc1d4d6459a56097ca4a938a0b --- /dev/null +++ b/modules/image/Image_gan/gan/pixel2style2pixel/util.py @@ -0,0 +1,10 @@ +import base64 +import cv2 +import numpy as np + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data diff --git a/modules/thirdparty/image/Image_gan/gan/stgan_bald/README.md b/modules/image/Image_gan/gan/stgan_bald/README.md similarity index 100% rename from modules/thirdparty/image/Image_gan/gan/stgan_bald/README.md rename to modules/image/Image_gan/gan/stgan_bald/README.md diff --git a/modules/thirdparty/image/Image_gan/gan/stgan_bald/data_feed.py b/modules/image/Image_gan/gan/stgan_bald/data_feed.py similarity index 100% rename from modules/thirdparty/image/Image_gan/gan/stgan_bald/data_feed.py rename to modules/image/Image_gan/gan/stgan_bald/data_feed.py diff --git a/modules/thirdparty/image/Image_gan/gan/stgan_bald/module.py b/modules/image/Image_gan/gan/stgan_bald/module.py similarity index 100% rename from modules/thirdparty/image/Image_gan/gan/stgan_bald/module.py rename to modules/image/Image_gan/gan/stgan_bald/module.py diff --git a/modules/thirdparty/image/Image_gan/gan/stgan_bald/module/__model__ b/modules/image/Image_gan/gan/stgan_bald/module/__model__ similarity index 100% rename from modules/thirdparty/image/Image_gan/gan/stgan_bald/module/__model__ rename to modules/image/Image_gan/gan/stgan_bald/module/__model__ diff --git a/modules/thirdparty/image/Image_gan/gan/stgan_bald/processor.py b/modules/image/Image_gan/gan/stgan_bald/processor.py similarity index 100% rename from modules/thirdparty/image/Image_gan/gan/stgan_bald/processor.py rename to modules/image/Image_gan/gan/stgan_bald/processor.py diff --git a/modules/thirdparty/image/Image_gan/gan/stgan_bald/requirements.txt b/modules/image/Image_gan/gan/stgan_bald/requirements.txt similarity index 100% rename from modules/thirdparty/image/Image_gan/gan/stgan_bald/requirements.txt rename to modules/image/Image_gan/gan/stgan_bald/requirements.txt diff --git a/modules/image/Image_gan/gan/styleganv2_editing/README.md b/modules/image/Image_gan/gan/styleganv2_editing/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4728207bff29bfc281a799f2bc6581634ebaecfc --- /dev/null +++ b/modules/image/Image_gan/gan/styleganv2_editing/README.md @@ -0,0 +1,134 @@ +# styleganv2_editing + 
+|模型名称|styleganv2_editing| +| :--- | :---: | +|类别|图像 - 图像生成| +|网络|StyleGAN V2| +|数据集|-| +|是否支持Fine-tuning|否| +|模型大小|190MB| +|最新更新日期|2021-12-15| +|数据指标|-| + + +## 一、模型基本信息 + +- ### 应用效果展示 + - 样例结果示例: +

+  输入图像
+
+  输出图像(修改age)
+ +- ### 模型介绍 + + - StyleGAN V2 的任务是使用风格向量进行image generation,而Editing模块则是利用预先对多图的风格向量进行分类回归得到的属性操纵向量来操纵生成图像的属性。 + + + +## 二、安装 + +- ### 1、环境依赖 + - ppgan + +- ### 2、安装 + + - ```shell + $ hub install styleganv2_editing + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + # Read from a file + $ hub run styleganv2_editing --input_path "/PATH/TO/IMAGE" --direction_name age --direction_offset 5 + ``` + - 通过命令行方式实现人脸编辑模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + + module = hub.Module(name="styleganv2_editing") + input_path = ["/PATH/TO/IMAGE"] + # Read from a file + module.generate(paths=input_path, direction_name = 'age', direction_offset = 5, output_dir='./editing_result/', use_gpu=True) + ``` + +- ### 3、API + + - ```python + generate(self, images=None, paths=None, direction_name = 'age', direction_offset = 0.0, output_dir='./editing_result/', use_gpu=False, visualization=True) + ``` + - 人脸编辑生成API。 + + - **参数** + + - images (list\[numpy.ndarray\]): 图片数据
+ - paths (list\[str\]): 图片路径;
+ - direction_name (str): 要编辑的属性名称,对于ffhq-conf-f有预先准备的这些属性: age、eyes_open、eye_distance、eye_eyebrow_distance、eye_ratio、gender、lip_ratio、mouth_open、mouth_ratio、nose_mouth_distance、nose_ratio、nose_tip、pitch、roll、smile、yaw
+ - direction_offset (float): 属性的偏移强度
+ - output\_dir (str): 结果保存的路径;
+ - use\_gpu (bool): 是否使用 GPU;
+ - visualization(bool): 是否保存结果到本地文件夹 + + +## 四、服务部署 + +- PaddleHub Serving可以部署一个在线人脸编辑服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + - ```shell + $ hub serving start -m styleganv2_editing + ``` + + - 这样就完成了一个人脸编辑的在线服务API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + import cv2 + import base64 + + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # 发送HTTP请求 + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/styleganv2_editing" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # 打印预测结果 + print(r.json()["results"]) + +## 五、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install styleganv2_editing==1.0.0 + ``` diff --git a/modules/image/Image_gan/gan/styleganv2_editing/basemodel.py b/modules/image/Image_gan/gan/styleganv2_editing/basemodel.py new file mode 100644 index 0000000000000000000000000000000000000000..37eca73d4e14965a1f69e818744aa435a7e3600f --- /dev/null +++ b/modules/image/Image_gan/gan/styleganv2_editing/basemodel.py @@ -0,0 +1,140 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +import random +import numpy as np +import paddle +from ppgan.models.generators import StyleGANv2Generator +from ppgan.utils.download import get_path_from_url +from ppgan.utils.visual import make_grid, tensor2img, save_image + +model_cfgs = { + 'ffhq-config-f': { + 'model_urls': 'https://paddlegan.bj.bcebos.com/models/stylegan2-ffhq-config-f.pdparams', + 'size': 1024, + 'style_dim': 512, + 'n_mlp': 8, + 'channel_multiplier': 2 + }, + 'animeface-512': { + 'model_urls': 'https://paddlegan.bj.bcebos.com/models/stylegan2-animeface-512.pdparams', + 'size': 512, + 'style_dim': 512, + 'n_mlp': 8, + 'channel_multiplier': 2 + } +} + + +@paddle.no_grad() +def get_mean_style(generator): + mean_style = None + + for i in range(10): + style = generator.mean_latent(1024) + + if mean_style is None: + mean_style = style + + else: + mean_style += style + + mean_style /= 10 + return mean_style + + +@paddle.no_grad() +def sample(generator, mean_style, n_sample): + image = generator( + [paddle.randn([n_sample, generator.style_dim])], + truncation=0.7, + truncation_latent=mean_style, + )[0] + + return image + + +@paddle.no_grad() +def style_mixing(generator, mean_style, n_source, n_target): + source_code = paddle.randn([n_source, generator.style_dim]) + target_code = paddle.randn([n_target, generator.style_dim]) + + resolution = 2**((generator.n_latent + 2) // 2) + + images = [paddle.ones([1, 3, resolution, resolution]) * -1] + + source_image = generator([source_code], truncation_latent=mean_style, truncation=0.7)[0] + target_image = generator([target_code], truncation_latent=mean_style, truncation=0.7)[0] + + images.append(source_image) + + for i in range(n_target): + image = generator( + [target_code[i].unsqueeze(0).tile([n_source, 1]), source_code], + truncation_latent=mean_style, + truncation=0.7, + )[0] + images.append(target_image[i].unsqueeze(0)) + images.append(image) + + images = paddle.concat(images, 0) + + return images + + +class StyleGANv2Predictor: + def __init__(self, + output_path='output_dir', + weight_path=None, + model_type=None, + seed=None, + size=1024, + style_dim=512, + n_mlp=8, + channel_multiplier=2): + self.output_path = output_path + + if weight_path is None: + if model_type in model_cfgs.keys(): + weight_path = get_path_from_url(model_cfgs[model_type]['model_urls']) + size = model_cfgs[model_type].get('size', size) + style_dim = model_cfgs[model_type].get('style_dim', style_dim) + n_mlp = model_cfgs[model_type].get('n_mlp', n_mlp) + channel_multiplier = model_cfgs[model_type].get('channel_multiplier', channel_multiplier) + checkpoint = paddle.load(weight_path) + else: + raise ValueError('Predictor need a weight path or a pretrained model type') + else: + checkpoint = paddle.load(weight_path) + + self.generator = StyleGANv2Generator(size, style_dim, n_mlp, channel_multiplier) + self.generator.set_state_dict(checkpoint) + self.generator.eval() + + if seed is not None: + paddle.seed(seed) + random.seed(seed) + np.random.seed(seed) + + def run(self, n_row=3, n_col=5): + os.makedirs(self.output_path, exist_ok=True) + mean_style = get_mean_style(self.generator) + + img = sample(self.generator, mean_style, n_row * n_col) + save_image(tensor2img(make_grid(img, nrow=n_col)), f'{self.output_path}/sample.png') + + for j in range(2): + img = style_mixing(self.generator, mean_style, n_col, n_row) + save_image(tensor2img(make_grid(img, nrow=n_col + 1)), f'{self.output_path}/sample_mixing_{j}.png') diff --git a/modules/image/Image_gan/gan/styleganv2_editing/model.py 
b/modules/image/Image_gan/gan/styleganv2_editing/model.py new file mode 100644 index 0000000000000000000000000000000000000000..ccdadeaa8b125bfd98a86ae5a895d543914d5d9d --- /dev/null +++ b/modules/image/Image_gan/gan/styleganv2_editing/model.py @@ -0,0 +1,58 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import cv2 +import numpy as np +import paddle + +from ppgan.utils.download import get_path_from_url +from .basemodel import StyleGANv2Predictor + +model_cfgs = { + 'ffhq-config-f': { + 'direction_urls': 'https://paddlegan.bj.bcebos.com/models/stylegan2-ffhq-config-f-directions.pdparams' + } +} + + +def make_image(tensor): + return (((tensor.detach() + 1) / 2 * 255).clip(min=0, max=255).transpose((0, 2, 3, 1)).numpy().astype('uint8')) + + +class StyleGANv2EditingPredictor(StyleGANv2Predictor): + def __init__(self, model_type=None, direction_path=None, **kwargs): + super().__init__(model_type=model_type, **kwargs) + + if direction_path is None and model_type is not None: + assert model_type in model_cfgs, f'There is not any pretrained direction file for {model_type} model.' + direction_path = get_path_from_url(model_cfgs[model_type]['direction_urls']) + self.directions = paddle.load(direction_path) + + @paddle.no_grad() + def run(self, latent, direction, offset): + + latent = paddle.to_tensor(latent).unsqueeze(0).astype('float32') + direction = self.directions[direction].unsqueeze(0).astype('float32') + + latent_n = paddle.concat([latent, latent + offset * direction], 0) + generator = self.generator + img_gen, _ = generator([latent_n], input_is_latent=True, randomize_noise=False) + imgs = make_image(img_gen) + src_img = imgs[0] + dst_img = imgs[1] + + dst_latent = (latent + offset * direction)[0].numpy().astype('float32') + + return src_img, dst_img, dst_latent diff --git a/modules/image/Image_gan/gan/styleganv2_editing/module.py b/modules/image/Image_gan/gan/styleganv2_editing/module.py new file mode 100644 index 0000000000000000000000000000000000000000..1e90060bd1005f3c91708e2ccd44a34e6132aef3 --- /dev/null +++ b/modules/image/Image_gan/gan/styleganv2_editing/module.py @@ -0,0 +1,155 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +import argparse +import copy + +import paddle +import paddlehub as hub +from paddlehub.module.module import moduleinfo, runnable, serving +import numpy as np +import cv2 +from skimage.io import imread +from skimage.transform import rescale, resize + +from .model import StyleGANv2EditingPredictor +from .util import base64_to_cv2 + + +@moduleinfo( + name="styleganv2_editing", + type="CV/style_transfer", + author="paddlepaddle", + author_email="", + summary="", + version="1.0.0") +class styleganv2_editing: + def __init__(self): + self.pretrained_model = os.path.join(self.directory, "stylegan2-ffhq-config-f-directions.pdparams") + + self.network = StyleGANv2EditingPredictor(direction_path=self.pretrained_model, model_type='ffhq-config-f') + self.pixel2style2pixel_module = hub.Module(name='pixel2style2pixel') + + def generate(self, + images=None, + paths=None, + direction_name='age', + direction_offset=0.0, + output_dir='./editing_result/', + use_gpu=False, + visualization=True): + ''' + + + images (list[numpy.ndarray]): data of images, shape of each is [H, W, C], color space must be BGR(read by cv2). + paths (list[str]): paths to image. + direction_name(str): Attribute to be manipulated,For ffhq-conf-f, we have: age, eyes_open, eye_distance, eye_eyebrow_distance, eye_ratio, gender, lip_ratio, mouth_open, mouth_ratio, nose_mouth_distance, nose_ratio, nose_tip, pitch, roll, smile, yaw. + direction_offset(float): Offset strength of the attribute. + output_dir: the dir to save the results + use_gpu: if True, use gpu to perform the computation, otherwise cpu. + visualization: if True, save results in output_dir. + ''' + results = [] + paddle.disable_static() + place = 'gpu:0' if use_gpu else 'cpu' + place = paddle.set_device(place) + if images == None and paths == None: + print('No image provided. Please input an image or a image path.') + return + + if images != None: + for image in images: + image = image[:, :, ::-1] + _, latent = self.pixel2style2pixel_module.network.run(image) + out = self.network.run(latent, direction_name, direction_offset) + results.append(out) + + if paths != None: + for path in paths: + image = cv2.imread(path)[:, :, ::-1] + _, latent = self.pixel2style2pixel_module.network.run(image) + out = self.network.run(latent, direction_name, direction_offset) + results.append(out) + + if visualization == True: + if not os.path.exists(output_dir): + os.makedirs(output_dir, exist_ok=True) + for i, out in enumerate(results): + if out is not None: + cv2.imwrite(os.path.join(output_dir, 'src_{}.png'.format(i)), out[0][:, :, ::-1]) + cv2.imwrite(os.path.join(output_dir, 'dst_{}.png'.format(i)), out[1][:, :, ::-1]) + np.save(os.path.join(output_dir, 'dst_{}.npy'.format(i)), out[2]) + + return results + + @runnable + def run_cmd(self, argvs: list): + """ + Run as a command. + """ + self.parser = argparse.ArgumentParser( + description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. 
Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + self.args = self.parser.parse_args(argvs) + results = self.generate( + paths=[self.args.input_path], + direction_name=self.args.direction_name, + direction_offset=self.args.direction_offset, + output_dir=self.args.output_dir, + use_gpu=self.args.use_gpu, + visualization=self.args.visualization) + return results + + @serving + def serving_method(self, images, **kwargs): + """ + Run as a service. + """ + images_decode = [base64_to_cv2(image) for image in images] + results = self.generate(images=images_decode, **kwargs) + tolist = [result.tolist() for result in results] + return tolist + + def add_module_config_arg(self): + """ + Add the command config options. + """ + self.arg_config_group.add_argument('--use_gpu', action='store_true', help="use GPU or not") + + self.arg_config_group.add_argument( + '--output_dir', type=str, default='editing_result', help='output directory for saving result.') + self.arg_config_group.add_argument('--visualization', type=bool, default=False, help='save results or not.') + + def add_module_input_arg(self): + """ + Add the command input options. + """ + self.arg_input_group.add_argument('--input_path', type=str, help="path to input image.") + self.arg_input_group.add_argument( + '--direction_name', + type=str, + default='age', + help= + "Attribute to be manipulated,For ffhq-conf-f, we have: age, eyes_open, eye_distance, eye_eyebrow_distance, eye_ratio, gender, lip_ratio, mouth_open, mouth_ratio, nose_mouth_distance, nose_ratio, nose_tip, pitch, roll, smile, yaw." + ) + self.arg_input_group.add_argument('--direction_offset', type=float, help="Offset strength of the attribute.") diff --git a/modules/image/Image_gan/gan/styleganv2_editing/requirements.txt b/modules/image/Image_gan/gan/styleganv2_editing/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..67e9bb6fa840355e9ed0d44b7134850f1fe22fe1 --- /dev/null +++ b/modules/image/Image_gan/gan/styleganv2_editing/requirements.txt @@ -0,0 +1 @@ +ppgan diff --git a/modules/image/Image_gan/gan/styleganv2_editing/util.py b/modules/image/Image_gan/gan/styleganv2_editing/util.py new file mode 100644 index 0000000000000000000000000000000000000000..b88ac3562b74cadc1d4d6459a56097ca4a938a0b --- /dev/null +++ b/modules/image/Image_gan/gan/styleganv2_editing/util.py @@ -0,0 +1,10 @@ +import base64 +import cv2 +import numpy as np + + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data diff --git a/modules/image/Image_gan/gan/wav2lip/README.md b/modules/image/Image_gan/gan/wav2lip/README.md new file mode 100644 index 0000000000000000000000000000000000000000..5305725a65bb12a8d4cf4c0f18c655b4c07c2841 --- /dev/null +++ b/modules/image/Image_gan/gan/wav2lip/README.md @@ -0,0 +1,94 @@ +# wav2lip + +|模型名称|wav2lip| +| :--- | :---: | +|类别|图像 - 视频生成| +|网络|Wav2Lip| +|数据集|LRS2| +|是否支持Fine-tuning|否| +|模型大小|139MB| +|最新更新日期|2021-12-14| +|数据指标|-| + + +## 一、模型基本信息 + +- ### 应用效果展示 + - 样例结果示例: +

+ 输入图像
+ 输出视频
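Before the wav2lip details below, a usage note for the styleganv2_editing module added earlier in this diff (it ships here without a README): a minimal, hedged sketch of its `generate()` API. Parameter names are taken from its module.py; the example path and offset value are placeholders.

```python
import paddlehub as hub

# Illustrative sketch only; assumes `hub install styleganv2_editing` has completed
# and that the pixel2style2pixel dependency module can be downloaded.
module = hub.Module(name="styleganv2_editing")
results = module.generate(
    paths=['/PATH/TO/FACE/IMAGE'],   # or images=[...] with BGR ndarrays read by cv2
    direction_name='age',            # one of the ffhq-config-f attributes listed in module.py
    direction_offset=5.0,            # signed strength of the edit
    output_dir='./editing_result/',
    use_gpu=False,
    visualization=True)              # saves src_{i}.png, dst_{i}.png and dst_{i}.npy per input
```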

+ + +- ### 模型介绍 + + - Wav2Lip实现的是视频人物根据输入音频生成与语音同步的人物唇形,使得生成的视频人物口型与输入语音同步。Wav2Lip不仅可以基于静态图像来输出与目标语音匹配的唇形同步视频,还可以直接将动态的视频进行唇形转换,输出与目标语音匹配的视频。Wav2Lip实现唇形与语音精准同步突破的关键在于,它采用了唇形同步判别器,以强制生成器持续产生准确而逼真的唇部运动。此外,它通过在鉴别器中使用多个连续帧而不是单个帧,并使用视觉质量损失(而不仅仅是对比损失)来考虑时间相关性,从而改善了视觉质量。Wav2Lip适用于任何人脸、任何语言,对任意视频都能达到很高都准确率,可以无缝地与原始视频融合,还可以用于转换动画人脸。 + + + +## 二、安装 + +- ### 1、环境依赖 + - ffmpeg + - libsndfile +- ### 2、安装 + + - ```shell + $ hub install wav2lip + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + # Read from a file + $ hub run wav2lip --face "/PATH/TO/VIDEO or IMAGE" --audio "/PATH/TO/AUDIO" + ``` + - 通过命令行方式人物唇形生成模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + + module = hub.Module(name="wav2lip") + face_input_path = "/PATH/TO/VIDEO or IMAGE" + audio_input_path = "/PATH/TO/AUDIO" + module.wav2lip_transfer(face=face_input_path, audio=audio_input_path, output_dir='./transfer_result/', use_gpu=True) + ``` + +- ### 3、API + + - ```python + def wav2lip_transfer(face, audio, output_dir ='./output_result/', use_gpu=False, visualization=True): + ``` + - 人脸唇形生成API。 + + - **参数** + + - face (str): 视频或图像文件的路径
+ - audio (str): 音频文件的路径
+ - output\_dir (str): 结果保存的路径;
+ - use\_gpu (bool): 是否使用 GPU;
+ - visualization(bool): 是否保存结果到本地文件夹 + + +## 四、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install wav2lip==1.0.0 + ``` diff --git a/modules/image/Image_gan/gan/wav2lip/model.py b/modules/image/Image_gan/gan/wav2lip/model.py new file mode 100644 index 0000000000000000000000000000000000000000..3fa32ed9c384e74cf569ef0daa09215539355d8e --- /dev/null +++ b/modules/image/Image_gan/gan/wav2lip/model.py @@ -0,0 +1,259 @@ +from os import listdir, path, makedirs +import platform +import numpy as np +import scipy, cv2, os, sys, argparse +import json, subprocess, random, string +from tqdm import tqdm +from glob import glob +import paddle +from paddle.utils.download import get_weights_path_from_url +from ppgan.faceutils import face_detection +from ppgan.utils import audio +from ppgan.models.generators.wav2lip import Wav2Lip + +WAV2LIP_WEIGHT_URL = 'https://paddlegan.bj.bcebos.com/models/wav2lip_hq.pdparams' +mel_step_size = 16 + + +class Wav2LipPredictor: + def __init__(self, + checkpoint_path=None, + static=False, + fps=25, + pads=[0, 10, 0, 0], + face_det_batch_size=16, + wav2lip_batch_size=128, + resize_factor=1, + crop=[0, -1, 0, -1], + box=[-1, -1, -1, -1], + rotate=False, + nosmooth=False, + face_detector='sfd', + face_enhancement=False): + self.img_size = 96 + self.checkpoint_path = checkpoint_path + self.static = static + self.fps = fps + self.pads = pads + self.face_det_batch_size = face_det_batch_size + self.wav2lip_batch_size = wav2lip_batch_size + self.resize_factor = resize_factor + self.crop = crop + self.box = box + self.rotate = rotate + self.nosmooth = nosmooth + self.face_detector = face_detector + self.face_enhancement = face_enhancement + if face_enhancement: + from ppgan.faceutils.face_enhancement import FaceEnhancement + self.faceenhancer = FaceEnhancement() + makedirs('./temp', exist_ok=True) + + def get_smoothened_boxes(self, boxes, T): + for i in range(len(boxes)): + if i + T > len(boxes): + window = boxes[len(boxes) - T:] + else: + window = boxes[i:i + T] + boxes[i] = np.mean(window, axis=0) + return boxes + + def face_detect(self, images): + detector = face_detection.FaceAlignment( + face_detection.LandmarksType._2D, flip_input=False, face_detector=self.face_detector) + + batch_size = self.face_det_batch_size + + while 1: + predictions = [] + try: + for i in tqdm(range(0, len(images), batch_size)): + predictions.extend(detector.get_detections_for_batch(np.array(images[i:i + batch_size]))) + except RuntimeError: + if batch_size == 1: + raise RuntimeError( + 'Image too big to run face detection on GPU. Please use the --resize_factor argument') + batch_size //= 2 + print('Recovering from OOM error; New batch size: {}'.format(batch_size)) + continue + break + + results = [] + pady1, pady2, padx1, padx2 = self.pads + for rect, image in zip(predictions, images): + if rect is None: + cv2.imwrite('temp/faulty_frame.jpg', image) # check this frame where the face was not detected. + raise ValueError('Face not detected! 
Ensure the video contains a face in all the frames.') + + y1 = max(0, rect[1] - pady1) + y2 = min(image.shape[0], rect[3] + pady2) + x1 = max(0, rect[0] - padx1) + x2 = min(image.shape[1], rect[2] + padx2) + + results.append([x1, y1, x2, y2]) + + boxes = np.array(results) + if not self.nosmooth: boxes = self.get_smoothened_boxes(boxes, T=5) + results = [[image[y1:y2, x1:x2], (y1, y2, x1, x2)] for image, (x1, y1, x2, y2) in zip(images, boxes)] + + del detector + return results + + def datagen(self, frames, mels): + img_batch, mel_batch, frame_batch, coords_batch = [], [], [], [] + + if self.box[0] == -1: + if not self.static: + face_det_results = self.face_detect(frames) # BGR2RGB for CNN face detection + else: + face_det_results = self.face_detect([frames[0]]) + else: + print('Using the specified bounding box instead of face detection...') + y1, y2, x1, x2 = self.box + face_det_results = [[f[y1:y2, x1:x2], (y1, y2, x1, x2)] for f in frames] + + for i, m in enumerate(mels): + idx = 0 if self.static else i % len(frames) + frame_to_save = frames[idx].copy() + face, coords = face_det_results[idx].copy() + + face = cv2.resize(face, (self.img_size, self.img_size)) + + img_batch.append(face) + mel_batch.append(m) + frame_batch.append(frame_to_save) + coords_batch.append(coords) + + if len(img_batch) >= self.wav2lip_batch_size: + img_batch, mel_batch = np.asarray(img_batch), np.asarray(mel_batch) + + img_masked = img_batch.copy() + img_masked[:, self.img_size // 2:] = 0 + + img_batch = np.concatenate((img_masked, img_batch), axis=3) / 255. + mel_batch = np.reshape(mel_batch, [len(mel_batch), mel_batch.shape[1], mel_batch.shape[2], 1]) + + yield img_batch, mel_batch, frame_batch, coords_batch + img_batch, mel_batch, frame_batch, coords_batch = [], [], [], [] + + if len(img_batch) > 0: + img_batch, mel_batch = np.asarray(img_batch), np.asarray(mel_batch) + + img_masked = img_batch.copy() + img_masked[:, self.img_size // 2:] = 0 + + img_batch = np.concatenate((img_masked, img_batch), axis=3) / 255. 
+ mel_batch = np.reshape(mel_batch, [len(mel_batch), mel_batch.shape[1], mel_batch.shape[2], 1]) + + yield img_batch, mel_batch, frame_batch, coords_batch + + def run(self, face, audio_seq, output_dir, visualization=True): + if os.path.isfile(face) and path.basename(face).split('.')[1] in ['jpg', 'png', 'jpeg']: + self.static = True + + if not os.path.isfile(face): + raise ValueError('--face argument must be a valid path to video/image file') + + elif path.basename(face).split('.')[1] in ['jpg', 'png', 'jpeg']: + full_frames = [cv2.imread(face)] + fps = self.fps + + else: + video_stream = cv2.VideoCapture(face) + fps = video_stream.get(cv2.CAP_PROP_FPS) + + print('Reading video frames...') + + full_frames = [] + while 1: + still_reading, frame = video_stream.read() + if not still_reading: + video_stream.release() + break + if self.resize_factor > 1: + frame = cv2.resize(frame, + (frame.shape[1] // self.resize_factor, frame.shape[0] // self.resize_factor)) + + if self.rotate: + frame = cv2.rotate(frame, cv2.cv2.ROTATE_90_CLOCKWISE) + + y1, y2, x1, x2 = self.crop + if x2 == -1: x2 = frame.shape[1] + if y2 == -1: y2 = frame.shape[0] + + frame = frame[y1:y2, x1:x2] + + full_frames.append(frame) + + print("Number of frames available for inference: " + str(len(full_frames))) + + if not audio_seq.endswith('.wav'): + print('Extracting raw audio...') + command = 'ffmpeg -y -i {} -strict -2 {}'.format(audio_seq, 'temp/temp.wav') + + subprocess.call(command, shell=True) + audio_seq = 'temp/temp.wav' + + wav = audio.load_wav(audio_seq, 16000) + mel = audio.melspectrogram(wav) + if np.isnan(mel.reshape(-1)).sum() > 0: + raise ValueError( + 'Mel contains nan! Using a TTS voice? Add a small epsilon noise to the wav file and try again') + + mel_chunks = [] + mel_idx_multiplier = 80. / fps + i = 0 + while 1: + start_idx = int(i * mel_idx_multiplier) + if start_idx + mel_step_size > len(mel[0]): + mel_chunks.append(mel[:, len(mel[0]) - mel_step_size:]) + break + mel_chunks.append(mel[:, start_idx:start_idx + mel_step_size]) + i += 1 + + print("Length of mel chunks: {}".format(len(mel_chunks))) + + full_frames = full_frames[:len(mel_chunks)] + + batch_size = self.wav2lip_batch_size + gen = self.datagen(full_frames.copy(), mel_chunks) + + model = Wav2Lip() + if self.checkpoint_path is None: + model_weights_path = get_weights_path_from_url(WAV2LIP_WEIGHT_URL) + weights = paddle.load(model_weights_path) + else: + weights = paddle.load(self.checkpoint_path) + model.load_dict(weights) + model.eval() + print("Model loaded") + for i, (img_batch, mel_batch, frames, coords) in enumerate( + tqdm(gen, total=int(np.ceil(float(len(mel_chunks)) / batch_size)))): + if i == 0: + + frame_h, frame_w = full_frames[0].shape[:-1] + out = cv2.VideoWriter('temp/result.avi', cv2.VideoWriter_fourcc(*'DIVX'), fps, (frame_w, frame_h)) + + img_batch = paddle.to_tensor(np.transpose(img_batch, (0, 3, 1, 2))).astype('float32') + mel_batch = paddle.to_tensor(np.transpose(mel_batch, (0, 3, 1, 2))).astype('float32') + + with paddle.no_grad(): + pred = model(mel_batch, img_batch) + + pred = pred.numpy().transpose(0, 2, 3, 1) * 255. 
+ + for p, f, c in zip(pred, frames, coords): + y1, y2, x1, x2 = c + if self.face_enhancement: + p = self.faceenhancer.enhance_from_image(p) + p = cv2.resize(p.astype(np.uint8), (x2 - x1, y2 - y1)) + + f[y1:y2, x1:x2] = p + out.write(f) + + out.release() + os.makedirs(output_dir, exist_ok=True) + if visualization: + command = 'ffmpeg -y -i {} -i {} -strict -2 -q:v 1 {}'.format(audio_seq, 'temp/result.avi', + os.path.join(output_dir, 'result.avi')) + subprocess.call(command, shell=platform.system() != 'Windows') diff --git a/modules/image/Image_gan/gan/wav2lip/module.py b/modules/image/Image_gan/gan/wav2lip/module.py new file mode 100644 index 0000000000000000000000000000000000000000..f16191d8984e33f38246e7985a8bb3f7f2aa74b0 --- /dev/null +++ b/modules/image/Image_gan/gan/wav2lip/module.py @@ -0,0 +1,101 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import argparse +import copy + +import paddle +import paddlehub as hub +from paddlehub.module.module import moduleinfo, runnable, serving +import numpy as np +import cv2 + +from .model import Wav2LipPredictor + + +@moduleinfo(name="wav2lip", type="CV/generation", author="paddlepaddle", author_email="", summary="", version="1.0.0") +class wav2lip: + def __init__(self): + self.pretrained_model = os.path.join(self.directory, "wav2lip_hq.pdparams") + + self.network = Wav2LipPredictor( + checkpoint_path=self.pretrained_model, + static=False, + fps=25, + pads=[0, 10, 0, 0], + face_det_batch_size=16, + wav2lip_batch_size=128, + resize_factor=1, + crop=[0, -1, 0, -1], + box=[-1, -1, -1, -1], + rotate=False, + nosmooth=False, + face_detector='sfd', + face_enhancement=True) + + def wav2lip_transfer(self, face, audio, output_dir='./output_result/', use_gpu=False, visualization=True): + ''' + face (str): path to video/image that contains faces to use. + audio (str): path to input audio. + output_dir: the dir to save the results + use_gpu: if True, use gpu to perform the computation, otherwise cpu. + visualization: if True, save results in output_dir. + ''' + paddle.disable_static() + place = 'gpu:0' if use_gpu else 'cpu' + place = paddle.set_device(place) + self.network.run(face, audio, output_dir, visualization) + + @runnable + def run_cmd(self, argvs: list): + """ + Run as a command. + """ + self.parser = argparse.ArgumentParser( + description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) + + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. 
Required") + self.arg_config_group = self.parser.add_argument_group( + title="Config options", description="Run configuration for controlling module behavior, not required.") + self.add_module_config_arg() + self.add_module_input_arg() + self.args = self.parser.parse_args(argvs) + self.wav2lip_transfer( + face=self.args.face, + audio=self.args.audio, + output_dir=self.args.output_dir, + use_gpu=self.args.use_gpu, + visualization=self.args.visualization) + return + + def add_module_config_arg(self): + """ + Add the command config options. + """ + self.arg_config_group.add_argument('--use_gpu', action='store_true', help="use GPU or not") + + self.arg_config_group.add_argument( + '--output_dir', type=str, default='output_result', help='output directory for saving result.') + self.arg_config_group.add_argument('--visualization', type=bool, default=False, help='save results or not.') + + def add_module_input_arg(self): + """ + Add the command input options. + """ + self.arg_input_group.add_argument('--audio', type=str, help="path to input audio.") + self.arg_input_group.add_argument('--face', type=str, help="path to video/image that contains faces to use.") diff --git a/modules/image/Image_gan/gan/wav2lip/requirements.txt b/modules/image/Image_gan/gan/wav2lip/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..67e9bb6fa840355e9ed0d44b7134850f1fe22fe1 --- /dev/null +++ b/modules/image/Image_gan/gan/wav2lip/requirements.txt @@ -0,0 +1 @@ +ppgan diff --git a/modules/image/Image_gan/stargan_celeba/README.md b/modules/image/Image_gan/stargan_celeba/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b5a160274dae031ae2824b93416eb1395b814770 --- /dev/null +++ b/modules/image/Image_gan/stargan_celeba/README.md @@ -0,0 +1,102 @@ +# stargan_celeba + +|模型名称|stargan_celeba| +| :--- | :---: | +|类别|图像 - 图像生成| +|网络|STGAN| +|数据集|Celeba| +|是否支持Fine-tuning|否| +|模型大小|33MB| +|最新更新日期|2021-02-26| +|数据指标|-| + + +## 一、模型基本信息 + +- ### 应用效果展示 + - 样例结果示例: + +

+ 图1. StarGAN的效果图 (属性分别为:original image, Black_Hair, Blond_Hair, Brown_Hair, Male, Aged)
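An aside on the wav2lip model.py added earlier in this diff: its `run()` method slices the mel spectrogram into one 16-frame window per generated video frame, advancing 80/fps mel frames each step. A small self-contained sketch of that arithmetic; `mel_step_size` and the 80-mel-frames-per-second constant come from that file, while the toy mel array and fps value are assumptions.

```python
import numpy as np

mel = np.zeros((80, 400))        # toy stand-in for audio.melspectrogram output (~5 s at 80 mel frames/s)
mel_step_size = 16               # constant from wav2lip model.py
fps = 25.0
mel_idx_multiplier = 80. / fps   # mel frames consumed per video frame

mel_chunks = []
i = 0
while True:
    start_idx = int(i * mel_idx_multiplier)
    if start_idx + mel_step_size > mel.shape[1]:
        # the final window is right-aligned so it never runs past the spectrogram
        mel_chunks.append(mel[:, -mel_step_size:])
        break
    mel_chunks.append(mel[:, start_idx:start_idx + mel_step_size])
    i += 1

print(len(mel_chunks))           # one 16-frame mel window per generated video frame
```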

+ + +- ### 模型介绍 + + - StarGAN 是为了解决跨多个域、多个数据集的训练而提出的生成对抗网络模型。单个 StarGAN 模型就可以实现多个风格域的转换。 该 PaddleHub Module 使用 Celeba 数据集训练完成,目前支持 "Black_Hair", "Blond_Hair", "Brown_Hair", "Female", "Male", "Aged" 这六种人脸属性转换。 + + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 1.5.2 + + - paddlehub >= 1.0.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + +- ### 2、安装 + + - ```shell + $ hub install stargan_celeba==1.0.0 + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run stargan_celeba --image "/PATH/TO/IMAGE" --style "target_attribute" + ``` + - **参数** + + - image :指定图片路径。 + + - style 指定拟转换的属性,可选择 "Black_Hair", "Blond_Hair", "Brown_Hair", "Female", "Male", "Aged" 中的一个。 + + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + + stargan = hub.Module(name="stargan_celeba") + test_img_path = ["/PATH/TO/IMAGE"] + trans_attr = ["Blond_Hair"] + + # set input dict + input_dict = {"image": test_img_path, "style": trans_attr} + + # execute predict and print the result + results = stargan.generate(data=input_dict) + print(results) + ``` + +- ### 3、API + + - ```python + def generate(data) + ``` + + - 风格转换API,用于图像生成。 + + - **参数** + + - data: dict 类型,有以下字段 + - image (list\[str\]): list中每个元素为待转换的图片路径。 + - style (list\[str\]): list中每个元素为字符串,填写待转换的人脸属性。 + + - **返回** + - res (list\[str\]): 提示生成图片的保存路径。 + + + +## 四、更新历史 + +* 1.0.0 + + 初始发布 + diff --git a/modules/image/Image_gan/stgan_celeba/README.md b/modules/image/Image_gan/stgan_celeba/README.md new file mode 100644 index 0000000000000000000000000000000000000000..52e22e019e5d576d41d58ddc53c4a51a7870e130 --- /dev/null +++ b/modules/image/Image_gan/stgan_celeba/README.md @@ -0,0 +1,106 @@ +# stgan_celeba + +|模型名称|stgan_celeba| +| :--- | :---: | +|类别|图像 - 图像生成| +|网络|STGAN| +|数据集|Celeba| +|是否支持Fine-tuning|否| +|模型大小|287MB| +|最新更新日期|2021-02-26| +|数据指标|-| + + +## 一、模型基本信息 + +- ### 应用效果展示 + - 样例结果示例: + +

+ STGAN的效果图(图片属性分别为:original image, Bald, Bangs, Black_Hair, Blond_Hair, Brown_Hair, Bushy_Eyebrows, Eyeglasses, Gender, Mouth_Slightly_Open, Mustache, No_Beard, Pale_Skin, Aged)

+ + +- ### 模型介绍 + + - STGAN 以原属性和目标属性的差值作为输入,并创造性地提出了 STUs (Selective transfer units) 来选择和修改 encoder 的特征,从而改善了转换效果和处理能力。 该 PaddleHub Module 使用 Celeba 数据集训练完成,目前支持 "Bald", "Bangs", "Black_Hair", "Blond_Hair", "Brown_Hair", "Bushy_Eyebrows", "Eyeglasses", "Gender", "Mouth_Slightly_Open", "Mustache", "No_Beard", "Pale_Skin", "Aged" 这十三种人脸属性转换。 + + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 1.5.2 + + - paddlehub >= 1.0.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + +- ### 2、安装 + + - ```shell + $ hub install stgan_celeba==1.0.0 + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run stgan_celeba --image "/PATH/TO/IMAGE" --info "original_attributes" --style "target_attribute" + ``` + - **参数** + + - image :指定图片路径。 + + - info :原图的属性,必须填写性别( "Male" 或者 "Female")。可选值有:"Bald", "Bangs", "Black_Hair", "Blond_Hair", "Brown_Hair", "Bushy_Eyebrows", "Eyeglasses", "Mouth_Slightly_Open", "Mustache", "No_Beard", "Pale_Skin", "Aged" 。比如输入图片是一个女孩,有着黑头发,那么就填写为 "Female,Black_Hair"。建议尽可能完整地填写原图具备的属性,比如一个黑发女孩还戴了眼镜,那么应填写为 "Female,Black_Hair,Eyeglasses",否则有可能转换失败。 + + - style 指定拟转换的属性,可选择 "Bald", "Bangs", "Black_Hair", "Blond_Hair", "Brown_Hair", "Bushy_Eyebrows", "Eyeglasses", "Gender", "Mouth_Slightly_Open", "Mustache", "No_Beard", "Pale_Skin", "Aged" 中的一种。 + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + + stgan = hub.Module(name="stgan_celeba") + + test_img_path = ["/PATH/TO/IMAGE"] + org_info = ["Female,Black_Hair"] + trans_attr = ["Bangs"] + + # set input dict + input_dict = {"image": test_img_path, "style": trans_attr, "info": org_info} + + # execute predict and print the result + results = stgan.generate(data=input_dict) + print(results) + ``` + +- ### 3、API + + - ```python + def generate(data) + ``` + + - 风格转换API,用于图像生成。 + + - **参数** + + - data: dict 类型,有以下字段 + - image (list\[str\]): list中每个元素为待转换的图片路径。 + - style (list\[str\]): list中每个元素为字符串,填写待转换的人脸属性。 + - info (list\[str\]): 表示原图具备的人脸属性,填得越详细效果会越好,不同属性用逗号隔开。 + + + - **返回** + - res (list\[str\]): 提示生成图片的保存路径。 + + + +## 四、更新历史 + +* 1.0.0 + + 初始发布 diff --git a/modules/image/Image_gan/style_transfer/ID_Photo_GEN/README.md b/modules/image/Image_gan/style_transfer/ID_Photo_GEN/README.md new file mode 100644 index 0000000000000000000000000000000000000000..6957e9a03f1c2116263d37ac06e5dded42f1575e --- /dev/null +++ b/modules/image/Image_gan/style_transfer/ID_Photo_GEN/README.md @@ -0,0 +1,97 @@ +# ID_Photo_GEN + +|模型名称|ID_Photo_GEN| +| :--- | :---: | +|类别|图像 - 图像生成| +|网络|HRNet_W18| +|数据集|-| +|是否支持Fine-tuning|否| +|模型大小|28KB| +|最新更新日期|2021-02-26| +|数据指标|-| + + +## 一、模型基本信息 + +- ### 应用效果展示 + - 样例结果示例: +

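The ID_Photo_GEN API documented below returns, for each input face, a dict of three renders keyed 'write', 'blue' and 'red' (white, blue and red backgrounds). A hedged sketch of saving them to disk, using only the call signature shown in this README; the output file names are placeholders.

```python
import cv2
import paddlehub as hub

model = hub.Module(name='ID_Photo_GEN')
results = model.Photo_GEN(paths=['/PATH/TO/IMAGE'], visualization=False)

# One dict per input image; keys follow the README's documented return value.
for i, res in enumerate(results):
    for key in ('write', 'blue', 'red'):
        cv2.imwrite('id_photo_{}_{}.png'.format(i, key), res[key])
```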

+ + +- ### 模型介绍 + + - 基于face_landmark_localization和FCN_HRNet_W18_Face_Seg模型实现的证件照生成模型,一键生成白底、红底和蓝底的人像照片 + + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 + +- ### 2、安装 + + - ```shell + $ hub install ID_Photo_GEN + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + + +## 三、模型API预测 + +- ### 1、预测代码示例 + + - ```python + import cv2 + import paddlehub as hub + + model = hub.Module(name='ID_Photo_GEN') + + result = model.Photo_GEN( + images=[cv2.imread('/PATH/TO/IMAGE')], + paths=None, + batch_size=1, + output_dir='output', + visualization=True, + use_gpu=False) + ``` + +- ### 2、API + + - ```python + def Photo_GEN( + images=None, + paths=None, + batch_size=1, + output_dir='output', + visualization=False, + use_gpu=False): + ``` + + - 证件照生成API + + - **参数** + * images (list[np.ndarray]) : 输入图像数据列表(BGR) + * paths (list[str]) : 输入图像路径列表 + * batch_size (int) : 数据批大小 + * output_dir (str) : 可视化图像输出目录 + * visualization (bool) : 是否可视化 + * use_gpu (bool) : 是否使用 GPU 进行推理 + + **NOTE:** paths和images两个参数选择其一进行提供数据 + + - **返回** + + * results (list[dict{"write":np.ndarray,"blue":np.ndarray,"red":np.ndarray}]): 输出图像数据列表 + + +## 四、更新历史 + +* 1.0.0 + + 初始发布 diff --git a/modules/thirdparty/image/Image_gan/style_transfer/ID_Photo_GEN/module.py b/modules/image/Image_gan/style_transfer/ID_Photo_GEN/module.py similarity index 100% rename from modules/thirdparty/image/Image_gan/style_transfer/ID_Photo_GEN/module.py rename to modules/image/Image_gan/style_transfer/ID_Photo_GEN/module.py diff --git a/modules/thirdparty/image/Image_gan/style_transfer/Photo2Cartoon/README.md b/modules/image/Image_gan/style_transfer/Photo2Cartoon/README.md similarity index 100% rename from modules/thirdparty/image/Image_gan/style_transfer/Photo2Cartoon/README.md rename to modules/image/Image_gan/style_transfer/Photo2Cartoon/README.md diff --git a/modules/thirdparty/image/Image_gan/style_transfer/Photo2Cartoon/model/__init__.py b/modules/image/Image_gan/style_transfer/Photo2Cartoon/model/__init__.py similarity index 100% rename from modules/thirdparty/image/Image_gan/style_transfer/Photo2Cartoon/model/__init__.py rename to modules/image/Image_gan/style_transfer/Photo2Cartoon/model/__init__.py diff --git a/modules/thirdparty/image/Image_gan/style_transfer/Photo2Cartoon/model/networks.py b/modules/image/Image_gan/style_transfer/Photo2Cartoon/model/networks.py similarity index 100% rename from modules/thirdparty/image/Image_gan/style_transfer/Photo2Cartoon/model/networks.py rename to modules/image/Image_gan/style_transfer/Photo2Cartoon/model/networks.py diff --git a/modules/thirdparty/image/Image_gan/style_transfer/Photo2Cartoon/module.py b/modules/image/Image_gan/style_transfer/Photo2Cartoon/module.py similarity index 100% rename from modules/thirdparty/image/Image_gan/style_transfer/Photo2Cartoon/module.py rename to modules/image/Image_gan/style_transfer/Photo2Cartoon/module.py diff --git a/modules/thirdparty/image/Image_gan/style_transfer/U2Net_Portrait/README.md b/modules/image/Image_gan/style_transfer/U2Net_Portrait/README.md similarity index 90% rename from modules/thirdparty/image/Image_gan/style_transfer/U2Net_Portrait/README.md rename to modules/image/Image_gan/style_transfer/U2Net_Portrait/README.md index e2724618d42095ebf05e410ece2ced9b06c831d6..4175ec598c6e02a65a744a9b26dd7c00aa2efd43 100644 --- 
a/modules/thirdparty/image/Image_gan/style_transfer/U2Net_Portrait/README.md +++ b/modules/image/Image_gan/style_transfer/U2Net_Portrait/README.md @@ -50,16 +50,16 @@ ## 三、模型API预测 -- ### 1、代码示例 +- ### 1、预测代码示例 - ```python import paddlehub as hub import cv2 model = hub.Module(name="U2Net_Portrait") - result = model.Cartoon_GEN(images=[cv2.imread('/PATH/TO/IMAGE')]) + result = model.Portrait_GEN(images=[cv2.imread('/PATH/TO/IMAGE')]) # or - # result = model.Cartoon_GEN(paths=['/PATH/TO/IMAGE']) + # result = model.Portrait_GEN(paths=['/PATH/TO/IMAGE']) ``` - ### 2、API diff --git a/modules/thirdparty/image/Image_gan/style_transfer/U2Net_Portrait/module.py b/modules/image/Image_gan/style_transfer/U2Net_Portrait/module.py similarity index 100% rename from modules/thirdparty/image/Image_gan/style_transfer/U2Net_Portrait/module.py rename to modules/image/Image_gan/style_transfer/U2Net_Portrait/module.py diff --git a/modules/thirdparty/image/Image_gan/style_transfer/U2Net_Portrait/processor.py b/modules/image/Image_gan/style_transfer/U2Net_Portrait/processor.py similarity index 100% rename from modules/thirdparty/image/Image_gan/style_transfer/U2Net_Portrait/processor.py rename to modules/image/Image_gan/style_transfer/U2Net_Portrait/processor.py diff --git a/modules/thirdparty/image/Image_gan/style_transfer/U2Net_Portrait/u2net.py b/modules/image/Image_gan/style_transfer/U2Net_Portrait/u2net.py similarity index 100% rename from modules/thirdparty/image/Image_gan/style_transfer/U2Net_Portrait/u2net.py rename to modules/image/Image_gan/style_transfer/U2Net_Portrait/u2net.py diff --git a/modules/thirdparty/image/Image_gan/style_transfer/UGATIT_100w/README.md b/modules/image/Image_gan/style_transfer/UGATIT_100w/README.md similarity index 100% rename from modules/thirdparty/image/Image_gan/style_transfer/UGATIT_100w/README.md rename to modules/image/Image_gan/style_transfer/UGATIT_100w/README.md diff --git a/modules/thirdparty/image/Image_gan/style_transfer/UGATIT_100w/model.py b/modules/image/Image_gan/style_transfer/UGATIT_100w/model.py similarity index 100% rename from modules/thirdparty/image/Image_gan/style_transfer/UGATIT_100w/model.py rename to modules/image/Image_gan/style_transfer/UGATIT_100w/model.py diff --git a/modules/thirdparty/image/Image_gan/style_transfer/UGATIT_100w/module.py b/modules/image/Image_gan/style_transfer/UGATIT_100w/module.py similarity index 100% rename from modules/thirdparty/image/Image_gan/style_transfer/UGATIT_100w/module.py rename to modules/image/Image_gan/style_transfer/UGATIT_100w/module.py diff --git a/modules/thirdparty/image/Image_gan/style_transfer/UGATIT_100w/processor.py b/modules/image/Image_gan/style_transfer/UGATIT_100w/processor.py similarity index 100% rename from modules/thirdparty/image/Image_gan/style_transfer/UGATIT_100w/processor.py rename to modules/image/Image_gan/style_transfer/UGATIT_100w/processor.py diff --git a/modules/image/Image_gan/style_transfer/UGATIT_83w/README.md b/modules/image/Image_gan/style_transfer/UGATIT_83w/README.md new file mode 100644 index 0000000000000000000000000000000000000000..82bbf44afa06f2d03bb89f010d46a36ee5cf3b73 --- /dev/null +++ b/modules/image/Image_gan/style_transfer/UGATIT_83w/README.md @@ -0,0 +1,141 @@ +# UGATIT_83w + +|模型名称|UGATIT_83w| +| :--- | :---: | +|类别|图像 - 图像生成| +|网络|U-GAT-IT| +|数据集|selfie2anime| +|是否支持Fine-tuning|否| +|模型大小|41MB| +|最新更新日期|2021-02-26| +|数据指标|-| + + +## 一、模型基本信息 + +- ### 应用效果展示 + - 样例结果示例(左为原图,右为效果图): +


+ + + +- ### 模型介绍 + + - UGATIT 图像风格转换模型, 模型可将输入的人脸图像转换成动漫风格. + + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 1.8.2 + + - paddlehub >= 1.8.0 + +- ### 2、安装 + + - ```shell + $ hub install UGATIT_83w + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + + +## 三、模型API预测 + +- ### 1、预测代码示例 + + - ```python + import cv2 + import paddlehub as hub + + # 模型加载 + # use_gpu:是否使用GPU进行预测 + model = hub.Module(name='UGATIT_83w', use_gpu=False) + + # 模型预测 + result = model.style_transfer(images=[cv2.imread('/PATH/TO/IMAGE')]) + + # or + # result = model.style_transfer(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 2、API + + - ```python + def style_transfer( + self, + images=None, + paths=None, + batch_size=1, + output_dir='output', + visualization=False + ) + ``` + + - 风格转换API,将输入的人脸图像转换成动漫风格。 + + - **参数** + * images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\],默认为 None; + * paths (list\[str\]): 图片的路径,默认为 None; + * batch\_size (int): batch 的大小,默认设为 1; + * visualization (bool): 是否将识别结果保存为图片文件,默认设为 False; + * output\_dir (str): 图片的保存路径,默认设为 output + + **NOTE:** paths和images两个参数选择其一进行提供数据 + + - **返回** + + - res (list\[numpy.ndarray\]): 输出图像数据,ndarray.shape 为 \[H, W, C\] + + +## 四、服务部署 + +- PaddleHub Serving可以部署一个在线图像风格转换服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + + - ```shell + $ hub serving start -m UGATIT_83w + ``` + + - 这样就完成了一个图像风格转换的在线服务API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + import cv2 + import base64 + + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + + # 发送HTTP请求 + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/UGATIT_83w" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # 打印预测结果 + print(r.json()["results"]) + ``` + + +## 五、更新历史 + +* 1.0.0 + + 初始发布 \ No newline at end of file diff --git a/modules/thirdparty/image/Image_gan/style_transfer/UGATIT_83w/model.py b/modules/image/Image_gan/style_transfer/UGATIT_83w/model.py similarity index 100% rename from modules/thirdparty/image/Image_gan/style_transfer/UGATIT_83w/model.py rename to modules/image/Image_gan/style_transfer/UGATIT_83w/model.py diff --git a/modules/thirdparty/image/Image_gan/style_transfer/UGATIT_83w/module.py b/modules/image/Image_gan/style_transfer/UGATIT_83w/module.py similarity index 100% rename from modules/thirdparty/image/Image_gan/style_transfer/UGATIT_83w/module.py rename to modules/image/Image_gan/style_transfer/UGATIT_83w/module.py diff --git a/modules/thirdparty/image/Image_gan/style_transfer/UGATIT_83w/processor.py b/modules/image/Image_gan/style_transfer/UGATIT_83w/processor.py similarity index 100% rename from modules/thirdparty/image/Image_gan/style_transfer/UGATIT_83w/processor.py rename to modules/image/Image_gan/style_transfer/UGATIT_83w/processor.py diff --git a/modules/image/Image_gan/style_transfer/UGATIT_92w/README.md b/modules/image/Image_gan/style_transfer/UGATIT_92w/README.md new file mode 100644 index 0000000000000000000000000000000000000000..8108976faeaa9bccad1af206a9aa6a34115dffc0 --- /dev/null +++ 
b/modules/image/Image_gan/style_transfer/UGATIT_92w/README.md @@ -0,0 +1,141 @@ +# UGATIT_92w + +|模型名称|UGATIT_92w| +| :--- | :---: | +|类别|图像 - 图像生成| +|网络|U-GAT-IT| +|数据集|selfie2anime| +|是否支持Fine-tuning|否| +|模型大小|41MB| +|最新更新日期|2021-02-26| +|数据指标|-| + + +## 一、模型基本信息 + +- ### 应用效果展示 + - 样例结果示例(左为原图,右为效果图): +

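The serving examples in these UGATIT READMEs encode request images with a `cv2_to_base64` helper, and the modules' util.py added earlier in this diff decodes them with `base64_to_cv2`. A small round-trip sketch showing the two ends agree; the dummy image is an assumption, and `tobytes`/`frombuffer` are the non-deprecated spellings of the `tostring`/`fromstring` calls used in the diff.

```python
import base64
import cv2
import numpy as np

def cv2_to_base64(image):
    # client side: JPEG-encode, then base64-encode
    data = cv2.imencode('.jpg', image)[1]
    return base64.b64encode(data.tobytes()).decode('utf8')

def base64_to_cv2(b64str):
    # server side: base64-decode, then JPEG-decode back to a BGR ndarray
    data = base64.b64decode(b64str.encode('utf8'))
    data = np.frombuffer(data, np.uint8)
    return cv2.imdecode(data, cv2.IMREAD_COLOR)

img = np.zeros((64, 64, 3), dtype=np.uint8)   # dummy BGR image
restored = base64_to_cv2(cv2_to_base64(img))
print(restored.shape)                          # (64, 64, 3)
```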

+ + + +- ### 模型介绍 + + - UGATIT 图像风格转换模型, 模型可将输入的人脸图像转换成动漫风格. + + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 1.8.2 + + - paddlehub >= 1.8.0 + +- ### 2、安装 + + - ```shell + $ hub install UGATIT_92w + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + + +## 三、模型API预测 + +- ### 1、预测代码示例 + + - ```python + import cv2 + import paddlehub as hub + + # 模型加载 + # use_gpu:是否使用GPU进行预测 + model = hub.Module(name='UGATIT_92w', use_gpu=False) + + # 模型预测 + result = model.style_transfer(images=[cv2.imread('/PATH/TO/IMAGE')]) + + # or + # result = model.style_transfer(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 2、API + + - ```python + def style_transfer( + self, + images=None, + paths=None, + batch_size=1, + output_dir='output', + visualization=False + ) + ``` + + - 风格转换API,将输入的人脸图像转换成动漫风格。 + + - **参数** + * images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\],默认为 None; + * paths (list\[str\]): 图片的路径,默认为 None; + * batch\_size (int): batch 的大小,默认设为 1; + * visualization (bool): 是否将识别结果保存为图片文件,默认设为 False; + * output\_dir (str): 图片的保存路径,默认设为 output + + **NOTE:** paths和images两个参数选择其一进行提供数据 + + - **返回** + + - res (list\[numpy.ndarray\]): 输出图像数据,ndarray.shape 为 \[H, W, C\] + + +## 四、服务部署 + +- PaddleHub Serving可以部署一个在线图像风格转换服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + + - ```shell + $ hub serving start -m UGATIT_92w + ``` + + - 这样就完成了一个图像风格转换的在线服务API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + import cv2 + import base64 + + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + + # 发送HTTP请求 + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/UGATIT_92w" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # 打印预测结果 + print(r.json()["results"]) + ``` + + +## 五、更新历史 + +* 1.0.0 + + 初始发布 \ No newline at end of file diff --git a/modules/thirdparty/image/Image_gan/style_transfer/UGATIT_92w/model.py b/modules/image/Image_gan/style_transfer/UGATIT_92w/model.py similarity index 100% rename from modules/thirdparty/image/Image_gan/style_transfer/UGATIT_92w/model.py rename to modules/image/Image_gan/style_transfer/UGATIT_92w/model.py diff --git a/modules/thirdparty/image/Image_gan/style_transfer/UGATIT_92w/module.py b/modules/image/Image_gan/style_transfer/UGATIT_92w/module.py similarity index 100% rename from modules/thirdparty/image/Image_gan/style_transfer/UGATIT_92w/module.py rename to modules/image/Image_gan/style_transfer/UGATIT_92w/module.py diff --git a/modules/thirdparty/image/Image_gan/style_transfer/UGATIT_92w/processor.py b/modules/image/Image_gan/style_transfer/UGATIT_92w/processor.py similarity index 100% rename from modules/thirdparty/image/Image_gan/style_transfer/UGATIT_92w/processor.py rename to modules/image/Image_gan/style_transfer/UGATIT_92w/processor.py diff --git a/modules/thirdparty/image/Image_gan/style_transfer/animegan_v1_hayao_60/README.md b/modules/image/Image_gan/style_transfer/animegan_v1_hayao_60/README.md similarity index 100% rename from modules/thirdparty/image/Image_gan/style_transfer/animegan_v1_hayao_60/README.md rename to 
modules/image/Image_gan/style_transfer/animegan_v1_hayao_60/README.md diff --git a/modules/thirdparty/image/Image_gan/style_transfer/animegan_v1_hayao_60/model.py b/modules/image/Image_gan/style_transfer/animegan_v1_hayao_60/model.py similarity index 100% rename from modules/thirdparty/image/Image_gan/style_transfer/animegan_v1_hayao_60/model.py rename to modules/image/Image_gan/style_transfer/animegan_v1_hayao_60/model.py diff --git a/modules/thirdparty/image/Image_gan/style_transfer/animegan_v1_hayao_60/module.py b/modules/image/Image_gan/style_transfer/animegan_v1_hayao_60/module.py similarity index 92% rename from modules/thirdparty/image/Image_gan/style_transfer/animegan_v1_hayao_60/module.py rename to modules/image/Image_gan/style_transfer/animegan_v1_hayao_60/module.py index 7a54b230e51549bf0adc1be8f974ad6f2a48a0b7..8a27751db1d5eaf7ad74568ad3db93c6bf59eacc 100644 --- a/modules/thirdparty/image/Image_gan/style_transfer/animegan_v1_hayao_60/module.py +++ b/modules/image/Image_gan/style_transfer/animegan_v1_hayao_60/module.py @@ -33,7 +33,8 @@ class Animegan_V1_Hayao_60(Module): min_size=32, max_size=1024): # 加载数据处理器 - processor = Processor(images=images, paths=paths, batch_size=1, output_dir=output_dir, min_size=min_size, max_size=max_size) + processor = Processor( + images=images, paths=paths, batch_size=1, output_dir=output_dir, min_size=min_size, max_size=max_size) # 模型预测 outputs = self.model.predict(processor.input_datas) diff --git a/modules/thirdparty/image/Image_gan/style_transfer/animegan_v1_hayao_60/processor.py b/modules/image/Image_gan/style_transfer/animegan_v1_hayao_60/processor.py similarity index 100% rename from modules/thirdparty/image/Image_gan/style_transfer/animegan_v1_hayao_60/processor.py rename to modules/image/Image_gan/style_transfer/animegan_v1_hayao_60/processor.py diff --git a/modules/thirdparty/image/Image_gan/style_transfer/animegan_v2_hayao_64/README.md b/modules/image/Image_gan/style_transfer/animegan_v2_hayao_64/README.md similarity index 100% rename from modules/thirdparty/image/Image_gan/style_transfer/animegan_v2_hayao_64/README.md rename to modules/image/Image_gan/style_transfer/animegan_v2_hayao_64/README.md diff --git a/modules/thirdparty/image/Image_gan/style_transfer/animegan_v2_hayao_64/model.py b/modules/image/Image_gan/style_transfer/animegan_v2_hayao_64/model.py similarity index 100% rename from modules/thirdparty/image/Image_gan/style_transfer/animegan_v2_hayao_64/model.py rename to modules/image/Image_gan/style_transfer/animegan_v2_hayao_64/model.py diff --git a/modules/thirdparty/image/Image_gan/style_transfer/animegan_v2_hayao_64/module.py b/modules/image/Image_gan/style_transfer/animegan_v2_hayao_64/module.py similarity index 92% rename from modules/thirdparty/image/Image_gan/style_transfer/animegan_v2_hayao_64/module.py rename to modules/image/Image_gan/style_transfer/animegan_v2_hayao_64/module.py index 1a1191b8f36f62980fbe06d2af32635bd9eabe68..a4623fcd455e0027e1ef92307575521b8b9a442c 100644 --- a/modules/thirdparty/image/Image_gan/style_transfer/animegan_v2_hayao_64/module.py +++ b/modules/image/Image_gan/style_transfer/animegan_v2_hayao_64/module.py @@ -33,7 +33,8 @@ class Animegan_V2_Hayao_64(Module): min_size=32, max_size=1024): # 加载数据处理器 - processor = Processor(images=images, paths=paths, batch_size=1, output_dir=output_dir, min_size=min_size, max_size=max_size) + processor = Processor( + images=images, paths=paths, batch_size=1, output_dir=output_dir, min_size=min_size, max_size=max_size) # 模型预测 outputs = 
self.model.predict(processor.input_datas) diff --git a/modules/thirdparty/image/Image_gan/style_transfer/animegan_v2_hayao_64/processor.py b/modules/image/Image_gan/style_transfer/animegan_v2_hayao_64/processor.py similarity index 100% rename from modules/thirdparty/image/Image_gan/style_transfer/animegan_v2_hayao_64/processor.py rename to modules/image/Image_gan/style_transfer/animegan_v2_hayao_64/processor.py diff --git a/modules/thirdparty/image/Image_gan/style_transfer/animegan_v2_hayao_99/README.md b/modules/image/Image_gan/style_transfer/animegan_v2_hayao_99/README.md similarity index 100% rename from modules/thirdparty/image/Image_gan/style_transfer/animegan_v2_hayao_99/README.md rename to modules/image/Image_gan/style_transfer/animegan_v2_hayao_99/README.md diff --git a/modules/thirdparty/image/Image_gan/style_transfer/animegan_v2_hayao_99/model.py b/modules/image/Image_gan/style_transfer/animegan_v2_hayao_99/model.py similarity index 100% rename from modules/thirdparty/image/Image_gan/style_transfer/animegan_v2_hayao_99/model.py rename to modules/image/Image_gan/style_transfer/animegan_v2_hayao_99/model.py diff --git a/modules/thirdparty/image/Image_gan/style_transfer/animegan_v2_hayao_99/module.py b/modules/image/Image_gan/style_transfer/animegan_v2_hayao_99/module.py similarity index 92% rename from modules/thirdparty/image/Image_gan/style_transfer/animegan_v2_hayao_99/module.py rename to modules/image/Image_gan/style_transfer/animegan_v2_hayao_99/module.py index 09771b275f15def9e1fc420eaea5a731c4699ee5..28a17a517a9db310dbe0981f66cee93f02302b51 100644 --- a/modules/thirdparty/image/Image_gan/style_transfer/animegan_v2_hayao_99/module.py +++ b/modules/image/Image_gan/style_transfer/animegan_v2_hayao_99/module.py @@ -33,7 +33,8 @@ class Animegan_V2_Hayao_99(Module): min_size=32, max_size=1024): # 加载数据处理器 - processor = Processor(images=images, paths=paths, batch_size=1, output_dir=output_dir, min_size=min_size, max_size=max_size) + processor = Processor( + images=images, paths=paths, batch_size=1, output_dir=output_dir, min_size=min_size, max_size=max_size) # 模型预测 outputs = self.model.predict(processor.input_datas) diff --git a/modules/thirdparty/image/Image_gan/style_transfer/animegan_v2_hayao_99/processor.py b/modules/image/Image_gan/style_transfer/animegan_v2_hayao_99/processor.py similarity index 100% rename from modules/thirdparty/image/Image_gan/style_transfer/animegan_v2_hayao_99/processor.py rename to modules/image/Image_gan/style_transfer/animegan_v2_hayao_99/processor.py diff --git a/modules/image/Image_gan/style_transfer/animegan_v2_paprika_54/README.md b/modules/image/Image_gan/style_transfer/animegan_v2_paprika_54/README.md new file mode 100644 index 0000000000000000000000000000000000000000..5dcf44fb75e084a563c27ef514848fbdd8d6176b --- /dev/null +++ b/modules/image/Image_gan/style_transfer/animegan_v2_paprika_54/README.md @@ -0,0 +1,148 @@ +# animegan_v2_paprika_54 + +|模型名称|animegan_v2_paprika_54| +| :--- | :---: | +|类别|图像 - 图像生成| +|网络|AnimeGAN| +|数据集|Paprika| +|是否支持Fine-tuning|否| +|模型大小|9.4MB| +|最新更新日期|2021-02-26| +|数据指标|-| + + +## 一、模型基本信息 + +- ### 应用效果展示 + - 样例结果示例: +

+ 输入图像
+ 输出图像

+ + +- ### 模型介绍 + + - AnimeGAN V2 图像风格转换模型, 模型可将输入的图像转换成今敏红辣椒动漫风格,模型权重转换自[AnimeGAN V2官方开源项目](https://github.com/TachibanaYoshino/AnimeGANv2)。 + + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 1.8.0 + + - paddlehub >= 1.8.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + +- ### 2、安装 + + - ```shell + $ hub install animegan_v2_paprika_54 + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + model = hub.Module(name="animegan_v2_paprika_54") + result = model.style_transfer(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = model.style_transfer(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 2、API + + - ```python + def style_transfer(images=None, + paths=None, + output_dir='output', + visualization=False, + min_size=32, + max_size=1024) + ``` + + - 风格转换API,将输入的图片转换为漫画风格。 + + - **参数** + + - images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\];
+ - paths (list\[str\]): 图片的路径;
+ - output\_dir (str): 图片的保存路径,默认设为 output;
+ - visualization (bool): 是否将结果保存为图片文件;
+ - min\_size (int): 输入图片的短边最小尺寸,默认设为 32;
+ - max\_size (int): 输入图片的短边最大尺寸,默认设为 1024。 + + - **返回** + - res (list\[numpy.ndarray\]): 输出图像数据,ndarray.shape 为 \[H, W, C\] + + +## 四、服务部署 + +- PaddleHub Serving可以部署一个在线图像风格转换服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + - ```shell + $ hub serving start -m animegan_v2_paprika_54 + ``` + + - 这样就完成了一个图像风格转换的在线服务API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + import cv2 + import base64 + + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # 发送HTTP请求 + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/animegan_v2_paprika_54" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # 打印预测结果 + print(r.json()["results"]) + ``` + + +## 五、更新历史 + +* 1.0.0 + + 初始发布 + +* 1.0.1 + + 适配paddlehub2.0 + +* 1.0.2 + + 删除batch_size选项 + + - ```shell + $ hub install animegan_v2_paprika_54==1.0.2 + ``` \ No newline at end of file diff --git a/modules/thirdparty/image/Image_gan/style_transfer/animegan_v2_paprika_54/model.py b/modules/image/Image_gan/style_transfer/animegan_v2_paprika_54/model.py similarity index 100% rename from modules/thirdparty/image/Image_gan/style_transfer/animegan_v2_paprika_54/model.py rename to modules/image/Image_gan/style_transfer/animegan_v2_paprika_54/model.py diff --git a/modules/thirdparty/image/Image_gan/style_transfer/animegan_v2_paprika_54/module.py b/modules/image/Image_gan/style_transfer/animegan_v2_paprika_54/module.py similarity index 92% rename from modules/thirdparty/image/Image_gan/style_transfer/animegan_v2_paprika_54/module.py rename to modules/image/Image_gan/style_transfer/animegan_v2_paprika_54/module.py index e4b917aa336f4eb8a8819bcadc95688f475d428f..38641c79e4dff20bd5c94221842881dfb42d7e72 100644 --- a/modules/thirdparty/image/Image_gan/style_transfer/animegan_v2_paprika_54/module.py +++ b/modules/image/Image_gan/style_transfer/animegan_v2_paprika_54/module.py @@ -33,7 +33,8 @@ class Animegan_V2_Paprika_54(Module): min_size=32, max_size=1024): # 加载数据处理器 - processor = Processor(images=images, paths=paths, batch_size=1, output_dir=output_dir, min_size=min_size, max_size=max_size) + processor = Processor( + images=images, paths=paths, batch_size=1, output_dir=output_dir, min_size=min_size, max_size=max_size) # 模型预测 outputs = self.model.predict(processor.input_datas) diff --git a/modules/thirdparty/image/Image_gan/style_transfer/animegan_v2_paprika_54/processor.py b/modules/image/Image_gan/style_transfer/animegan_v2_paprika_54/processor.py similarity index 100% rename from modules/thirdparty/image/Image_gan/style_transfer/animegan_v2_paprika_54/processor.py rename to modules/image/Image_gan/style_transfer/animegan_v2_paprika_54/processor.py diff --git a/modules/thirdparty/image/Image_gan/style_transfer/animegan_v2_paprika_74/README.md b/modules/image/Image_gan/style_transfer/animegan_v2_paprika_74/README.md similarity index 100% rename from modules/thirdparty/image/Image_gan/style_transfer/animegan_v2_paprika_74/README.md rename to modules/image/Image_gan/style_transfer/animegan_v2_paprika_74/README.md diff --git a/modules/thirdparty/image/Image_gan/style_transfer/animegan_v2_paprika_74/model.py b/modules/image/Image_gan/style_transfer/animegan_v2_paprika_74/model.py similarity index 100% rename from 
modules/thirdparty/image/Image_gan/style_transfer/animegan_v2_paprika_74/model.py rename to modules/image/Image_gan/style_transfer/animegan_v2_paprika_74/model.py diff --git a/modules/thirdparty/image/Image_gan/style_transfer/animegan_v2_paprika_74/module.py b/modules/image/Image_gan/style_transfer/animegan_v2_paprika_74/module.py similarity index 92% rename from modules/thirdparty/image/Image_gan/style_transfer/animegan_v2_paprika_74/module.py rename to modules/image/Image_gan/style_transfer/animegan_v2_paprika_74/module.py index 259b1fc8aedfa79489a4c744bf1e744828b062ee..a6f2361509851bb79341b2a93fb33a8c322182a7 100644 --- a/modules/thirdparty/image/Image_gan/style_transfer/animegan_v2_paprika_74/module.py +++ b/modules/image/Image_gan/style_transfer/animegan_v2_paprika_74/module.py @@ -33,7 +33,8 @@ class Animegan_V2_Paprika_74(Module): min_size=32, max_size=1024): # 加载数据处理器 - processor = Processor(images=images, paths=paths, batch_size=1, output_dir=output_dir, min_size=min_size, max_size=max_size) + processor = Processor( + images=images, paths=paths, batch_size=1, output_dir=output_dir, min_size=min_size, max_size=max_size) # 模型预测 outputs = self.model.predict(processor.input_datas) diff --git a/modules/thirdparty/image/Image_gan/style_transfer/animegan_v2_paprika_74/processor.py b/modules/image/Image_gan/style_transfer/animegan_v2_paprika_74/processor.py similarity index 100% rename from modules/thirdparty/image/Image_gan/style_transfer/animegan_v2_paprika_74/processor.py rename to modules/image/Image_gan/style_transfer/animegan_v2_paprika_74/processor.py diff --git a/modules/image/Image_gan/style_transfer/animegan_v2_paprika_97/README.md b/modules/image/Image_gan/style_transfer/animegan_v2_paprika_97/README.md new file mode 100644 index 0000000000000000000000000000000000000000..ff8b5a3e95ff9155ceb016a1e3ec6dc08f7c18c0 --- /dev/null +++ b/modules/image/Image_gan/style_transfer/animegan_v2_paprika_97/README.md @@ -0,0 +1,147 @@ +# animegan_v2_paprika_97 + +|模型名称|animegan_v2_paprika_97| +| :--- | :---: | +|类别|图像 - 图像生成| +|网络|AnimeGAN| +|数据集|Paprika| +|是否支持Fine-tuning|否| +|模型大小|9.7MB| +|最新更新日期|2021-07-30| +|数据指标|-| + + +## 一、模型基本信息 + +- ### 应用效果展示 + - 样例结果示例: +

+ 输入图像
+ 输出图像

+ + + +- ### 模型介绍 + + - AnimeGAN V2 图像风格转换模型, 模型可将输入的图像转换成红辣椒动漫风格,模型权重转换自[AnimeGAN V2官方开源项目](https://github.com/TachibanaYoshino/AnimeGAN)。 + + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 1.8.0 + + - paddlehub >= 1.8.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + +- ### 2、安装 + + - ```shell + $ hub install animegan_v2_paprika_97 + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + model = hub.Module(name="animegan_v2_paprika_97") + result = model.style_transfer(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = model.style_transfer(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 2、API + + - ```python + def style_transfer(images=None, + paths=None, + output_dir='output', + visualization=False, + min_size=32, + max_size=1024) + ``` + + - 风格转换API,将输入的图片转换为漫画风格。 + + - **参数** + + - images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\];
+ - paths (list\[str\]): 图片的路径;
+ - output\_dir (str): 图片的保存路径,默认设为 output;
+ - visualization (bool): 是否将识别结果保存为图片文件;
+ - min\_size (int): 输入图片的短边最小尺寸,默认设为 32;
+ - max\_size (int): 输入图片的短边最大尺寸,默认设为 1024。 + + **NOTE:** paths和images两个参数选择其一进行提供数据 + + - **返回** + - res (list\[numpy.ndarray\]): 输出图像数据,ndarray.shape 为 \[H, W, C\] + + +## 四、服务部署 + +- PaddleHub Serving可以部署一个在线图像风格转换服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + - ```shell + $ hub serving start -m animegan_v2_paprika_97 + ``` + + - 这样就完成了一个图像风格转换的在线服务API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + import cv2 + import base64 + + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # 发送HTTP请求 + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/animegan_v2_paprika_97" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # 打印预测结果 + print(r.json()["results"]) + ``` + + +## 五、更新历史 + +* 1.0.0 + + 初始发布 + +* 1.0.1 + + 适配paddlehub2.0 + +* 1.0.2 + + 删除batch_size选项 diff --git a/modules/thirdparty/image/Image_gan/style_transfer/animegan_v2_paprika_97/model.py b/modules/image/Image_gan/style_transfer/animegan_v2_paprika_97/model.py similarity index 100% rename from modules/thirdparty/image/Image_gan/style_transfer/animegan_v2_paprika_97/model.py rename to modules/image/Image_gan/style_transfer/animegan_v2_paprika_97/model.py diff --git a/modules/thirdparty/image/Image_gan/style_transfer/animegan_v2_paprika_97/module.py b/modules/image/Image_gan/style_transfer/animegan_v2_paprika_97/module.py similarity index 92% rename from modules/thirdparty/image/Image_gan/style_transfer/animegan_v2_paprika_97/module.py rename to modules/image/Image_gan/style_transfer/animegan_v2_paprika_97/module.py index 5a6a9f6c7c58fb1e969093f1f7d1eed2947018d5..73821ac3fc560092f377c7b5a4d3b9c4e273b199 100644 --- a/modules/thirdparty/image/Image_gan/style_transfer/animegan_v2_paprika_97/module.py +++ b/modules/image/Image_gan/style_transfer/animegan_v2_paprika_97/module.py @@ -33,7 +33,8 @@ class Animegan_V2_Paprika_97(Module): min_size=32, max_size=1024): # 加载数据处理器 - processor = Processor(images=images, paths=paths, batch_size=1, output_dir=output_dir, min_size=min_size, max_size=max_size) + processor = Processor( + images=images, paths=paths, batch_size=1, output_dir=output_dir, min_size=min_size, max_size=max_size) # 模型预测 outputs = self.model.predict(processor.input_datas) diff --git a/modules/thirdparty/image/Image_gan/style_transfer/animegan_v2_paprika_97/processor.py b/modules/image/Image_gan/style_transfer/animegan_v2_paprika_97/processor.py similarity index 100% rename from modules/thirdparty/image/Image_gan/style_transfer/animegan_v2_paprika_97/processor.py rename to modules/image/Image_gan/style_transfer/animegan_v2_paprika_97/processor.py diff --git a/modules/thirdparty/image/Image_gan/style_transfer/animegan_v2_paprika_98/README.md b/modules/image/Image_gan/style_transfer/animegan_v2_paprika_98/README.md similarity index 100% rename from modules/thirdparty/image/Image_gan/style_transfer/animegan_v2_paprika_98/README.md rename to modules/image/Image_gan/style_transfer/animegan_v2_paprika_98/README.md diff --git a/modules/thirdparty/image/Image_gan/style_transfer/animegan_v2_paprika_98/model.py b/modules/image/Image_gan/style_transfer/animegan_v2_paprika_98/model.py similarity index 100% rename from 
modules/thirdparty/image/Image_gan/style_transfer/animegan_v2_paprika_98/model.py rename to modules/image/Image_gan/style_transfer/animegan_v2_paprika_98/model.py diff --git a/modules/thirdparty/image/Image_gan/style_transfer/animegan_v2_paprika_98/module.py b/modules/image/Image_gan/style_transfer/animegan_v2_paprika_98/module.py similarity index 92% rename from modules/thirdparty/image/Image_gan/style_transfer/animegan_v2_paprika_98/module.py rename to modules/image/Image_gan/style_transfer/animegan_v2_paprika_98/module.py index f41c6f704710de9cd41a759b649c6a4ee8f4cc8d..4c729c5dbdd9a09228badf2248e109db39bfb03a 100644 --- a/modules/thirdparty/image/Image_gan/style_transfer/animegan_v2_paprika_98/module.py +++ b/modules/image/Image_gan/style_transfer/animegan_v2_paprika_98/module.py @@ -33,7 +33,8 @@ class Animegan_V2_Paprika_98(Module): min_size=32, max_size=1024): # 加载数据处理器 - processor = Processor(images=images, paths=paths, batch_size=1, output_dir=output_dir, min_size=min_size, max_size=max_size) + processor = Processor( + images=images, paths=paths, batch_size=1, output_dir=output_dir, min_size=min_size, max_size=max_size) # 模型预测 outputs = self.model.predict(processor.input_datas) diff --git a/modules/thirdparty/image/Image_gan/style_transfer/animegan_v2_paprika_98/processor.py b/modules/image/Image_gan/style_transfer/animegan_v2_paprika_98/processor.py similarity index 100% rename from modules/thirdparty/image/Image_gan/style_transfer/animegan_v2_paprika_98/processor.py rename to modules/image/Image_gan/style_transfer/animegan_v2_paprika_98/processor.py diff --git a/modules/thirdparty/image/Image_gan/style_transfer/animegan_v2_shinkai_33/README.md b/modules/image/Image_gan/style_transfer/animegan_v2_shinkai_33/README.md similarity index 100% rename from modules/thirdparty/image/Image_gan/style_transfer/animegan_v2_shinkai_33/README.md rename to modules/image/Image_gan/style_transfer/animegan_v2_shinkai_33/README.md diff --git a/modules/thirdparty/image/Image_gan/style_transfer/animegan_v2_shinkai_33/model.py b/modules/image/Image_gan/style_transfer/animegan_v2_shinkai_33/model.py similarity index 100% rename from modules/thirdparty/image/Image_gan/style_transfer/animegan_v2_shinkai_33/model.py rename to modules/image/Image_gan/style_transfer/animegan_v2_shinkai_33/model.py diff --git a/modules/thirdparty/image/Image_gan/style_transfer/animegan_v2_shinkai_33/module.py b/modules/image/Image_gan/style_transfer/animegan_v2_shinkai_33/module.py similarity index 92% rename from modules/thirdparty/image/Image_gan/style_transfer/animegan_v2_shinkai_33/module.py rename to modules/image/Image_gan/style_transfer/animegan_v2_shinkai_33/module.py index be0be188e7b423c0743a38dab2a0ad9d78124ce5..f5d95855e057be83c39953c9e363faa3720aa731 100644 --- a/modules/thirdparty/image/Image_gan/style_transfer/animegan_v2_shinkai_33/module.py +++ b/modules/image/Image_gan/style_transfer/animegan_v2_shinkai_33/module.py @@ -33,7 +33,8 @@ class Animegan_V2_Shinkai_33(Module): min_size=32, max_size=1024): # 加载数据处理器 - processor = Processor(images=images, paths=paths, batch_size=1, output_dir=output_dir, min_size=min_size, max_size=max_size) + processor = Processor( + images=images, paths=paths, batch_size=1, output_dir=output_dir, min_size=min_size, max_size=max_size) # 模型预测 outputs = self.model.predict(processor.input_datas) diff --git a/modules/thirdparty/image/Image_gan/style_transfer/animegan_v2_shinkai_33/processor.py b/modules/image/Image_gan/style_transfer/animegan_v2_shinkai_33/processor.py similarity index 
100% rename from modules/thirdparty/image/Image_gan/style_transfer/animegan_v2_shinkai_33/processor.py rename to modules/image/Image_gan/style_transfer/animegan_v2_shinkai_33/processor.py diff --git a/modules/thirdparty/image/Image_gan/style_transfer/animegan_v2_shinkai_53/README.md b/modules/image/Image_gan/style_transfer/animegan_v2_shinkai_53/README.md similarity index 100% rename from modules/thirdparty/image/Image_gan/style_transfer/animegan_v2_shinkai_53/README.md rename to modules/image/Image_gan/style_transfer/animegan_v2_shinkai_53/README.md diff --git a/modules/thirdparty/image/Image_gan/style_transfer/animegan_v2_shinkai_53/model.py b/modules/image/Image_gan/style_transfer/animegan_v2_shinkai_53/model.py similarity index 100% rename from modules/thirdparty/image/Image_gan/style_transfer/animegan_v2_shinkai_53/model.py rename to modules/image/Image_gan/style_transfer/animegan_v2_shinkai_53/model.py diff --git a/modules/thirdparty/image/Image_gan/style_transfer/animegan_v2_shinkai_53/module.py b/modules/image/Image_gan/style_transfer/animegan_v2_shinkai_53/module.py similarity index 92% rename from modules/thirdparty/image/Image_gan/style_transfer/animegan_v2_shinkai_53/module.py rename to modules/image/Image_gan/style_transfer/animegan_v2_shinkai_53/module.py index 21bf5fc4962e49116f0dc0f85d93433d24231de8..609f384201002df11021d9cf0bf09dc95a461086 100644 --- a/modules/thirdparty/image/Image_gan/style_transfer/animegan_v2_shinkai_53/module.py +++ b/modules/image/Image_gan/style_transfer/animegan_v2_shinkai_53/module.py @@ -33,7 +33,8 @@ class Animegan_V2_Shinkai_53(Module): min_size=32, max_size=1024): # 加载数据处理器 - processor = Processor(images=images, paths=paths, batch_size=1, output_dir=output_dir, min_size=min_size, max_size=max_size) + processor = Processor( + images=images, paths=paths, batch_size=1, output_dir=output_dir, min_size=min_size, max_size=max_size) # 模型预测 outputs = self.model.predict(processor.input_datas) diff --git a/modules/thirdparty/image/Image_gan/style_transfer/animegan_v2_shinkai_53/processor.py b/modules/image/Image_gan/style_transfer/animegan_v2_shinkai_53/processor.py similarity index 100% rename from modules/thirdparty/image/Image_gan/style_transfer/animegan_v2_shinkai_53/processor.py rename to modules/image/Image_gan/style_transfer/animegan_v2_shinkai_53/processor.py diff --git a/modules/image/Image_gan/style_transfer/msgnet/README.md b/modules/image/Image_gan/style_transfer/msgnet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b2ead3a2a4c3e185ef2edf31c8b0e8ceac817451 --- /dev/null +++ b/modules/image/Image_gan/style_transfer/msgnet/README.md @@ -0,0 +1,187 @@ +# msgnet + +|模型名称|msgnet| +| :--- | :---: | +|类别|图像-图像编辑| +|网络|msgnet| +|数据集|COCO2014| +|是否支持Fine-tuning|是| +|模型大小|68MB| +|指标|-| +|最新更新日期|2021-07-29| + + +## 一、模型基本信息 + + - 样例结果示例: +

+ +

+ +- ### 模型介绍 + + - 本示例将展示如何使用PaddleHub对预训练模型进行finetune并完成预测任务。 + - 更多详情请参考:[msgnet](https://github.com/zhanghang1989/PyTorch-Multi-Style-Transfer) + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 + +- ### 2、安装 + - ```shell + $ hub install msgnet + ``` + + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + + +## 三、模型API预测 + +- ### 1.命令行预测 + +``` +$ hub run msgnet --input_path "/PATH/TO/ORIGIN/IMAGE" --style_path "/PATH/TO/STYLE/IMAGE" +``` + +- ### 2.预测代码示例 + +```python +import paddle +import paddlehub as hub + +if __name__ == '__main__': + model = hub.Module(name='msgnet') + result = model.predict(origin=["venice-boat.jpg"], style="candy.jpg", visualization=True, save_path ='style_tranfer') +``` + + + +- ### 3.如何开始Fine-tune + + - 在完成安装PaddlePaddle与PaddleHub后,通过执行`python train.py`即可开始使用msgnet模型对[MiniCOCO](../../docs/reference/datasets.md#class-hubdatasetsMiniCOCO)等数据集进行Fine-tune。 + + - 代码步骤 + + - Step1: 定义数据预处理方式 + - ```python + import paddlehub.vision.transforms as T + + transform = T.Compose([T.Resize((256, 256), interpolation='LINEAR')]) + ``` + + - `transforms` 数据增强模块定义了丰富的数据预处理方式,用户可按照需求替换自己需要的数据预处理方式。 + + - Step2: 下载数据集并使用 + - ```python + from paddlehub.datasets.minicoco import MiniCOCO + + styledata = MiniCOCO(transform=transform, mode='train') + + ``` + - `transforms`: 数据预处理方式。 + - `mode`: 选择数据模式,可选项有 `train`, `test`, 默认为`train`。 + + - 数据集的准备代码可以参考 [minicoco.py](../../paddlehub/datasets/flowers.py)。`hub.datasets.MiniCOCO()`会自动从网络下载数据集并解压到用户目录下`$HOME/.paddlehub/dataset`目录。 + + - Step3: 加载预训练模型 + + - ```python + model = hub.Module(name='msgnet', load_checkpoint=None) + ``` + - `name`: 选择预训练模型的名字。 + - `load_checkpoint`: 是否加载自己训练的模型,若为None,则加载提供的模型默认参数。 + + - Step4: 选择优化策略和运行配置 + + - ```python + optimizer = paddle.optimizer.Adam(learning_rate=0.0001, parameters=model.parameters()) + trainer = Trainer(model, optimizer, checkpoint_dir='test_style_ckpt') + trainer.train(styledata, epochs=101, batch_size=4, eval_dataset=styledata, log_interval=10, save_interval=10) + ``` + + + + + - 模型预测 + + - 当完成Fine-tune后,Fine-tune过程在验证集上表现最优的模型会被保存在`${CHECKPOINT_DIR}/best_model`目录下,其中`${CHECKPOINT_DIR}`目录为Fine-tune时所选择的保存checkpoint的目录。我们使用该模型来进行预测。predict.py脚本如下: + + ```python + import paddle + import paddlehub as hub + + if __name__ == '__main__': + model = hub.Module(name='msgnet', load_checkpoint="/PATH/TO/CHECKPOINT") + result = model.predict(origin=["venice-boat.jpg"], style="candy.jpg", visualization=True, save_path ='style_tranfer') + ``` + + - 参数配置正确后,请执行脚本`python predict.py`, 加载模型具体可参见[加载](https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0-rc/api/paddle/framework/io/load_cn.html#load)。 + + - **Args** + * `origin`:原始图像路径或BGR格式图片; + * `style`: 风格图像路径; + * `visualization`: 是否可视化,默认为True; + * `save_path`: 保存结果的路径,默认保存路径为'style_tranfer'。 + + **NOTE:** 进行预测时,所选择的module,checkpoint_dir,dataset必须和Fine-tune所用的一样。 + +## 四、服务部署 + +- PaddleHub Serving可以部署一个在线风格迁移服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + + - ```shell + $ hub serving start -m msgnet + ``` + + - 这样就完成了一个风格迁移服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + ```python + import requests + import json + import cv2 + import base64 + + import numpy as np + + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', 
image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + # 发送HTTP请求 + org_im = cv2.imread('/PATH/TO/ORIGIN/IMAGE') + style_im = cv2.imread('/PATH/TO/STYLE/IMAGE') + data = {'images':[[cv2_to_base64(org_im)], cv2_to_base64(style_im)]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/msgnet" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + data = base64_to_cv2(r.json()["results"]['data'][0]) + cv2.imwrite('style.png', data) + ``` + +## 五、更新历史 + +* 1.0.0 + + 初始发布 diff --git a/modules/image/classification/DriverStatusRecognition/README.md b/modules/image/classification/DriverStatusRecognition/README.md new file mode 100644 index 0000000000000000000000000000000000000000..9183c607af9f5405edcf4ab7a829b012803da17d --- /dev/null +++ b/modules/image/classification/DriverStatusRecognition/README.md @@ -0,0 +1,90 @@ +# DriverStatusRecognition + +|模型名称|DriverStatusRecognition| +| :--- | :---: | +|类别|图像-图像分类| +|网络|MobileNetV3_small_ssld| +|数据集|分心司机检测数据集| +|是否支持Fine-tuning|否| +|模型大小|6MB| +|最新更新日期|-| +|数据指标|-| + + +## 一、模型基本信息 + + + +- ### 模型介绍 + + - 驾驶员状态识别(DriverStatusRecognition),该模型可挖掘出人在疲劳状态下的表情特征,然后将这些定性的表情特征进行量化,提取出面部特征点及特征指标作为判断依据,再结合实验数据总结出基于这些参数的识别方法,最后输入获取到的状态数据进行识别和判断。该PaddleHub Module支持API预测及命令行预测。 + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + + - paddlex >= 1.3.7 + + +- ### 2、安装 + + - ```shell + $ hub install DriverStatusRecognition + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +- ### 3、在线体验 + [AI Studio 快速体验](https://aistudio.baidu.com/aistudio/projectdetail/1649513) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run DriverStatusRecognition --input_path /PATH/TO/IMAGE + ``` + - 通过命令行方式实现图像分类模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="DriverStatusRecognition") + images = [cv2.imread('/PATH/TO/IMAGE')] + results = classifier.predict(images=images) + for result in results: + print(result) + ``` + +- ### 3、API + + - ```python + def predict(images) + ``` + - 分类接口API。 + - **参数** + - images:list类型,待检测的图像。 + + - **返回** + - result:list类型,每个元素为对应输入图片的预测结果。 + + + + + +## 四、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install DriverStatusRecognition==1.0.0 + ``` diff --git a/modules/thirdparty/text/text_generation/reading_pictures_writing_poems_for_midautumn/MidAutumnDetection/__init__.py b/modules/image/classification/DriverStatusRecognition/__init__.py similarity index 100% rename from modules/thirdparty/text/text_generation/reading_pictures_writing_poems_for_midautumn/MidAutumnDetection/__init__.py rename to modules/image/classification/DriverStatusRecognition/__init__.py diff --git a/modules/thirdparty/image/classification/DriverStatusRecognition/assets/model.yml b/modules/image/classification/DriverStatusRecognition/assets/model.yml similarity index 100% rename from modules/thirdparty/image/classification/DriverStatusRecognition/assets/model.yml rename to 
modules/image/classification/DriverStatusRecognition/assets/model.yml diff --git a/modules/thirdparty/image/classification/DriverStatusRecognition/module.py b/modules/image/classification/DriverStatusRecognition/module.py similarity index 100% rename from modules/thirdparty/image/classification/DriverStatusRecognition/module.py rename to modules/image/classification/DriverStatusRecognition/module.py diff --git a/modules/image/classification/DriverStatusRecognition/requirements.txt b/modules/image/classification/DriverStatusRecognition/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..736e12bdda43ec3a1d858322d7b2fdabe392531e --- /dev/null +++ b/modules/image/classification/DriverStatusRecognition/requirements.txt @@ -0,0 +1,2 @@ +paddlex==1.3.7 +chardet diff --git a/modules/thirdparty/image/classification/DriverStatusRecognition/serving_client_demo.py b/modules/image/classification/DriverStatusRecognition/serving_client_demo.py similarity index 100% rename from modules/thirdparty/image/classification/DriverStatusRecognition/serving_client_demo.py rename to modules/image/classification/DriverStatusRecognition/serving_client_demo.py diff --git a/modules/image/classification/SnakeIdentification/README.md b/modules/image/classification/SnakeIdentification/README.md new file mode 100644 index 0000000000000000000000000000000000000000..809aae6db923f222fe125c3e31952f2f46f42204 --- /dev/null +++ b/modules/image/classification/SnakeIdentification/README.md @@ -0,0 +1,90 @@ +# SnakeIdentification + +|模型名称|SnakeIdentification| +| :--- | :---: | +|类别|图像-图像分类| +|网络|ResNet50_vd_ssld| +|数据集|蛇种数据集| +|是否支持Fine-tuning|否| +|模型大小|84MB| +|最新更新日期|-| +|数据指标|-| + + +## 一、模型基本信息 + + + +- ### 模型介绍 + + - 蛇种识别(SnakeIdentification),该模型可准确识别蛇的种类,并精准判断蛇的毒性。该PaddleHub Module支持API预测及命令行预测。 + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + + - paddlex >= 1.3.7 + + +- ### 2、安装 + + - ```shell + $ hub install SnakeIdentification + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +- ### 3、在线体验 + [AI Studio 快速体验](https://aistudio.baidu.com/aistudio/projectdetail/1646951) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run SnakeIdentification --input_path /PATH/TO/IMAGE + ``` + - 通过命令行方式实现图像分类模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="SnakeIdentification") + images = [cv2.imread('/PATH/TO/IMAGE')] + results = classifier.predict(images=images) + for result in results: + print(result) + ``` + +- ### 3、API + + - ```python + def predict(images) + ``` + - 分类接口API。 + - **参数** + - images:list类型,待检测的图像。 + + - **返回** + - result:list类型,每个元素为对应输入图片的预测结果。 + + + + + +## 四、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install SnakeIdentification==1.0.0 + ``` diff --git a/modules/thirdparty/text/text_generation/reading_pictures_writing_poems_for_midautumn/MidAutumnPoetry/__init__.py b/modules/image/classification/SnakeIdentification/__init__.py similarity index 100% rename from modules/thirdparty/text/text_generation/reading_pictures_writing_poems_for_midautumn/MidAutumnPoetry/__init__.py rename to modules/image/classification/SnakeIdentification/__init__.py diff --git 
a/modules/thirdparty/image/classification/SnakeIdentification/assets/model.yml b/modules/image/classification/SnakeIdentification/assets/model.yml similarity index 100% rename from modules/thirdparty/image/classification/SnakeIdentification/assets/model.yml rename to modules/image/classification/SnakeIdentification/assets/model.yml diff --git a/modules/thirdparty/image/classification/SnakeIdentification/module.py b/modules/image/classification/SnakeIdentification/module.py similarity index 100% rename from modules/thirdparty/image/classification/SnakeIdentification/module.py rename to modules/image/classification/SnakeIdentification/module.py diff --git a/modules/image/classification/SnakeIdentification/requirements.txt b/modules/image/classification/SnakeIdentification/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..307c5de765a9bb322c4deebf2bcee55109e7ce74 --- /dev/null +++ b/modules/image/classification/SnakeIdentification/requirements.txt @@ -0,0 +1 @@ +paddlex==1.3.7 diff --git a/modules/thirdparty/image/classification/SnakeIdentification/serving_client_demo.py b/modules/image/classification/SnakeIdentification/serving_client_demo.py similarity index 100% rename from modules/thirdparty/image/classification/SnakeIdentification/serving_client_demo.py rename to modules/image/classification/SnakeIdentification/serving_client_demo.py diff --git a/modules/thirdparty/image/classification/SpinalNet_Gemstones/README.md b/modules/image/classification/SpinalNet_Gemstones/README.md similarity index 100% rename from modules/thirdparty/image/classification/SpinalNet_Gemstones/README.md rename to modules/image/classification/SpinalNet_Gemstones/README.md diff --git a/modules/thirdparty/image/classification/SpinalNet_Gemstones/gem_dataset.py b/modules/image/classification/SpinalNet_Gemstones/gem_dataset.py similarity index 100% rename from modules/thirdparty/image/classification/SpinalNet_Gemstones/gem_dataset.py rename to modules/image/classification/SpinalNet_Gemstones/gem_dataset.py diff --git a/modules/thirdparty/image/classification/SpinalNet_Gemstones/spinalnet_res101_gemstone/README.md b/modules/image/classification/SpinalNet_Gemstones/spinalnet_res101_gemstone/README.md similarity index 100% rename from modules/thirdparty/image/classification/SpinalNet_Gemstones/spinalnet_res101_gemstone/README.md rename to modules/image/classification/SpinalNet_Gemstones/spinalnet_res101_gemstone/README.md diff --git a/modules/thirdparty/image/classification/SpinalNet_Gemstones/spinalnet_res101_gemstone/label_list.txt b/modules/image/classification/SpinalNet_Gemstones/spinalnet_res101_gemstone/label_list.txt similarity index 100% rename from modules/thirdparty/image/classification/SpinalNet_Gemstones/spinalnet_res101_gemstone/label_list.txt rename to modules/image/classification/SpinalNet_Gemstones/spinalnet_res101_gemstone/label_list.txt diff --git a/modules/thirdparty/image/classification/SpinalNet_Gemstones/spinalnet_res101_gemstone/module.py b/modules/image/classification/SpinalNet_Gemstones/spinalnet_res101_gemstone/module.py similarity index 100% rename from modules/thirdparty/image/classification/SpinalNet_Gemstones/spinalnet_res101_gemstone/module.py rename to modules/image/classification/SpinalNet_Gemstones/spinalnet_res101_gemstone/module.py diff --git a/modules/thirdparty/image/classification/SpinalNet_Gemstones/spinalnet_res50_gemstone/README.md b/modules/image/classification/SpinalNet_Gemstones/spinalnet_res50_gemstone/README.md similarity index 100% rename 
from modules/thirdparty/image/classification/SpinalNet_Gemstones/spinalnet_res50_gemstone/README.md rename to modules/image/classification/SpinalNet_Gemstones/spinalnet_res50_gemstone/README.md diff --git a/modules/thirdparty/image/classification/SpinalNet_Gemstones/spinalnet_res50_gemstone/label_list.txt b/modules/image/classification/SpinalNet_Gemstones/spinalnet_res50_gemstone/label_list.txt similarity index 100% rename from modules/thirdparty/image/classification/SpinalNet_Gemstones/spinalnet_res50_gemstone/label_list.txt rename to modules/image/classification/SpinalNet_Gemstones/spinalnet_res50_gemstone/label_list.txt diff --git a/modules/thirdparty/image/classification/SpinalNet_Gemstones/spinalnet_res50_gemstone/module.py b/modules/image/classification/SpinalNet_Gemstones/spinalnet_res50_gemstone/module.py similarity index 100% rename from modules/thirdparty/image/classification/SpinalNet_Gemstones/spinalnet_res50_gemstone/module.py rename to modules/image/classification/SpinalNet_Gemstones/spinalnet_res50_gemstone/module.py diff --git a/modules/thirdparty/image/classification/SpinalNet_Gemstones/spinalnet_vgg16_gemstone/README.md b/modules/image/classification/SpinalNet_Gemstones/spinalnet_vgg16_gemstone/README.md similarity index 100% rename from modules/thirdparty/image/classification/SpinalNet_Gemstones/spinalnet_vgg16_gemstone/README.md rename to modules/image/classification/SpinalNet_Gemstones/spinalnet_vgg16_gemstone/README.md diff --git a/modules/thirdparty/image/classification/SpinalNet_Gemstones/spinalnet_vgg16_gemstone/label_list.txt b/modules/image/classification/SpinalNet_Gemstones/spinalnet_vgg16_gemstone/label_list.txt similarity index 100% rename from modules/thirdparty/image/classification/SpinalNet_Gemstones/spinalnet_vgg16_gemstone/label_list.txt rename to modules/image/classification/SpinalNet_Gemstones/spinalnet_vgg16_gemstone/label_list.txt diff --git a/modules/thirdparty/image/classification/SpinalNet_Gemstones/spinalnet_vgg16_gemstone/module.py b/modules/image/classification/SpinalNet_Gemstones/spinalnet_vgg16_gemstone/module.py similarity index 100% rename from modules/thirdparty/image/classification/SpinalNet_Gemstones/spinalnet_vgg16_gemstone/module.py rename to modules/image/classification/SpinalNet_Gemstones/spinalnet_vgg16_gemstone/module.py diff --git a/modules/thirdparty/image/classification/SpinalNet_Gemstones/testImages/Cats Eye/cats_eye_3.jpg b/modules/image/classification/SpinalNet_Gemstones/testImages/Cats Eye/cats_eye_3.jpg similarity index 100% rename from modules/thirdparty/image/classification/SpinalNet_Gemstones/testImages/Cats Eye/cats_eye_3.jpg rename to modules/image/classification/SpinalNet_Gemstones/testImages/Cats Eye/cats_eye_3.jpg diff --git a/modules/thirdparty/image/classification/SpinalNet_Gemstones/testImages/Fluorite/fluorite_18.jpg b/modules/image/classification/SpinalNet_Gemstones/testImages/Fluorite/fluorite_18.jpg similarity index 100% rename from modules/thirdparty/image/classification/SpinalNet_Gemstones/testImages/Fluorite/fluorite_18.jpg rename to modules/image/classification/SpinalNet_Gemstones/testImages/Fluorite/fluorite_18.jpg diff --git a/modules/thirdparty/image/classification/SpinalNet_Gemstones/testImages/Kunzite/kunzite_28.jpg b/modules/image/classification/SpinalNet_Gemstones/testImages/Kunzite/kunzite_28.jpg similarity index 100% rename from modules/thirdparty/image/classification/SpinalNet_Gemstones/testImages/Kunzite/kunzite_28.jpg rename to 
modules/image/classification/SpinalNet_Gemstones/testImages/Kunzite/kunzite_28.jpg diff --git a/modules/image/classification/alexnet_imagenet/README.md b/modules/image/classification/alexnet_imagenet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..50fe4c0b30cbe51015358f44d8ad1a663b54f914 --- /dev/null +++ b/modules/image/classification/alexnet_imagenet/README.md @@ -0,0 +1,84 @@ +# alexnet_imagenet + +|模型名称|alexnet_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|AlexNet| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|否| +|模型大小|234MB| +|最新更新日期|-| +|数据指标|-| + + +## 一、模型基本信息 + + + +- ### 模型介绍 + + - AlexNet是图像分类中的经典模型。模型由Alex Krizhevsky于2012年提出,并在2012年ILSVRC比赛中夺得冠军。该PaddleHub Module结构为AlexNet,基于ImageNet-2012数据集训练,接受输入图片大小为224 x 224 x 3,支持直接通过命令行或者Python接口进行预测。 + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + + +- ### 2、安装 + + - ```shell + $ hub install alexnet_imagenet + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run alexnet_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现图像分类模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="alexnet_imagenet") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - 分类接口API。 + - **参数** + - data:dict类型,key为image,str类型,value为待检测的图片路径,list类型。 + + - **返回** + - result:list类型,每个元素为对应输入图片的预测结果。预测结果为dict类型,key为该图片分类结果label,value为该label对应的概率。 + + + + + +## 四、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install alexnet_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/darknet53_imagenet/README.md b/modules/image/classification/darknet53_imagenet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..161f4342794229f00ea7e0baa42f73831ee460a4 --- /dev/null +++ b/modules/image/classification/darknet53_imagenet/README.md @@ -0,0 +1,84 @@ +# darknet53_imagenet + +|模型名称|darknet53_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|DarkNet| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|否| +|模型大小|160MB| +|最新更新日期|-| +|数据指标|-| + + +## 一、模型基本信息 + + + +- ### 模型介绍 + + - DarkNet 是由 Joseph Redmon 提出的图像分类模型,并应用于Yolov3 中作为 Backbone 来完成特征提取。该网络采用连续的 3*3 和 1*1 卷积进行连接,并像ResNet 一样有ShortCut连接。该 PaddleHub Module 基于 ImageNet-2012 数据集训练,接受输入图片大小为 224 x 224 x 3,支持直接通过命令行或者 Python 接口进行预测。 + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + + +- ### 2、安装 + + - ```shell + $ hub install darknet53_imagenet + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run darknet53_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现文字识别模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + 
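+ # 加载 ImageNet-2012 预训练的 darknet53_imagenet 分类模块
+ # 输入以 {"image": [图片路径, ...]} 字典组织,classification 返回每张图片的类别及对应概率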
+ classifier = hub.Module(name="darknet53_imagenet") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - 分类接口API。 + - **参数** + - data:dict类型,key为image,str类型,value为待检测的图片路径,list类型。 + + - **返回** + - result:list类型,每个元素为对应输入图片的预测结果。预测结果为dict类型,key为该图片分类结果label,value为该label对应的概率。 + + + + + +## 四、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install darknet53_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/densenet121_imagenet/README.md b/modules/image/classification/densenet121_imagenet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..548d5d98392e91638f764692354b3984ce1ef39f --- /dev/null +++ b/modules/image/classification/densenet121_imagenet/README.md @@ -0,0 +1,84 @@ +# densenet121_imagenet + +|模型名称|densenet121_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|DenseNet| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|否| +|模型大小|34MB| +|最新更新日期|-| +|数据指标|-| + + +## 一、模型基本信息 + + + +- ### 模型介绍 + + - DenseNet 是 CVPR 2017 最佳论文的模型,DenseNet 以前馈方式将每一层与其他层连接,从而 L 层网络就有 L(L+1)/2 个直接连接。对于每一层,其输入是之前的所有层的特征图,而自己的特征图作为之后所有层的输入。DenseNet 缓解了梯度消失问题,加强特征传播,促进了特征重用,并大幅减少了参数量。该PaddleHub Module结构为 DenseNet121,基于ImageNet-2012数据集训练,接受输入图片大小为 224 x 224 x 3,支持直接通过命令行或者Python接口进行预测。 + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + + +- ### 2、安装 + + - ```shell + $ hub install densenet121_imagenet + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run densenet121_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现图像分类模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="densenet121_imagenet") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - 分类接口API。 + - **参数** + - data:dict类型,key为image,str类型,value为待检测的图片路径,list类型。 + + - **返回** + - result:list类型,每个元素为对应输入图片的预测结果。预测结果为dict类型,key为该图片分类结果label,value为该label对应的概率。 + + + + + +## 四、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install densenet121_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/densenet161_imagenet/README.md b/modules/image/classification/densenet161_imagenet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..19c779407258ca49c1bcd12d0298b1f8cae0122c --- /dev/null +++ b/modules/image/classification/densenet161_imagenet/README.md @@ -0,0 +1,84 @@ +# densenet161_imagenet + +|模型名称|densenet161_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|DenseNet| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|否| +|模型大小|114MB| +|最新更新日期|-| +|数据指标|-| + + +## 一、模型基本信息 + + + +- ### 模型介绍 + + - DenseNet 是 CVPR 2017 最佳论文的模型,DenseNet 以前馈方式将每一层与其他层连接,从而 L 层网络就有 L(L+1)/2 个直接连接。对于每一层,其输入是之前的所有层的特征图,而自己的特征图作为之后所有层的输入。DenseNet 缓解了梯度消失问题,加强特征传播,促进了特征重用,并大幅减少了参数量。该PaddleHub Module结构为 DenseNet161,基于ImageNet-2012数据集训练,接受输入图片大小为 224 x 224 x 3,支持直接通过命令行或者Python接口进行预测。 + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | 
[如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + + +- ### 2、安装 + + - ```shell + $ hub install densenet161_imagenet + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run densenet161_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现图像分类模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="densenet161_imagenet") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - 分类接口API。 + - **参数** + - data:dict类型,key为image,str类型,value为待检测的图片路径,list类型。 + + - **返回** + - result:list类型,每个元素为对应输入图片的预测结果。预测结果为dict类型,key为该图片分类结果label,value为该label对应的概率。 + + + + + +## 四、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install densenet161_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/densenet169_imagenet/README.md b/modules/image/classification/densenet169_imagenet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..56a7bd4ea597fd569a79bad46386a5092ded34c6 --- /dev/null +++ b/modules/image/classification/densenet169_imagenet/README.md @@ -0,0 +1,84 @@ +# densenet169_imagenet + +|模型名称|densenet169_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|DenseNet| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|否| +|模型大小|59MB| +|最新更新日期|-| +|数据指标|-| + + +## 一、模型基本信息 + + + +- ### 模型介绍 + + - DenseNet 是 CVPR 2017 最佳论文的模型,DenseNet 以前馈方式将每一层与其他层连接,从而 L 层网络就有 L(L+1)/2 个直接连接。对于每一层,其输入是之前的所有层的特征图,而自己的特征图作为之后所有层的输入。DenseNet 缓解了梯度消失问题,加强特征传播,促进了特征重用,并大幅减少了参数量。该PaddleHub Module结构为 DenseNet169,基于ImageNet-2012数据集训练,接受输入图片大小为 224 x 224 x 3,支持直接通过命令行或者Python接口进行预测。 + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + + +- ### 2、安装 + + - ```shell + $ hub install densenet169_imagenet + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run densenet169_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现图像分类模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="densenet169_imagenet") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - 分类接口API。 + - **参数** + - data:dict类型,key为image,str类型,value为待检测的图片路径,list类型。 + + - **返回** + - result:list类型,每个元素为对应输入图片的预测结果。预测结果为dict类型,key为该图片分类结果label,value为该label对应的概率。 + + + + + +## 四、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install densenet169_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/densenet201_imagenet/README.md b/modules/image/classification/densenet201_imagenet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..702886c8579f6d9b786735de2e7babf0c607678a 
--- /dev/null +++ b/modules/image/classification/densenet201_imagenet/README.md @@ -0,0 +1,84 @@ +# densenet201_imagenet + +|模型名称|densenet201_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|DenseNet| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|否| +|模型大小|82MB| +|最新更新日期|-| +|数据指标|-| + + +## 一、模型基本信息 + + + +- ### 模型介绍 + + - DenseNet 是 CVPR 2017 最佳论文的模型,DenseNet 以前馈方式将每一层与其他层连接,从而 L 层网络就有 L(L+1)/2 个直接连接。对于每一层,其输入是之前的所有层的特征图,而自己的特征图作为之后所有层的输入。DenseNet 缓解了梯度消失问题,加强特征传播,促进了特征重用,并大幅减少了参数量。该PaddleHub Module结构为 DenseNet201,基于ImageNet-2012数据集训练,接受输入图片大小为 224 x 224 x 3,支持直接通过命令行或者Python接口进行预测。 + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + + +- ### 2、安装 + + - ```shell + $ hub install densenet201_imagenet + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run densenet201_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现图像分类模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="densenet201_imagenet") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - 分类接口API。 + - **参数** + - data:dict类型,key为image,str类型,value为待检测的图片路径,list类型。 + + - **返回** + - result:list类型,每个元素为对应输入图片的预测结果。预测结果为dict类型,key为该图片分类结果label,value为该label对应的概率。 + + + + + +## 四、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install densenet201_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/densenet264_imagenet/README.md b/modules/image/classification/densenet264_imagenet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4a35aea838a761747f054d4e3749c0ddcfad3569 --- /dev/null +++ b/modules/image/classification/densenet264_imagenet/README.md @@ -0,0 +1,84 @@ +# densenet264_imagenet + +|模型名称|densenet264_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|DenseNet| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|否| +|模型大小|135MB| +|最新更新日期|-| +|数据指标|-| + + +## 一、模型基本信息 + + + +- ### 模型介绍 + + - DenseNet 是 CVPR 2017 最佳论文的模型,DenseNet 以前馈方式将每一层与其他层连接,从而 L 层网络就有 L(L+1)/2 个直接连接。对于每一层,其输入是之前的所有层的特征图,而自己的特征图作为之后所有层的输入。DenseNet 缓解了梯度消失问题,加强特征传播,促进了特征重用,并大幅减少了参数量。该PaddleHub Module结构为 DenseNet264,基于ImageNet-2012数据集训练,接受输入图片大小为 224 x 224 x 3,支持直接通过命令行或者Python接口进行预测。 + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + + +- ### 2、安装 + + - ```shell + $ hub install densenet264_imagenet + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run densenet264_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现图像分类模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="densenet264_imagenet") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": 
[test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - 分类接口API。 + - **参数** + - data:dict类型,key为image,str类型,value为待检测的图片路径,list类型。 + + - **返回** + - result:list类型,每个元素为对应输入图片的预测结果。预测结果为dict类型,key为该图片分类结果label,value为该label对应的概率 + + + + + +## 四、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install densenet264_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/dpn107_imagenet/README.md b/modules/image/classification/dpn107_imagenet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..e97226f52d8a4ca5d8bb22ec6f523a34be189271 --- /dev/null +++ b/modules/image/classification/dpn107_imagenet/README.md @@ -0,0 +1,85 @@ +# dpn107_imagenet + +|模型名称|dpn107_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|DPN| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|否| +|模型大小|335MB| +|最新更新日期|-| +|数据指标|-| + + +## 一、模型基本信息 + + + +- ### 模型介绍 + + - DPN(Dual Path Networks) 是 ImageNet 2017 目标定位冠军的图像分类模型,融合了 ResNet 和 DenseNet 的核心思想。该PaddleHub Module结构为 DPN107,基于ImageNet-2012数据集训练,接受输入图片大小为 224 x 224 x 3,支持直接通过命令行或者Python接口进行预测。 + + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + + +- ### 2、安装 + + - ```shell + $ hub install dpn107_imagenet + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run dpn107_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现图像分类模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="dpn107_imagenet") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - 分类接口API。 + - **参数** + - data:dict类型,key为image,str类型,value为待检测的图片路径,list类型。 + + - **返回** + - result:list类型,每个元素为对应输入图片的预测结果。预测结果为dict类型,key为该图片分类结果label,value为该label对应的概率。 + + + + + +## 四、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install dpn107_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/dpn131_imagenet/README.md b/modules/image/classification/dpn131_imagenet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1afd847c268a4907fe9dd520ac12eaa78dec88df --- /dev/null +++ b/modules/image/classification/dpn131_imagenet/README.md @@ -0,0 +1,85 @@ +# dpn131_imagenet + +|模型名称|dpn131_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|DPN| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|否| +|模型大小|306MB| +|最新更新日期|-| +|数据指标|-| + + +## 一、模型基本信息 + + + +- ### 模型介绍 + + - DPN(Dual Path Networks) 是 ImageNet 2017 目标定位冠军的图像分类模型,融合了 ResNet 和 DenseNet 的核心思想。该PaddleHub Module结构为 DPN98,基于ImageNet-2012数据集训练,接受输入图片大小为 224 x 224 x 3,支持直接通过命令行或者Python接口进行预测。 + + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + + +- ### 2、安装 + + - ```shell + $ hub install dpn131_imagenet + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | 
[零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run dpn131_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现图像分类模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="dpn131_imagenet") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - 分类接口API。 + - **参数** + - data:dict类型,key为image,str类型,value为待检测的图片路径,list类型。 + + - **返回** + - result:list类型,每个元素为对应输入图片的预测结果。预测结果为dict类型,key为该图片分类结果label,value为该label对应的概率。 + + + + + +## 四、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install dpn131_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/dpn68_imagenet/README.md b/modules/image/classification/dpn68_imagenet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..72518161921afbd4b02e9722715a6aaf63b2464d --- /dev/null +++ b/modules/image/classification/dpn68_imagenet/README.md @@ -0,0 +1,85 @@ +# dpn68_imagenet + +|模型名称|dpn68_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|DPN| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|否| +|模型大小|50MB| +|最新更新日期|-| +|数据指标|-| + + +## 一、模型基本信息 + + + +- ### 模型介绍 + + - DPN(Dual Path Networks) 是 ImageNet 2017 目标定位冠军的图像分类模型,融合了 ResNet 和 DenseNet 的核心思想。该PaddleHub Module结构为 DPN68,基于ImageNet-2012数据集训练,接受输入图片大小为 224 x 224 x 3,支持直接通过命令行或者Python接口进行预测。 + + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + + +- ### 2、安装 + + - ```shell + $ hub install dpn68_imagenet + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run dpn68_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现图像分类模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="dpn68_imagenet") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - 分类接口API。 + - **参数** + - data:dict类型,key为image,str类型,value为待检测的图片路径,list类型。 + + - **返回** + - result:list类型,每个元素为对应输入图片的预测结果。预测结果为dict类型,key为该图片分类结果label,value为该label对应的概率。 + + + + + +## 四、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install dpn68_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/dpn92_imagenet/README.md b/modules/image/classification/dpn92_imagenet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..69024027df06f87801bdaf5e7c3c44183bd4a1eb --- /dev/null +++ b/modules/image/classification/dpn92_imagenet/README.md @@ -0,0 +1,85 @@ +# dpn92_imagenet + +|模型名称|dpn92_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|DPN| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|否| +|模型大小|146MB| +|最新更新日期|-| +|数据指标|-| + + +## 一、模型基本信息 + + + +- ### 模型介绍 + + - DPN(Dual Path Networks) 是 ImageNet 2017 目标定位冠军的图像分类模型,融合了 ResNet 和 DenseNet 的核心思想。该PaddleHub Module结构为 DPN92,基于ImageNet-2012数据集训练,接受输入图片大小为 224 x 224 x 3,支持直接通过命令行或者Python接口进行预测。 + + +## 
二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + + +- ### 2、安装 + + - ```shell + $ hub install dpn92_imagenet + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run dpn92_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现图像分类模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="dpn92_imagenet") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - 分类接口API。 + - **参数** + - data:dict类型,key为image,str类型,value为待检测的图片路径,list类型。 + + - **返回** + - result:list类型,每个元素为对应输入图片的预测结果。预测结果为dict类型,key为该图片分类结果label,value为该label对应的概率。 + + + + + +## 四、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install dpn92_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/dpn98_imagenet/README.md b/modules/image/classification/dpn98_imagenet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..a418583c623729c72c281ef52c9071c46cc8edc1 --- /dev/null +++ b/modules/image/classification/dpn98_imagenet/README.md @@ -0,0 +1,86 @@ +# dpn98_imagenet + +|模型名称|dpn98_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|DPN| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|否| +|模型大小|238MB| +|最新更新日期|-| +|数据指标|-| + + +## 一、模型基本信息 + + + +- ### 模型介绍 + + - DPN(Dual Path Networks) 是 ImageNet 2017 目标定位冠军的图像分类模型,融合了 ResNet 和 DenseNet 的核心思想。该PaddleHub Module结构为 DPN98,基于ImageNet-2012数据集训练,接受输入图片大小为 224 x 224 x 3,支持直接通过命令行或者Python接口进行预测。 + + + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + + +- ### 2、安装 + + - ```shell + $ hub install dpn98_imagenet + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run dpn98_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现图像分类模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="dpn98_imagenet") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - 分类接口API。 + - **参数** + - data:dict类型,key为image,str类型,value为待检测的图片路径,list类型。 + + - **返回** + - result:list类型,每个元素为对应输入图片的预测结果。预测结果为dict类型,key为该图片分类结果label,value为该label对应的概率。 + + + + + +## 四、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install dpn98_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/efficientnetb0_imagenet/README.md b/modules/image/classification/efficientnetb0_imagenet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..a1013ab01f121c264517ecdd7ca0bbe055fcea0f --- /dev/null +++ b/modules/image/classification/efficientnetb0_imagenet/README.md 
@@ -0,0 +1,137 @@ +# efficientnetb0_imagenet + +|模型名称|efficientnetb0_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|EfficientNet| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|否| +|模型大小|22MB| +|最新更新日期|-| +|数据指标|-| + + +## 一、模型基本信息 + + + +- ### 模型介绍 + + - EfficientNet 是谷歌的开源新模型,是一个轻量级网络,它的主干网络由 MBConv 构成,同时采取了 squeeze-and-excitation 操作对网络结构进行优化。该 PaddleHub Module结构为 EfficientNetB0,基于 ImageNet-2012 数据集训练,接受输入图片大小为 224 x 224 x 3,支持直接通过命令行或者 Python 接口进行预测。 + + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 1.6.2 + + - paddlehub >= 1.6.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + + +- ### 2、安装 + + - ```shell + $ hub install efficientnetb0_imagenet + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run efficientnetb0_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现分类模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="efficientnetb0_imagenet") + result = classifier.classification(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = classifier.classification(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 3、API + + - ```python + def classification(images=None, + paths=None, + batch_size=1, + use_gpu=False, + top_k=1): + ``` + - 分类接口API。 + - **参数** + + - images (list\[numpy.ndarray\]): 图片数据,每一个图片数据的shape 均为 \[H, W, C\],颜色空间为 BGR;
+ - paths (list\[str\]): 图片的路径;
+ - batch\_size (int): batch 的大小;
+ - use\_gpu (bool): 是否使用 GPU;**若使用GPU,请先设置CUDA\_VISIBLE\_DEVICES环境变量**;
+ - top\_k (int): 返回预测结果的前 k 个。 + + - **返回** + + - res (list\[dict\]): 分类结果,列表的每一个元素均为字典,其中 key 为识别的菜品类别,value为置信度。 + + + + +## 四、服务部署 + +- PaddleHub Serving可以部署一个图像识别的在线服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + - ```shell + $ hub serving start -m efficientnetb0_imagenet + ``` + + - 这样就完成了一个图像识别的在线服务的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + import cv2 + import base64 + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # 发送HTTP请求 + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/efficientnetb0_imagenet" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # 打印预测结果 + print(r.json()["results"]) + ``` + + +## 五、更新历史 + +* 1.0.0 + + 初始发布 + +* 1.1.0 + + 提升预测性能以及易用性 + - ```shell + $ hub install efficientnetb0_imagenet==1.1.0 + ``` diff --git a/modules/image/classification/efficientnetb0_small_imagenet/README.md b/modules/image/classification/efficientnetb0_small_imagenet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..be464cc8ca77aedea64a199449d2d34e9db18eb4 --- /dev/null +++ b/modules/image/classification/efficientnetb0_small_imagenet/README.md @@ -0,0 +1,136 @@ +# efficientnetb0_small_imagenet + +|模型名称|efficientnetb0_small_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|EfficientNet| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|否| +|模型大小|20MB| +|最新更新日期|-| +|数据指标|-| + + +## 一、模型基本信息 + + + +- ### 模型介绍 + + - EfficientNet 是谷歌的开源新模型,是一个轻量级网络,它的主干网络由 MBConv 构成,同时采取了 squeeze-and-excitation 操作对网络结构进行优化。该 PaddleHub Module结构为 EfficientNetB0,基于 ImageNet-2012 数据集训练,接受输入图片大小为 224 x 224 x 3,支持直接通过命令行或者 Python 接口进行预测。 + + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 1.6.2 + + - paddlehub >= 1.6.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + + +- ### 2、安装 + + - ```shell + $ hub install efficientnetb0_small_imagenet + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run efficientnetb0_small_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现分类模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="efficientnetb0_small_imagenet") + result = classifier.classification(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = classifier.classification(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 3、API + + + + - ```python + def classification(images=None, + paths=None, + batch_size=1, + use_gpu=False, + top_k=1): + ``` + - 分类接口API。 + - **参数** + + - images (list\[numpy.ndarray\]): 图片数据,每一个图片数据的shape 均为 \[H, W, C\],颜色空间为 BGR;
+ - paths (list\[str\]): 图片的路径;
+ - batch\_size (int): batch 的大小;
+ - use\_gpu (bool): 是否使用 GPU;**若使用GPU,请先设置CUDA\_VISIBLE\_DEVICES环境变量**;
+ - top\_k (int): 返回预测结果的前 k 个。 + + - **返回** + + - res (list\[dict\]): 分类结果,列表的每一个元素均为字典,其中 key 为识别的菜品类别,value为置信度。 + + + + +## 四、服务部署 + +- PaddleHub Serving可以部署一个图像识别的在线服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + - ```shell + $ hub serving start -m efficientnetb0_small_imagenet + ``` + + - 这样就完成了一个图像识别的在线服务的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + import cv2 + import base64 + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # 发送HTTP请求 + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/efficientnetb0_small_imagenet" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # 打印预测结果 + print(r.json()["results"]) + ``` + + +## 五、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install efficientnetb0_small_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/efficientnetb1_imagenet/README.md b/modules/image/classification/efficientnetb1_imagenet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..fe5981ece4560d811de6953db1b18ea00bee62c5 --- /dev/null +++ b/modules/image/classification/efficientnetb1_imagenet/README.md @@ -0,0 +1,136 @@ +# efficientnetb1_imagenet + +|模型名称|efficientnetb1_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|EfficientNet| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|否| +|模型大小|33MB| +|最新更新日期|-| +|数据指标|-| + + +## 一、模型基本信息 + + + +- ### 模型介绍 + + - EfficientNet 是谷歌的开源新模型,是一个轻量级网络,它的主干网络由 MBConv 构成,同时采取了 squeeze-and-excitation 操作对网络结构进行优化。该 PaddleHub Module结构为 EfficientNetB1,基于 ImageNet-2012 数据集训练,接受输入图片大小为 224 x 224 x 3,支持直接通过命令行或者 Python 接口进行预测。 + + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 1.6.2 + + - paddlehub >= 1.6.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + + +- ### 2、安装 + + - ```shell + $ hub install efficientnetb1_imagenet + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run efficientnetb1_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现分类模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="efficientnetb1_imagenet") + result = classifier.classification(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = classifier.classification(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 3、API + + + - ```python + def classification(images=None, + paths=None, + batch_size=1, + use_gpu=False, + top_k=1): + ``` + - 分类接口API。 + - **参数** + + - images (list\[numpy.ndarray\]): 图片数据,每一个图片数据的shape 均为 \[H, W, C\],颜色空间为 BGR;
+ - paths (list\[str\]): 图片的路径;
+ - batch\_size (int): batch 的大小;
+ - use\_gpu (bool): 是否使用 GPU;**若使用GPU,请先设置CUDA\_VISIBLE\_DEVICES环境变量**;
+ - top\_k (int): 返回预测结果的前 k 个。 + + - **返回** + + - res (list\[dict\]): 分类结果,列表的每一个元素均为字典,其中 key 为识别的菜品类别,value为置信度。 + + +## 四、服务部署 + +- PaddleHub Serving可以部署一个图像识别的在线服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + - ```shell + $ hub serving start -m efficientnetb1_imagenet + ``` + + - 这样就完成了一个图像识别的在线服务的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + import cv2 + import base64 + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # 发送HTTP请求 + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/efficientnetb1_imagenet" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # 打印预测结果 + print(r.json()["results"]) + ``` + + +## 五、更新历史 + +* 1.0.0 + + 初始发布 + +* 1.1.0 + + 提升预测性能以及易用性 + - ```shell + $ hub install efficientnetb1_imagenet==1.1.0 + ``` diff --git a/modules/image/classification/efficientnetb2_imagenet/README.md b/modules/image/classification/efficientnetb2_imagenet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..3972b35e11309354a24787a0167ea450bc6891b7 --- /dev/null +++ b/modules/image/classification/efficientnetb2_imagenet/README.md @@ -0,0 +1,136 @@ +# efficientnetb2_imagenet + +|模型名称|efficientnetb2_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|EfficientNet| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|否| +|模型大小|38MB| +|最新更新日期|-| +|数据指标|-| + + +## 一、模型基本信息 + + + +- ### 模型介绍 + + - EfficientNet 是谷歌的开源新模型,是一个轻量级网络,它的主干网络由 MBConv 构成,同时采取了 squeeze-and-excitation 操作对网络结构进行优化。该 PaddleHub Module结构为 EfficientNetB2,基于 ImageNet-2012 数据集训练,接受输入图片大小为 224 x 224 x 3,支持直接通过命令行或者 Python 接口进行预测。 + + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 1.6.2 + + - paddlehub >= 1.6.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + + +- ### 2、安装 + + - ```shell + $ hub install efficientnetb2_imagenet + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run efficientnetb2_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现分类模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="efficientnetb2_imagenet") + result = classifier.classification(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = classifier.classification(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 3、API + + + - ```python + def classification(images=None, + paths=None, + batch_size=1, + use_gpu=False, + top_k=1): + ``` + - 分类接口API。 + - **参数** + + - images (list\[numpy.ndarray\]): 图片数据,每一个图片数据的shape 均为 \[H, W, C\],颜色空间为 BGR;
+ - paths (list\[str\]): 图片的路径;
+ - batch\_size (int): batch 的大小;
+ - use\_gpu (bool): 是否使用 GPU;**若使用GPU,请先设置CUDA\_VISIBLE\_DEVICES环境变量**;
+ - top\_k (int): 返回预测结果的前 k 个。 + + - **返回** + + - res (list\[dict\]): 分类结果,列表的每一个元素均为字典,其中 key 为识别的菜品类别,value为置信度。 + + +## 四、服务部署 + +- PaddleHub Serving可以部署一个图像识别的在线服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + - ```shell + $ hub serving start -m efficientnetb2_imagenet + ``` + + - 这样就完成了一个图像识别的在线服务的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + import cv2 + import base64 + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # 发送HTTP请求 + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/efficientnetb2_imagenet" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # 打印预测结果 + print(r.json()["results"]) + ``` + + +## 五、更新历史 + +* 1.0.0 + + 初始发布 + +* 1.1.0 + + 提升预测性能以及易用性 + - ```shell + $ hub install efficientnetb2_imagenet==1.1.0 + ``` diff --git a/modules/image/classification/efficientnetb3_imagenet/README.md b/modules/image/classification/efficientnetb3_imagenet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..3bb6273921e6cce4c3210736b0eeb4b1c264f53c --- /dev/null +++ b/modules/image/classification/efficientnetb3_imagenet/README.md @@ -0,0 +1,136 @@ +# efficientnetb3_imagenet + +|模型名称|efficientnetb3_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|EfficientNet| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|否| +|模型大小|51MB| +|最新更新日期|-| +|数据指标|-| + + +## 一、模型基本信息 + + + +- ### 模型介绍 + + - EfficientNet 是谷歌的开源新模型,是一个轻量级网络,它的主干网络由 MBConv 构成,同时采取了 squeeze-and-excitation 操作对网络结构进行优化。该 PaddleHub Module结构为 EfficientNetB3,基于 ImageNet-2012 数据集训练,接受输入图片大小为 224 x 224 x 3,支持直接通过命令行或者 Python 接口进行预测。 + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 1.6.2 + + - paddlehub >= 1.6.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + + +- ### 2、安装 + + - ```shell + $ hub install efficientnetb3_imagenet + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run efficientnetb3_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现分类模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="efficientnetb3_imagenet") + result = classifier.classification(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = classifier.classification(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 3、API + + + - ```python + def classification(images=None, + paths=None, + batch_size=1, + use_gpu=False, + top_k=1): + ``` + - 分类接口API。 + - **参数** + + - images (list\[numpy.ndarray\]): 图片数据,每一个图片数据的shape 均为 \[H, W, C\],颜色空间为 BGR;
+ - paths (list\[str\]): 图片的路径;
+ - batch\_size (int): batch 的大小;
+ - use\_gpu (bool): 是否使用 GPU;**若使用GPU,请先设置CUDA_VISIBLE_DEVICES环境变量**
+ - top\_k (int): 返回预测结果的前 k 个。 + + - **返回** + + - res (list\[dict\]): 分类结果,列表的每一个元素均为字典,其中 key 为识别的菜品类别,value为置信度。 + + + +## 四、服务部署 + +- PaddleHub Serving可以部署一个图像识别的在线服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + - ```shell + $ hub serving start -m efficientnetb3_imagenet + ``` + + - 这样就完成了一个图像识别的在线服务的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + import cv2 + import base64 + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # 发送HTTP请求 + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/efficientnetb3_imagenet" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # 打印预测结果 + print(r.json()["results"]) + ``` + + +## 五、更新历史 + +* 1.0.0 + + 初始发布 + +* 1.1.0 + + 提升预测性能以及易用性 + - ```shell + $ hub install efficientnetb3_imagenet==1.1.0 + ``` diff --git a/modules/image/classification/efficientnetb4_imagenet/README.md b/modules/image/classification/efficientnetb4_imagenet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1a7d0e9baf4a7e8503c3e7f7b9fdec32fdac52d9 --- /dev/null +++ b/modules/image/classification/efficientnetb4_imagenet/README.md @@ -0,0 +1,137 @@ +# efficientnetb4_imagenet + +|模型名称|efficientnetb4_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|EfficientNet| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|否| +|模型大小|77MB| +|最新更新日期|-| +|数据指标|-| + + +## 一、模型基本信息 + + + +- ### 模型介绍 + + - EfficientNet 是谷歌的开源新模型,是一个轻量级网络,它的主干网络由 MBConv 构成,同时采取了 squeeze-and-excitation 操作对网络结构进行优化。该 PaddleHub Module结构为 EfficientNetB4,基于 ImageNet-2012 数据集训练,接受输入图片大小为 224 x 224 x 3,支持直接通过命令行或者 Python 接口进行预测。 + + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 1.6.2 + + - paddlehub >= 1.6.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + + +- ### 2、安装 + + - ```shell + $ hub install efficientnetb4_imagenet + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run efficientnetb4_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现分类模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="efficientnetb4_imagenet") + result = classifier.classification(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = classifier.classification(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 3、API + + + - ```python + def classification(images=None, + paths=None, + batch_size=1, + use_gpu=False, + top_k=1): + ``` + - 分类接口API。 + - **参数** + + - images (list\[numpy.ndarray\]): 图片数据,每一个图片数据的shape 均为 \[H, W, C\],颜色空间为 BGR;
+ - paths (list\[str\]): 图片的路径;
+ - batch\_size (int): batch 的大小;
+ - use\_gpu (bool): 是否使用 GPU;**若使用GPU,请先设置CUDA_VISIBLE_DEVICES环境变量**
+ - top\_k (int): 返回预测结果的前 k 个。 + + - **返回** + + - res (list\[dict\]): 分类结果,列表的每一个元素均为字典,其中 key 为识别的菜品类别,value为置信度。 + + + +## 四、服务部署 + +- PaddleHub Serving可以部署一个图像识别的在线服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + - ```shell + $ hub serving start -m efficientnetb4_imagenet + ``` + + - 这样就完成了一个图像识别的在线服务的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + import cv2 + import base64 + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # 发送HTTP请求 + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/efficientnetb4_imagenet" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # 打印预测结果 + print(r.json()["results"]) + ``` + + +## 五、更新历史 + +* 1.0.0 + + 初始发布 + +* 1.1.0 + + 提升预测性能以及易用性 + - ```shell + $ hub install efficientnetb4_imagenet==1.1.0 + ``` diff --git a/modules/image/classification/efficientnetb5_imagenet/README.md b/modules/image/classification/efficientnetb5_imagenet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..3c8a4bc373f02cf773cfe4a742c86c3025774361 --- /dev/null +++ b/modules/image/classification/efficientnetb5_imagenet/README.md @@ -0,0 +1,137 @@ +# efficientnetb5_imagenet + +|模型名称|efficientnetb5_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|EfficientNet| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|否| +|模型大小|121MB| +|最新更新日期|-| +|数据指标|-| + + +## 一、模型基本信息 + + + +- ### 模型介绍 + + - EfficientNet 是谷歌的开源新模型,是一个轻量级网络,它的主干网络由 MBConv 构成,同时采取了 squeeze-and-excitation 操作对网络结构进行优化。该 PaddleHub Module结构为 EfficientNetB5,基于 ImageNet-2012 数据集训练,接受输入图片大小为 224 x 224 x 3,支持直接通过命令行或者 Python 接口进行预测。 + + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 1.6.2 + + - paddlehub >= 1.6.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + + +- ### 2、安装 + + - ```shell + $ hub install efficientnetb5_imagenet + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run efficientnetb5_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现分类模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="efficientnetb5_imagenet") + result = classifier.classification(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = classifier.classification(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 3、API + + - ```python + def classification(images=None, + paths=None, + batch_size=1, + use_gpu=False, + top_k=1): + ``` + - 分类接口API。 + - **参数** + + - images (list\[numpy.ndarray\]): 图片数据,每一个图片数据的shape 均为 \[H, W, C\],颜色空间为 BGR;
+ - paths (list\[str\]): 图片的路径;
+ - batch\_size (int): batch 的大小;
+ - use\_gpu (bool): 是否使用 GPU;**若使用GPU,请先设置CUDA_VISIBLE_DEVICES环境变量**
+ - top\_k (int): 返回预测结果的前 k 个。 + + - **返回** + + - res (list\[dict\]): 分类结果,列表的每一个元素均为字典,其中 key 为识别的菜品类别,value为置信度。 + + + + +## 四、服务部署 + +- PaddleHub Serving可以部署一个图像识别的在线服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + - ```shell + $ hub serving start -m efficientnetb5_imagenet + ``` + + - 这样就完成了一个图像识别的在线服务的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + import cv2 + import base64 + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # 发送HTTP请求 + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/efficientnetb5_imagenet" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # 打印预测结果 + print(r.json()["results"]) + ``` + + +## 五、更新历史 + +* 1.0.0 + + 初始发布 + +* 1.1.0 + + 提升预测性能以及易用性 + - ```shell + $ hub install efficientnetb5_imagenet==1.1.0 + ``` diff --git a/modules/image/classification/efficientnetb6_imagenet/README.md b/modules/image/classification/efficientnetb6_imagenet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..746ff1a711556429a03625db0f959146fbae3331 --- /dev/null +++ b/modules/image/classification/efficientnetb6_imagenet/README.md @@ -0,0 +1,136 @@ +# efficientnetb6_imagenet + +|模型名称|efficientnetb6_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|EfficientNet| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|否| +|模型大小|170MB| +|最新更新日期|-| +|数据指标|-| + + +## 一、模型基本信息 + + + +- ### 模型介绍 + + - EfficientNet 是谷歌的开源新模型,是一个轻量级网络,它的主干网络由 MBConv 构成,同时采取了 squeeze-and-excitation 操作对网络结构进行优化。该 PaddleHub Module结构为 EfficientNetB6,基于 ImageNet-2012 数据集训练,接受输入图片大小为 224 x 224 x 3,支持直接通过命令行或者 Python 接口进行预测。 + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + + +- ### 2、安装 + + - ```shell + $ hub install efficientnetb6_imagenet + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run efficientnetb6_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现图像分类模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="efficientnetb6_imagenet") + result = classifier.classification(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = classifier.classification(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 3、API + + + - ```python + def classification(images=None, + paths=None, + batch_size=1, + use_gpu=False, + top_k=1): + ``` + - 分类接口API。 + - **参数** + + - images (list\[numpy.ndarray\]): 图片数据,每一个图片数据的shape 均为 \[H, W, C\],颜色空间为 BGR;
+ - paths (list\[str\]): 图片的路径;
+ - batch\_size (int): batch 的大小;
+ - use\_gpu (bool): 是否使用 GPU;**若使用GPU,请先设置CUDA_VISIBLE_DEVICES环境变量**
+ - top\_k (int): 返回预测结果的前 k 个。 + + - **返回** + + - res (list\[dict\]): 分类结果,列表的每一个元素均为字典,其中 key 为识别的菜品类别,value为置信度。 + + + +## 四、服务部署 + +- PaddleHub Serving可以部署一个图像识别的在线服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + - ```shell + $ hub serving start -m efficientnetb6_imagenet + ``` + + - 这样就完成了一个图像识别的在线服务的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + import cv2 + import base64 + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # 发送HTTP请求 + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/efficientnetb6_imagenet" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # 打印预测结果 + print(r.json()["results"]) + ``` + + +## 五、更新历史 + +* 1.0.0 + + 初始发布 + +* 1.1.0 + + 提升预测性能以及易用性 + - ```shell + $ hub install efficientnetb6_imagenet==1.1.0 + ``` diff --git a/modules/image/classification/efficientnetb7_imagenet/README.md b/modules/image/classification/efficientnetb7_imagenet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..bef07051b9390f2416748df89041d60ef9290d32 --- /dev/null +++ b/modules/image/classification/efficientnetb7_imagenet/README.md @@ -0,0 +1,137 @@ +# efficientnetb7_imagenet + +|模型名称|efficientnetb7_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|EfficientNet| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|否| +|模型大小|260MB| +|最新更新日期|-| +|数据指标|-| + + +## 一、模型基本信息 + + + +- ### 模型介绍 + + - EfficientNet 是谷歌的开源新模型,是一个轻量级网络,它的主干网络由 MBConv 构成,同时采取了 squeeze-and-excitation 操作对网络结构进行优化。该 PaddleHub Module结构为 EfficientNetB7,基于 ImageNet-2012 数据集训练,接受输入图片大小为 224 x 224 x 3,支持直接通过命令行或者 Python 接口进行预测。 + + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 1.6.2 + + - paddlehub >= 1.6.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + + +- ### 2、安装 + + - ```shell + $ hub install efficientnetb7_imagenet + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run efficientnetb7_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现分类模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="efficientnetb7_imagenet") + result = classifier.classification(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = classifier.classification(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 3、API + + + - ```python + def classification(images=None, + paths=None, + batch_size=1, + use_gpu=False, + top_k=1): + ``` + - 分类接口API。 + - **参数** + + - images (list\[numpy.ndarray\]): 图片数据,每一个图片数据的shape 均为 \[H, W, C\],颜色空间为 BGR;
+ - paths (list\[str\]): 图片的路径;
+ - batch\_size (int): batch 的大小;
+ - use\_gpu (bool): 是否使用 GPU;**若使用GPU,请先设置CUDA_VISIBLE_DEVICES环境变量**
+ - top\_k (int): 返回预测结果的前 k 个。 + + - **返回** + + - res (list\[dict\]): 分类结果,列表的每一个元素均为字典,其中 key 为识别的菜品类别,value为置信度。 + + + +## 四、服务部署 + +- PaddleHub Serving可以部署一个图像识别的在线服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + - ```shell + $ hub serving start -m efficientnetb7_imagenet + ``` + + - 这样就完成了一个图像识别的在线服务的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + import cv2 + import base64 + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # 发送HTTP请求 + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/efficientnetb7_imagenet" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # 打印预测结果 + print(r.json()["results"]) + ``` + + +## 五、更新历史 + +* 1.0.0 + + 初始发布 + +* 1.1.0 + + 提升预测性能以及易用性 + - ```shell + $ hub install efficientnetb7_imagenet==1.1.0 + ``` diff --git a/modules/image/classification/fix_resnext101_32x48d_wsl_imagenet/README.md b/modules/image/classification/fix_resnext101_32x48d_wsl_imagenet/README.md index 12c673852199021c2f88a228795642b334d16841..ef750cec4941fb598296ccc9308f8261862d0c70 100644 --- a/modules/image/classification/fix_resnext101_32x48d_wsl_imagenet/README.md +++ b/modules/image/classification/fix_resnext101_32x48d_wsl_imagenet/README.md @@ -1,149 +1,134 @@ -## 命令行预测 +# fix_resnext101_32x48d_wsl_imagenet -``` -hub run fix_resnext101_32x48d_wsl_imagenet --input_path "/PATH/TO/IMAGE" -``` +|模型名称|fix_resnext101_32x48d_wsl_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|ResNeXt| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|否| +|模型大小|3.1GB| +|最新更新日期|-| +|数据指标|-| -## API -```python -def get_expected_image_width() -``` +## 一、模型基本信息 -返回预处理的图片宽度,也就是224。 -```python -def get_expected_image_height() -``` -返回预处理的图片高度,也就是224。 +- ### 模型介绍 -```python -def get_pretrained_images_mean() -``` + - ResNeXt 是由 UC San Diego 和 Facebook AI 研究所于2017年提出的图像分类模型,模型沿袭了 VGG/ResNets 的堆叠思想,并采用 split-transform-merge 策略来增加网络的分支数。该 PaddleHub Module 在包含数十亿张社交媒体图片的数据集上进行弱监督训练,并使用ImageNet-2012数据集finetune,接受输入图片大小为 224 x 224 x 3,支持直接通过命令行或者 Python 接口进行预测。 -返回预处理的图片均值,也就是 \[0.485, 0.456, 0.406\]。 -```python -def get_pretrained_images_std() -``` +## 二、安装 -返回预处理的图片标准差,也就是 \[0.229, 0.224, 0.225\]。 +- ### 1、环境依赖 + - paddlepaddle >= 1.6.2 -```python -def context(trainable=True, pretrained=True) -``` + - paddlehub >= 1.6.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) -**参数** -* trainable (bool): 计算图的参数是否为可训练的; -* pretrained (bool): 是否加载默认的预训练模型。 +- ### 2、安装 -**返回** + - ```shell + $ hub install fix_resnext101_32x48d_wsl_imagenet + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) -* inputs (dict): 计算图的输入,key 为 'image', value 为图片的张量; -* outputs (dict): 计算图的输出,key 为 'classification' 和 'feature_map',其相应的值为: - * classification (paddle.fluid.framework.Variable): 分类结果,也就是全连接层的输出; - * feature\_map (paddle.fluid.framework.Variable): 特征匹配,全连接层前面的那个张量。 -* context\_prog(fluid.Program): 计算图,用于迁移学习。 +## 三、模型API预测 -```python -def classification(images=None, - paths=None, - batch_size=1, - use_gpu=False, - top_k=1): -``` +- ### 1、命令行预测 -**参数** + - ```shell + $ hub run 
fix_resnext101_32x48d_wsl_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现分类模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) -* images (list\[numpy.ndarray\]): 图片数据,每一个图片数据的shape 均为 \[H, W, C\],颜色空间为 BGR; -* paths (list\[str\]): 图片的路径; -* batch\_size (int): batch 的大小; -* use\_gpu (bool): 是否使用 GPU 来预测; -* top\_k (int): 返回预测结果的前 k 个。 +- ### 2、预测代码示例 -**返回** + - ```python + import paddlehub as hub + import cv2 -res (list\[dict\]): 分类结果,列表的每一个元素均为字典,其中 key 为识别动物的类别,value为置信度。 + classifier = hub.Module(name="fix_resnext101_32x48d_wsl_imagenet") + result = classifier.classification(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = classifier.classification(paths=['/PATH/TO/IMAGE']) + ``` -```python -def save_inference_model(dirname, - model_filename=None, - params_filename=None, - combined=True) -``` +- ### 3、API -将模型保存到指定路径。 -**参数** + - ```python + def classification(images=None, + paths=None, + batch_size=1, + use_gpu=False, + top_k=1): + ``` + - 分类接口API。 + - **参数** -* dirname: 存在模型的目录名称 -* model\_filename: 模型文件名称,默认为\_\_model\_\_ -* params\_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效) -* combined: 是否将参数保存到统一的一个文件中 + - images (list\[numpy.ndarray\]): 图片数据,每一个图片数据的shape 均为 \[H, W, C\],颜色空间为 BGR;
+ - paths (list\[str\]): 图片的路径;
+ - batch\_size (int): batch 的大小;
+ - use\_gpu (bool): 是否使用 GPU;**若使用GPU,请先设置CUDA_VISIBLE_DEVICES环境变量**
+ - top\_k (int): 返回预测结果的前 k 个。 -## 代码示例 + - **返回** -```python -import paddlehub as hub -import cv2 + - res (list\[dict\]): 分类结果,列表的每一个元素均为字典,其中 key 为识别的菜品类别,value为置信度。 -classifier = hub.Module(name="fix_resnext101_32x48d_wsl_imagenet") -result = classifier.classification(images=[cv2.imread('/PATH/TO/IMAGE')]) -# or -# result = classifier.classification(paths=['/PATH/TO/IMAGE']) -``` -## 服务部署 -PaddleHub Serving可以部署一个在线图像识别服务。 +## 四、服务部署 -## 第一步:启动PaddleHub Serving +- PaddleHub Serving可以部署一个图像识别的在线服务。 -运行启动命令: -```shell -$ hub serving start -m fix_resnext101_32x48d_wsl_imagenet -``` +- ### 第一步:启动PaddleHub Serving -这样就完成了一个在线图像识别服务化API的部署,默认端口号为8866。 + - 运行启动命令: + - ```shell + $ hub serving start -m fix_resnext101_32x48d_wsl_imagenet + ``` -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + - 这样就完成了一个图像识别的在线服务的部署,默认端口号为8866。 -## 第二步:发送预测请求 + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 +- ### 第二步:发送预测请求 -```python -import requests -import json -import cv2 -import base64 + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + - ```python + import requests + import json + import cv2 + import base64 -def cv2_to_base64(image): - data = cv2.imencode('.jpg', image)[1] - return base64.b64encode(data.tostring()).decode('utf8') + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + # 发送HTTP请求 + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/fix_resnext101_32x48d_wsl_imagenet" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) -# 发送HTTP请求 -data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} -headers = {"Content-type": "application/json"} -url = "http://127.0.0.1:8866/predict/fix_resnext101_32x48d_wsl_imagenet" -r = requests.post(url=url, headers=headers, data=json.dumps(data)) + # 打印预测结果 + print(r.json()["results"]) + ``` -# 打印预测结果 -print(r.json()["results"]) -``` -### 查看代码 +## 五、更新历史 -https://github.com/PaddlePaddle/PaddleClas +* 1.0.0 -### 依赖 - -paddlepaddle >= 1.6.2 - -paddlehub >= 1.6.0 + 初始发布 + - ```shell + $ hub install fix_resnext101_32x48d_wsl_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/food_classification/README.md b/modules/image/classification/food_classification/README.md new file mode 100644 index 0000000000000000000000000000000000000000..01f910138e18aee8c45d1a2f56f493d547988d50 --- /dev/null +++ b/modules/image/classification/food_classification/README.md @@ -0,0 +1,90 @@ +# food_classification + +|模型名称|food_classification| +| :--- | :---: | +|类别|图像-图像分类| +|网络|ResNet50_vd_ssld| +|数据集|美食数据集| +|是否支持Fine-tuning|否| +|模型大小|91MB| +|最新更新日期|-| +|数据指标|-| + + +## 一、模型基本信息 + + + +- ### 模型介绍 + + - 美食分类(food_classification),该模型可识别苹果派,小排骨,烤面包,牛肉馅饼,牛肉鞑靼。该PaddleHub Module支持API预测及命令行预测。 + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + + - paddlex >= 1.3.7 + + +- ### 2、安装 + + - ```shell + $ hub install food_classification + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run food_classification --input_path /PATH/TO/IMAGE + ``` + - 通过命令行方式实现图像分类模型的调用,更多请见 
[PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="food_classification") + images = [cv2.imread('/PATH/TO/IMAGE')] + results = classifier.predict(images=images) + for result in results: + print(result) + ``` + +- ### 3、API + + - ```python + def predict(images) + ``` + - 分类接口API。 + - **参数** + - images:list类型,待检测的图像。 + + - **返回** + - result:list类型,每个元素为对应输入图片的预测结果。预测结果为dict类型: + - category_id (int): 类别的id; + - category(str): 类别; + - score(float): 准确率 + + + + + +## 四、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install food_classification==1.0.0 + ``` diff --git a/modules/thirdparty/text/text_generation/reading_pictures_writing_poems_for_midautumn/__init__.py b/modules/image/classification/food_classification/__init__.py similarity index 100% rename from modules/thirdparty/text/text_generation/reading_pictures_writing_poems_for_midautumn/__init__.py rename to modules/image/classification/food_classification/__init__.py diff --git a/modules/thirdparty/image/classification/food_classification/module.py b/modules/image/classification/food_classification/module.py similarity index 100% rename from modules/thirdparty/image/classification/food_classification/module.py rename to modules/image/classification/food_classification/module.py diff --git a/modules/thirdparty/image/classification/food_classification/requirements.txt b/modules/image/classification/food_classification/requirements.txt similarity index 70% rename from modules/thirdparty/image/classification/food_classification/requirements.txt rename to modules/image/classification/food_classification/requirements.txt index ad32066430096ff4050dce8930f74eae5eb9d2f0..f3c5b8fb12473794251e0a4669dac313cb93eff4 100644 --- a/modules/thirdparty/image/classification/food_classification/requirements.txt +++ b/modules/image/classification/food_classification/requirements.txt @@ -1,3 +1,3 @@ paddlepaddle >= 2.0.0 paddlehub >= 2.0.0 -paddlex >= 1.3.7 +paddlex == 1.3.7 diff --git a/modules/image/classification/ghostnet_x0_5_imagenet/README.md b/modules/image/classification/ghostnet_x0_5_imagenet/README.md index 40d83f30ccf0a11ddc149da0a92f32d0a78666ba..26762f0b0273794f7aa033586990e796be6be9c5 100644 --- a/modules/image/classification/ghostnet_x0_5_imagenet/README.md +++ b/modules/image/classification/ghostnet_x0_5_imagenet/README.md @@ -1,192 +1,181 @@ -```shell -$ hub install ghostnet_x0_5_imagenet==1.0.0 -``` +# ghostnet_x0_5_imagenet -## 命令行预测 +|模型名称|ghostnet_x0_5_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|GhostNet| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|是| +|模型大小|15MB| +|指标|-| +|最新更新日期|2021-09-14| -```shell -$ hub run ghostnet_x0_5_imagenet --input_path "/PATH/TO/IMAGE" --top_k 5 -``` -## 脚本预测 +## 一、模型基本信息 -```python -import paddle -import paddlehub as hub +- ### 模型介绍 -if __name__ == '__main__': + - GhostNet是华为在2020年提出的全新轻量级网络结构,通过引入ghost模块,大大缓解了传统深度网络中特征的冗余计算问题,大大减少了网络参数和计算量。 - model = hub.Module(name='ghostnet_x0_5_imagenet',) - result = model.predict([PATH/TO/IMAGE]) -``` -## Fine-tune代码步骤 +## 二、安装 -使用PaddleHub Fine-tune API进行Fine-tune可以分为4个步骤。 +- ### 1、环境依赖 -### Step1: 定义数据预处理方式 -```python -import paddlehub.vision.transforms as T + - paddlepaddle >= 2.0.0 -transforms = T.Compose([T.Resize((256, 256)), - T.CenterCrop(224), - T.Normalize(mean=[0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225])], - to_rgb=True) -``` + - paddlehub >= 2.0.0 -'transforms' 数据增强模块定义了丰富的数据预处理方式,用户可按照需求替换自己需要的数据预处理方式。 +- ### 2、安装 + - ```shell + $ 
hub install ghostnet_x0_5_imagenet + ``` -### Step2: 下载数据集并使用 -```python -from paddlehub.datasets import Flowers + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) -flowers = Flowers(transforms) -flowers_validate = Flowers(transforms, mode='val') -``` -* transforms(Callable): 数据预处理方式。 -* mode(str): 选择数据模式,可选项有 'train', 'test', 'val', 默认为'train'。 +## 三、模型API预测 -'hub.datasets.Flowers()' 会自动从网络下载数据集并解压到用户目录下'$HOME/.paddlehub/dataset'目录。 +- ### 1.命令行预测 + ```shell + $ hub run ghostnet_x0_5_imagenet --input_path "/PATH/TO/IMAGE" --top_k 5 + ``` +- ### 2.预测代码示例 -### Step3: 加载预训练模型 + ```python + import paddle + import paddlehub as hub + if __name__ == '__main__': + model = hub.Module(name='ghostnet_x0_5_imagenet') + result = model.predict(['flower.jpg']) + ``` +- ### 3.如何开始Fine-tune -```python -import paddlehub as hub + - 在完成安装PaddlePaddle与PaddleHub后,通过执行`python train.py`即可开始使用ghostnet_x0_5_imagenet对[Flowers](../../docs/reference/datasets.md#class-hubdatasetsflowers)等数据集进行Fine-tune。 -model = hub.Module(name='ghostnet_x0_5_imagenet', - label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"], - load_checkpoint=None) -``` -* name(str): 选择预训练模型的名字。 -* label_list(list): 设置标签对应分类类别, 默认为Imagenet2012类别。 -* load _checkpoint(str): 模型参数地址。 + - 代码步骤 -PaddleHub提供许多图像分类预训练模型,如xception、mobilenet、efficientnet等,详细信息参见[图像分类模型](https://www.paddlepaddle.org.cn/hub?filter=en_category&value=ImageClassification)。 + - Step1: 定义数据预处理方式 + - ```python + import paddlehub.vision.transforms as T + transforms = T.Compose([T.Resize((256, 256)), + T.CenterCrop(224), + T.Normalize(mean=[0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225])], + to_rgb=True) + ``` -如果想尝试efficientnet模型,只需要更换Module中的'name'参数即可. 
-```python -import paddlehub as hub + - `transforms` 数据增强模块定义了丰富的数据预处理方式,用户可按照需求替换自己需要的数据预处理方式。 -# 更换name参数即可无缝切换efficientnet模型, 代码示例如下 -module = hub.Module(name="efficientnetb7_imagenet") -``` -**NOTE:**目前部分模型还没有完全升级到2.0版本,敬请期待。 + - Step2: 下载数据集并使用 + - ```python + from paddlehub.datasets import Flowers + flowers = Flowers(transforms) + flowers_validate = Flowers(transforms, mode='val') + ``` -### Step4: 选择优化策略和运行配置 + * `transforms`: 数据预处理方式。 + * `mode`: 选择数据模式,可选项有 `train`, `test`, `val`, 默认为`train`。 -```python -import paddle -from paddlehub.finetune.trainer import Trainer + * 数据集的准备代码可以参考 [flowers.py](../../paddlehub/datasets/flowers.py)。`hub.datasets.Flowers()` 会自动从网络下载数据集并解压到用户目录下`$HOME/.paddlehub/dataset`目录。 -optimizer = paddle.optimizer.Adam(learning_rate=0.001, parameters=model.parameters()) -trainer = Trainer(model, optimizer, checkpoint_dir='img_classification_ckpt') -trainer.train(flowers, epochs=100, batch_size=32, eval_dataset=flowers_validate, save_interval=1) -``` + - Step3: 加载预训练模型 -#### 优化策略 + - ```python + model = hub.Module(name="ghostnet_x0_5_imagenet", label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"]) + ``` + * `name`: 选择预训练模型的名字。 + * `label_list`: 设置输出分类类别,默认为Imagenet2012类别。 -Paddle2.0rc提供了多种优化器选择,如'SGD', 'Adam', 'Adamax'等,详细参见[策略](https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0-rc/api/paddle/optimizer/optimizer/Optimizer_cn.html)。 + - Step4: 选择优化策略和运行配置 -其中'Adam': + ```python + optimizer = paddle.optimizer.Adam(learning_rate=0.001, parameters=model.parameters()) + trainer = Trainer(model, optimizer, checkpoint_dir='img_classification_ckpt') + trainer.train(flowers, epochs=100, batch_size=32, eval_dataset=flowers_validate, save_interval=1) + ``` -* learning_rate: 全局学习率。默认为1e-3; -* parameters: 待优化模型参数。 -#### 运行配置 -'Trainer' 主要控制Fine-tune的训练,包含以下可控制的参数: + - 运行配置 -* model: 被优化模型; -* optimizer: 优化器选择; -* use_vdl: 是否使用vdl可视化训练过程; -* checkpoint_dir: 保存模型参数的地址; -* compare_metrics: 保存最优模型的衡量指标; + - `Trainer` 主要控制Fine-tune的训练,包含以下可控制的参数: -'trainer.train' 主要控制具体的训练过程,包含以下可控制的参数: + * `model`: 被优化模型; + * `optimizer`: 优化器选择; + * `use_vdl`: 是否使用vdl可视化训练过程; + * `checkpoint_dir`: 保存模型参数的地址; + * `compare_metrics`: 保存最优模型的衡量指标; -* train_dataset: 训练时所用的数据集; -* epochs: 训练轮数; -* batch_size: 训练的批大小,如果使用GPU,请根据实际情况调整batch_size; -* num_workers: works的数量,默认为0; -* eval_dataset: 验证集; -* log_interval: 打印日志的间隔, 单位为执行批训练的次数。 -* save_interval: 保存模型的间隔频次,单位为执行训练的轮数。 + - `trainer.train` 主要控制具体的训练过程,包含以下可控制的参数: -## 模型预测 + * `train_dataset`: 训练时所用的数据集; + * `epochs`: 训练轮数; + * `batch_size`: 训练的批大小,如果使用GPU,请根据实际情况调整batch_size; + * `num_workers`: works的数量,默认为0; + * `eval_dataset`: 验证集; + * `log_interval`: 打印日志的间隔, 单位为执行批训练的次数。 + * `save_interval`: 保存模型的间隔频次,单位为执行训练的轮数。 -当完成Fine-tune后,Fine-tune过程在验证集上表现最优的模型会被保存在'${CHECKPOINT_DIR}/best_model'目录下,其中'${CHECKPOINT_DIR}'目录为Fine-tune时所选择的保存checkpoint的目录。 -我们使用该模型来进行预测。predict.py脚本如下: + - 模型预测 -```python -import paddle -import paddlehub as hub + - 当完成Fine-tune后,Fine-tune过程在验证集上表现最优的模型会被保存在`${CHECKPOINT_DIR}/best_model`目录下,其中`${CHECKPOINT_DIR}`目录为Fine-tune时所选择的保存checkpoint的目录。 我们使用该模型来进行预测。predict.py脚本如下: -if __name__ == '__main__': + - ```python + import paddle + import paddlehub as hub + if __name__ == '__main__': + model = hub.Module(name='ghostnet_x0_5_imagenet', label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"], load_checkpoint='/PATH/TO/CHECKPOINT') + result = model.predict(['flower.jpg']) + ``` - model = hub.Module(name='ghostnet_x0_5_imagenet', label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"], 
load_checkpoint='/PATH/TO/CHECKPOINT') - result = model.predict(['flower.jpg']) -``` -参数配置正确后,请执行脚本'python predict.py', 加载模型具体可参见[加载](https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0-rc/api/paddle/framework/io/load_cn.html#load)。 + - **NOTE:** 进行预测时,所选择的module,checkpoint_dir,dataset必须和Fine-tune所用的一样。 -**NOTE:** 进行预测时,所选择的module,checkpoint_dir,dataset必须和Fine-tune所用的一样。 +## 四、服务部署 -## 服务部署 +- PaddleHub Serving可以部署一个在线分类任务服务。 -PaddleHub Serving可以部署一个在线分类任务服务 +- ### 第一步:启动PaddleHub Serving -## Step1: 启动PaddleHub Serving + - 运行启动命令: -运行启动命令: + - ```shell + $ hub serving start -m ghostnet_x0_5_imagenet + ``` -```shell -$ hub serving start -m ghostnet_x0_5_imagenet -``` + - 这样就完成了一个分类任务服务化API的部署,默认端口号为8866。 -这样就完成了一个分类任务服务化API的部署,默认端口号为8866。 + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 +- ### 第二步:发送预测请求 -## Step2: 发送预测请求 + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + ```python + import requests + import json + import cv2 + import base64 + import numpy as np + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + # 发送HTTP请求 + org_im = cv2.imread('/PATH/TO/IMAGE') + data = {'images':[cv2_to_base64(org_im)], 'top_k':2} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/ghostnet_x0_5_imagenet" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + data =r.json()["results"]['data'] + ``` +## 五、更新历史 -```python -import requests -import json -import cv2 -import base64 +* 1.0.0 -import numpy as np - - -def cv2_to_base64(image): - data = cv2.imencode('.jpg', image)[1] - return base64.b64encode(data.tostring()).decode('utf8') - -def base64_to_cv2(b64str): - data = base64.b64decode(b64str.encode('utf8')) - data = np.fromstring(data, np.uint8) - data = cv2.imdecode(data, cv2.IMREAD_COLOR) - return data - -# 发送HTTP请求 -org_im = cv2.imread('/PATH/TO/IMAGE') - -data = {'images':[cv2_to_base64(org_im)], 'top_k':2} -headers = {"Content-type": "application/json"} -url = "http://127.0.0.1:8866/predict/ghostnet_x0_5_imagenet" -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -data =r.json()["results"]['data'] -``` - -### 查看代码 - -[PaddleClas](https://github.com/PaddlePaddle/PaddleClas) - -### 依赖 - -paddlepaddle >= 2.0.0 - -paddlehub >= 2.0.0 + 初始发布 diff --git a/modules/image/classification/ghostnet_x1_0_imagenet/README.md b/modules/image/classification/ghostnet_x1_0_imagenet/README.md index 9e25c471b72cf96393766d3c46eeb161ea7489b2..1708020b63fcdfbdb445f1dd8a86fd312e9b61e5 100644 --- a/modules/image/classification/ghostnet_x1_0_imagenet/README.md +++ b/modules/image/classification/ghostnet_x1_0_imagenet/README.md @@ -1,192 +1,181 @@ -```shell -$ hub install ghostnet_x1_0_imagenet==1.0.0 -``` +# ghostnet_x1_0_imagenet -## 命令行预测 +|模型名称|ghostnet_x1_0_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|GhostNet| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|是| +|模型大小|30MB| +|指标|-| +|最新更新日期|2021-09-14| -```shell -$ hub run ghostnet_x1_0_imagenet --input_path "/PATH/TO/IMAGE" --top_k 5 -``` -## 脚本预测 +## 一、模型基本信息 -```python -import paddle -import paddlehub as hub +- ### 模型介绍 -if __name__ == '__main__': + - GhostNet是华为在2020年提出的全新轻量级网络结构,通过引入ghost模块,大大缓解了传统深度网络中特征的冗余计算问题,大大减少了网络参数和计算量。 - model = 
hub.Module(name='ghostnet_x1_0_imagenet',) - result = model.predict([PATH/TO/IMAGE]) -``` -## Fine-tune代码步骤 +## 二、安装 -使用PaddleHub Fine-tune API进行Fine-tune可以分为4个步骤。 +- ### 1、环境依赖 -### Step1: 定义数据预处理方式 -```python -import paddlehub.vision.transforms as T + - paddlepaddle >= 2.0.0 -transforms = T.Compose([T.Resize((256, 256)), - T.CenterCrop(224), - T.Normalize(mean=[0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225])], - to_rgb=True) -``` + - paddlehub >= 2.0.0 -'transforms' 数据增强模块定义了丰富的数据预处理方式,用户可按照需求替换自己需要的数据预处理方式。 +- ### 2、安装 + - ```shell + $ hub install ghostnet_x1_0_imagenet + ``` -### Step2: 下载数据集并使用 -```python -from paddlehub.datasets import Flowers + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) -flowers = Flowers(transforms) -flowers_validate = Flowers(transforms, mode='val') -``` -* transforms(Callable): 数据预处理方式。 -* mode(str): 选择数据模式,可选项有 'train', 'test', 'val', 默认为'train'。 +## 三、模型API预测 -'hub.datasets.Flowers()' 会自动从网络下载数据集并解压到用户目录下'$HOME/.paddlehub/dataset'目录。 +- ### 1.命令行预测 + ```shell + $ hub run ghostnet_x1_0_imagenet --input_path "/PATH/TO/IMAGE" --top_k 5 + ``` +- ### 2.预测代码示例 -### Step3: 加载预训练模型 + ```python + import paddle + import paddlehub as hub + if __name__ == '__main__': + model = hub.Module(name='ghostnet_x1_0_imagenet') + result = model.predict(['flower.jpg']) + ``` +- ### 3.如何开始Fine-tune -```python -import paddlehub as hub + - 在完成安装PaddlePaddle与PaddleHub后,通过执行`python train.py`即可开始使用ghostnet_x1_0_imagenet对[Flowers](../../docs/reference/datasets.md#class-hubdatasetsflowers)等数据集进行Fine-tune。 -model = hub.Module(name='ghostnet_x1_0_imagenet', - label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"], - load_checkpoint=None) -``` -* name(str): 选择预训练模型的名字。 -* label_list(list): 设置标签对应分类类别, 默认为Imagenet2012类别。 -* load _checkpoint(str): 模型参数地址。 + - 代码步骤 -PaddleHub提供许多图像分类预训练模型,如xception、mobilenet、efficientnet等,详细信息参见[图像分类模型](https://www.paddlepaddle.org.cn/hub?filter=en_category&value=ImageClassification)。 + - Step1: 定义数据预处理方式 + - ```python + import paddlehub.vision.transforms as T + transforms = T.Compose([T.Resize((256, 256)), + T.CenterCrop(224), + T.Normalize(mean=[0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225])], + to_rgb=True) + ``` -如果想尝试efficientnet模型,只需要更换Module中的'name'参数即可. 
-```python -import paddlehub as hub + - `transforms` 数据增强模块定义了丰富的数据预处理方式,用户可按照需求替换自己需要的数据预处理方式。 -# 更换name参数即可无缝切换efficientnet模型, 代码示例如下 -module = hub.Module(name="efficientnetb7_imagenet") -``` -**NOTE:**目前部分模型还没有完全升级到2.0版本,敬请期待。 + - Step2: 下载数据集并使用 + - ```python + from paddlehub.datasets import Flowers + flowers = Flowers(transforms) + flowers_validate = Flowers(transforms, mode='val') + ``` -### Step4: 选择优化策略和运行配置 + * `transforms`: 数据预处理方式。 + * `mode`: 选择数据模式,可选项有 `train`, `test`, `val`, 默认为`train`。 -```python -import paddle -from paddlehub.finetune.trainer import Trainer + * 数据集的准备代码可以参考 [flowers.py](../../paddlehub/datasets/flowers.py)。`hub.datasets.Flowers()` 会自动从网络下载数据集并解压到用户目录下`$HOME/.paddlehub/dataset`目录。 -optimizer = paddle.optimizer.Adam(learning_rate=0.001, parameters=model.parameters()) -trainer = Trainer(model, optimizer, checkpoint_dir='img_classification_ckpt') -trainer.train(flowers, epochs=100, batch_size=32, eval_dataset=flowers_validate, save_interval=1) -``` + - Step3: 加载预训练模型 -#### 优化策略 + - ```python + model = hub.Module(name="ghostnet_x1_0_imagenet", label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"]) + ``` + * `name`: 选择预训练模型的名字。 + * `label_list`: 设置输出分类类别,默认为Imagenet2012类别。 -Paddle2.0rc提供了多种优化器选择,如'SGD', 'Adam', 'Adamax'等,详细参见[策略](https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0-rc/api/paddle/optimizer/optimizer/Optimizer_cn.html)。 + - Step4: 选择优化策略和运行配置 -其中'Adam': + ```python + optimizer = paddle.optimizer.Adam(learning_rate=0.001, parameters=model.parameters()) + trainer = Trainer(model, optimizer, checkpoint_dir='img_classification_ckpt') + trainer.train(flowers, epochs=100, batch_size=32, eval_dataset=flowers_validate, save_interval=1) + ``` -* learning_rate: 全局学习率。默认为1e-3; -* parameters: 待优化模型参数。 -#### 运行配置 -'Trainer' 主要控制Fine-tune的训练,包含以下可控制的参数: + - 运行配置 -* model: 被优化模型; -* optimizer: 优化器选择; -* use_vdl: 是否使用vdl可视化训练过程; -* checkpoint_dir: 保存模型参数的地址; -* compare_metrics: 保存最优模型的衡量指标; + - `Trainer` 主要控制Fine-tune的训练,包含以下可控制的参数: -'trainer.train' 主要控制具体的训练过程,包含以下可控制的参数: + * `model`: 被优化模型; + * `optimizer`: 优化器选择; + * `use_vdl`: 是否使用vdl可视化训练过程; + * `checkpoint_dir`: 保存模型参数的地址; + * `compare_metrics`: 保存最优模型的衡量指标; -* train_dataset: 训练时所用的数据集; -* epochs: 训练轮数; -* batch_size: 训练的批大小,如果使用GPU,请根据实际情况调整batch_size; -* num_workers: works的数量,默认为0; -* eval_dataset: 验证集; -* log_interval: 打印日志的间隔, 单位为执行批训练的次数。 -* save_interval: 保存模型的间隔频次,单位为执行训练的轮数。 + - `trainer.train` 主要控制具体的训练过程,包含以下可控制的参数: -## 模型预测 + * `train_dataset`: 训练时所用的数据集; + * `epochs`: 训练轮数; + * `batch_size`: 训练的批大小,如果使用GPU,请根据实际情况调整batch_size; + * `num_workers`: works的数量,默认为0; + * `eval_dataset`: 验证集; + * `log_interval`: 打印日志的间隔, 单位为执行批训练的次数。 + * `save_interval`: 保存模型的间隔频次,单位为执行训练的轮数。 -当完成Fine-tune后,Fine-tune过程在验证集上表现最优的模型会被保存在'${CHECKPOINT_DIR}/best_model'目录下,其中'${CHECKPOINT_DIR}'目录为Fine-tune时所选择的保存checkpoint的目录。 -我们使用该模型来进行预测。predict.py脚本如下: + - 模型预测 -```python -import paddle -import paddlehub as hub + - 当完成Fine-tune后,Fine-tune过程在验证集上表现最优的模型会被保存在`${CHECKPOINT_DIR}/best_model`目录下,其中`${CHECKPOINT_DIR}`目录为Fine-tune时所选择的保存checkpoint的目录。 我们使用该模型来进行预测。predict.py脚本如下: -if __name__ == '__main__': + - ```python + import paddle + import paddlehub as hub + if __name__ == '__main__': + model = hub.Module(name='ghostnet_x1_0_imagenet', label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"], load_checkpoint='/PATH/TO/CHECKPOINT') + result = model.predict(['flower.jpg']) + ``` - model = hub.Module(name='ghostnet_x1_0_imagenet', label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"], 
load_checkpoint='/PATH/TO/CHECKPOINT') - result = model.predict(['flower.jpg']) -``` -参数配置正确后,请执行脚本'python predict.py', 加载模型具体可参见[加载](https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0-rc/api/paddle/framework/io/load_cn.html#load)。 + - **NOTE:** 进行预测时,所选择的module,checkpoint_dir,dataset必须和Fine-tune所用的一样。 -**NOTE:** 进行预测时,所选择的module,checkpoint_dir,dataset必须和Fine-tune所用的一样。 +## 四、服务部署 -## 服务部署 +- PaddleHub Serving可以部署一个在线分类任务服务。 -PaddleHub Serving可以部署一个在线分类任务服务 +- ### 第一步:启动PaddleHub Serving -## Step1: 启动PaddleHub Serving + - 运行启动命令: -运行启动命令: + - ```shell + $ hub serving start -m ghostnet_x1_0_imagenet + ``` -```shell -$ hub serving start -m ghostnet_x1_0_imagenet -``` + - 这样就完成了一个分类任务服务化API的部署,默认端口号为8866。 -这样就完成了一个分类任务服务化API的部署,默认端口号为8866。 + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 +- ### 第二步:发送预测请求 -## Step2: 发送预测请求 + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + ```python + import requests + import json + import cv2 + import base64 + import numpy as np + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + # 发送HTTP请求 + org_im = cv2.imread('/PATH/TO/IMAGE') + data = {'images':[cv2_to_base64(org_im)], 'top_k':2} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/ghostnet_x1_0_imagenet" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + data =r.json()["results"]['data'] + ``` +## 五、更新历史 -```python -import requests -import json -import cv2 -import base64 +* 1.0.0 -import numpy as np - - -def cv2_to_base64(image): - data = cv2.imencode('.jpg', image)[1] - return base64.b64encode(data.tostring()).decode('utf8') - -def base64_to_cv2(b64str): - data = base64.b64decode(b64str.encode('utf8')) - data = np.fromstring(data, np.uint8) - data = cv2.imdecode(data, cv2.IMREAD_COLOR) - return data - -# 发送HTTP请求 -org_im = cv2.imread('/PATH/TO/IMAGE') - -data = {'images':[cv2_to_base64(org_im)], 'top_k':2} -headers = {"Content-type": "application/json"} -url = "http://127.0.0.1:8866/predict/ghostnet_x1_0_imagenet" -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -data =r.json()["results"]['data'] -``` - -### 查看代码 - -[PaddleClas](https://github.com/PaddlePaddle/PaddleClas) - -### 依赖 - -paddlepaddle >= 2.0.0 - -paddlehub >= 2.0.0 + 初始发布 diff --git a/modules/image/classification/ghostnet_x1_3_imagenet/README.md b/modules/image/classification/ghostnet_x1_3_imagenet/README.md index 72189fcd68cd3b61d7937c3de43853d380fbf0c1..a5f0a27a3e40ed57ce1345e70e1027875697077b 100644 --- a/modules/image/classification/ghostnet_x1_3_imagenet/README.md +++ b/modules/image/classification/ghostnet_x1_3_imagenet/README.md @@ -1,192 +1,181 @@ -```shell -$ hub install ghostnet_x1_3_imagenet==1.0.0 -``` +# ghostnet_x1_3_imagenet -## 命令行预测 +|模型名称|ghostnet_x1_3_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|GhostNet| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|是| +|模型大小|43MB| +|指标|-| +|最新更新日期|2021-09-14| -```shell -$ hub run ghostnet_x1_3_imagenet --input_path "/PATH/TO/IMAGE" --top_k 5 -``` -## 脚本预测 +## 一、模型基本信息 -```python -import paddle -import paddlehub as hub +- ### 模型介绍 -if __name__ == '__main__': + - GhostNet是华为在2020年提出的全新轻量级网络结构,通过引入ghost模块,大大缓解了传统深度网络中特征的冗余计算问题,大大减少了网络参数和计算量。 - model = 
hub.Module(name='ghostnet_x1_3_imagenet',) - result = model.predict([PATH/TO/IMAGE]) -``` -## Fine-tune代码步骤 +## 二、安装 -使用PaddleHub Fine-tune API进行Fine-tune可以分为4个步骤。 +- ### 1、环境依赖 -### Step1: 定义数据预处理方式 -```python -import paddlehub.vision.transforms as T + - paddlepaddle >= 2.0.0 -transforms = T.Compose([T.Resize((256, 256)), - T.CenterCrop(224), - T.Normalize(mean=[0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225])], - to_rgb=True) -``` + - paddlehub >= 2.0.0 -'transforms' 数据增强模块定义了丰富的数据预处理方式,用户可按照需求替换自己需要的数据预处理方式。 +- ### 2、安装 + - ```shell + $ hub install ghostnet_x1_3_imagenet + ``` -### Step2: 下载数据集并使用 -```python -from paddlehub.datasets import Flowers + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) -flowers = Flowers(transforms) -flowers_validate = Flowers(transforms, mode='val') -``` -* transforms(Callable): 数据预处理方式。 -* mode(str): 选择数据模式,可选项有 'train', 'test', 'val', 默认为'train'。 +## 三、模型API预测 -'hub.datasets.Flowers()' 会自动从网络下载数据集并解压到用户目录下'$HOME/.paddlehub/dataset'目录。 +- ### 1.命令行预测 + ```shell + $ hub run ghostnet_x1_3_imagenet --input_path "/PATH/TO/IMAGE" --top_k 5 + ``` +- ### 2.预测代码示例 -### Step3: 加载预训练模型 + ```python + import paddle + import paddlehub as hub + if __name__ == '__main__': + model = hub.Module(name='ghostnet_x1_3_imagenet') + result = model.predict(['flower.jpg']) + ``` +- ### 3.如何开始Fine-tune -```python -import paddlehub as hub + - 在完成安装PaddlePaddle与PaddleHub后,通过执行`python train.py`即可开始使用ghostnet_x1_3_imagenet对[Flowers](../../docs/reference/datasets.md#class-hubdatasetsflowers)等数据集进行Fine-tune。 -model = hub.Module(name='ghostnet_x1_3_imagenet', - label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"], - load_checkpoint=None) -``` -* name(str): 选择预训练模型的名字。 -* label_list(list): 设置标签对应分类类别, 默认为Imagenet2012类别。 -* load _checkpoint(str): 模型参数地址。 + - 代码步骤 -PaddleHub提供许多图像分类预训练模型,如xception、mobilenet、efficientnet等,详细信息参见[图像分类模型](https://www.paddlepaddle.org.cn/hub?filter=en_category&value=ImageClassification)。 + - Step1: 定义数据预处理方式 + - ```python + import paddlehub.vision.transforms as T + transforms = T.Compose([T.Resize((256, 256)), + T.CenterCrop(224), + T.Normalize(mean=[0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225])], + to_rgb=True) + ``` -如果想尝试efficientnet模型,只需要更换Module中的'name'参数即可. 
-```python -import paddlehub as hub + - `transforms` 数据增强模块定义了丰富的数据预处理方式,用户可按照需求替换自己需要的数据预处理方式。 -# 更换name参数即可无缝切换efficientnet模型, 代码示例如下 -module = hub.Module(name="efficientnetb7_imagenet") -``` -**NOTE:**目前部分模型还没有完全升级到2.0版本,敬请期待。 + - Step2: 下载数据集并使用 + - ```python + from paddlehub.datasets import Flowers + flowers = Flowers(transforms) + flowers_validate = Flowers(transforms, mode='val') + ``` -### Step4: 选择优化策略和运行配置 + * `transforms`: 数据预处理方式。 + * `mode`: 选择数据模式,可选项有 `train`, `test`, `val`, 默认为`train`。 -```python -import paddle -from paddlehub.finetune.trainer import Trainer + * 数据集的准备代码可以参考 [flowers.py](../../paddlehub/datasets/flowers.py)。`hub.datasets.Flowers()` 会自动从网络下载数据集并解压到用户目录下`$HOME/.paddlehub/dataset`目录。 -optimizer = paddle.optimizer.Adam(learning_rate=0.001, parameters=model.parameters()) -trainer = Trainer(model, optimizer, checkpoint_dir='img_classification_ckpt') -trainer.train(flowers, epochs=100, batch_size=32, eval_dataset=flowers_validate, save_interval=1) -``` + - Step3: 加载预训练模型 -#### 优化策略 + - ```python + model = hub.Module(name="ghostnet_x1_3_imagenet", label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"]) + ``` + * `name`: 选择预训练模型的名字。 + * `label_list`: 设置输出分类类别,默认为Imagenet2012类别。 -Paddle2.0rc提供了多种优化器选择,如'SGD', 'Adam', 'Adamax'等,详细参见[策略](https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0-rc/api/paddle/optimizer/optimizer/Optimizer_cn.html)。 + - Step4: 选择优化策略和运行配置 -其中'Adam': + ```python + optimizer = paddle.optimizer.Adam(learning_rate=0.001, parameters=model.parameters()) + trainer = Trainer(model, optimizer, checkpoint_dir='img_classification_ckpt') + trainer.train(flowers, epochs=100, batch_size=32, eval_dataset=flowers_validate, save_interval=1) + ``` -* learning_rate: 全局学习率。默认为1e-3; -* parameters: 待优化模型参数。 -#### 运行配置 -'Trainer' 主要控制Fine-tune的训练,包含以下可控制的参数: + - 运行配置 -* model: 被优化模型; -* optimizer: 优化器选择; -* use_vdl: 是否使用vdl可视化训练过程; -* checkpoint_dir: 保存模型参数的地址; -* compare_metrics: 保存最优模型的衡量指标; + - `Trainer` 主要控制Fine-tune的训练,包含以下可控制的参数: -'trainer.train' 主要控制具体的训练过程,包含以下可控制的参数: + * `model`: 被优化模型; + * `optimizer`: 优化器选择; + * `use_vdl`: 是否使用vdl可视化训练过程; + * `checkpoint_dir`: 保存模型参数的地址; + * `compare_metrics`: 保存最优模型的衡量指标; -* train_dataset: 训练时所用的数据集; -* epochs: 训练轮数; -* batch_size: 训练的批大小,如果使用GPU,请根据实际情况调整batch_size; -* num_workers: works的数量,默认为0; -* eval_dataset: 验证集; -* log_interval: 打印日志的间隔, 单位为执行批训练的次数。 -* save_interval: 保存模型的间隔频次,单位为执行训练的轮数。 + - `trainer.train` 主要控制具体的训练过程,包含以下可控制的参数: -## 模型预测 + * `train_dataset`: 训练时所用的数据集; + * `epochs`: 训练轮数; + * `batch_size`: 训练的批大小,如果使用GPU,请根据实际情况调整batch_size; + * `num_workers`: works的数量,默认为0; + * `eval_dataset`: 验证集; + * `log_interval`: 打印日志的间隔, 单位为执行批训练的次数。 + * `save_interval`: 保存模型的间隔频次,单位为执行训练的轮数。 -当完成Fine-tune后,Fine-tune过程在验证集上表现最优的模型会被保存在'${CHECKPOINT_DIR}/best_model'目录下,其中'${CHECKPOINT_DIR}'目录为Fine-tune时所选择的保存checkpoint的目录。 -我们使用该模型来进行预测。predict.py脚本如下: + - 模型预测 -```python -import paddle -import paddlehub as hub + - 当完成Fine-tune后,Fine-tune过程在验证集上表现最优的模型会被保存在`${CHECKPOINT_DIR}/best_model`目录下,其中`${CHECKPOINT_DIR}`目录为Fine-tune时所选择的保存checkpoint的目录。 我们使用该模型来进行预测。predict.py脚本如下: -if __name__ == '__main__': + - ```python + import paddle + import paddlehub as hub + if __name__ == '__main__': + model = hub.Module(name='ghostnet_x1_3_imagenet', label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"], load_checkpoint='/PATH/TO/CHECKPOINT') + result = model.predict(['flower.jpg']) + ``` - model = hub.Module(name='ghostnet_x1_3_imagenet', label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"], 
load_checkpoint='/PATH/TO/CHECKPOINT') - result = model.predict(['flower.jpg']) -``` -参数配置正确后,请执行脚本'python predict.py', 加载模型具体可参见[加载](https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0-rc/api/paddle/framework/io/load_cn.html#load)。 + - **NOTE:** 进行预测时,所选择的module,checkpoint_dir,dataset必须和Fine-tune所用的一样。 -**NOTE:** 进行预测时,所选择的module,checkpoint_dir,dataset必须和Fine-tune所用的一样。 +## 四、服务部署 -## 服务部署 +- PaddleHub Serving可以部署一个在线分类任务服务。 -PaddleHub Serving可以部署一个在线分类任务服务 +- ### 第一步:启动PaddleHub Serving -## Step1: 启动PaddleHub Serving + - 运行启动命令: -运行启动命令: + - ```shell + $ hub serving start -m ghostnet_x1_3_imagenet + ``` -```shell -$ hub serving start -m ghostnet_x1_3_imagenet -``` + - 这样就完成了一个分类任务服务化API的部署,默认端口号为8866。 -这样就完成了一个分类任务服务化API的部署,默认端口号为8866。 + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 +- ### 第二步:发送预测请求 -## Step2: 发送预测请求 + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + ```python + import requests + import json + import cv2 + import base64 + import numpy as np + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + # 发送HTTP请求 + org_im = cv2.imread('/PATH/TO/IMAGE') + data = {'images':[cv2_to_base64(org_im)], 'top_k':2} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/ghostnet_x1_3_imagenet" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + data =r.json()["results"]['data'] + ``` +## 五、更新历史 -```python -import requests -import json -import cv2 -import base64 +* 1.0.0 -import numpy as np - - -def cv2_to_base64(image): - data = cv2.imencode('.jpg', image)[1] - return base64.b64encode(data.tostring()).decode('utf8') - -def base64_to_cv2(b64str): - data = base64.b64decode(b64str.encode('utf8')) - data = np.fromstring(data, np.uint8) - data = cv2.imdecode(data, cv2.IMREAD_COLOR) - return data - -# 发送HTTP请求 -org_im = cv2.imread('/PATH/TO/IMAGE') - -data = {'images':[cv2_to_base64(org_im)], 'top_k':2} -headers = {"Content-type": "application/json"} -url = "http://127.0.0.1:8866/predict/ghostnet_x1_3_imagenet" -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -data =r.json()["results"]['data'] -``` - -### 查看代码 - -[PaddleClas](https://github.com/PaddlePaddle/PaddleClas) - -### 依赖 - -paddlepaddle >= 2.0.0 - -paddlehub >= 2.0.0 + 初始发布 diff --git a/modules/image/classification/ghostnet_x1_3_imagenet_ssld/README.md b/modules/image/classification/ghostnet_x1_3_imagenet_ssld/README.md index ef310be9ec7f4112fc7d02cc962bd5734da8a92f..b3e87610246e2568ddc65006d9f83b46a14097a6 100644 --- a/modules/image/classification/ghostnet_x1_3_imagenet_ssld/README.md +++ b/modules/image/classification/ghostnet_x1_3_imagenet_ssld/README.md @@ -1,192 +1,181 @@ -```shell -$ hub install ghostnet_x1_3_imagenet_ssld==1.0.0 -``` +# ghostnet_x1_3_imagenet_ssld -## 命令行预测 +|模型名称|ghostnet_x1_3_imagenet_ssld| +| :--- | :---: | +|类别|图像-图像分类| +|网络|GhostNet| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|是| +|模型大小|43MB| +|指标|-| +|最新更新日期|2021-09-14| -```shell -$ hub run ghostnet_x1_3_imagenet_ssld --input_path "/PATH/TO/IMAGE" --top_k 5 -``` -## 脚本预测 +## 一、模型基本信息 -```python -import paddle -import paddlehub as hub +- ### 模型介绍 -if __name__ == '__main__': + - 
GhostNet是华为在2020年提出的全新轻量级网络结构,通过引入ghost模块,大大缓解了传统深度网络中特征的冗余计算问题,大大减少了网络参数和计算量。 - model = hub.Module(name='ghostnet_x1_3_imagenet_ssld',) - result = model.predict([PATH/TO/IMAGE]) -``` -## Fine-tune代码步骤 +## 二、安装 -使用PaddleHub Fine-tune API进行Fine-tune可以分为4个步骤。 +- ### 1、环境依赖 -### Step1: 定义数据预处理方式 -```python -import paddlehub.vision.transforms as T + - paddlepaddle >= 2.0.0 -transforms = T.Compose([T.Resize((256, 256)), - T.CenterCrop(224), - T.Normalize(mean=[0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225])], - to_rgb=True) -``` + - paddlehub >= 2.0.0 -'transforms' 数据增强模块定义了丰富的数据预处理方式,用户可按照需求替换自己需要的数据预处理方式。 +- ### 2、安装 + - ```shell + $ hub install ghostnet_x1_3_imagenet_ssld + ``` -### Step2: 下载数据集并使用 -```python -from paddlehub.datasets import Flowers + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) -flowers = Flowers(transforms) -flowers_validate = Flowers(transforms, mode='val') -``` -* transforms(Callable): 数据预处理方式。 -* mode(str): 选择数据模式,可选项有 'train', 'test', 'val', 默认为'train'。 +## 三、模型API预测 -'hub.datasets.Flowers()' 会自动从网络下载数据集并解压到用户目录下'$HOME/.paddlehub/dataset'目录。 +- ### 1.命令行预测 + ```shell + $ hub run ghostnet_x1_3_imagenet_ssld --input_path "/PATH/TO/IMAGE" --top_k 5 + ``` +- ### 2.预测代码示例 -### Step3: 加载预训练模型 + ```python + import paddle + import paddlehub as hub + if __name__ == '__main__': + model = hub.Module(name='ghostnet_x1_3_imagenet_ssld') + result = model.predict(['flower.jpg']) + ``` +- ### 3.如何开始Fine-tune -```python -import paddlehub as hub + - 在完成安装PaddlePaddle与PaddleHub后,通过执行`python train.py`即可开始使用ghostnet_x1_3_imagenet_ssld对[Flowers](../../docs/reference/datasets.md#class-hubdatasetsflowers)等数据集进行Fine-tune。 -model = hub.Module(name='ghostnet_x1_3_imagenet_ssld', - label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"], - load_checkpoint=None) -``` -* name(str): 选择预训练模型的名字。 -* label_list(list): 设置标签对应分类类别, 默认为Imagenet2012类别。 -* load _checkpoint(str): 模型参数地址。 + - 代码步骤 -PaddleHub提供许多图像分类预训练模型,如xception、mobilenet、efficientnet等,详细信息参见[图像分类模型](https://www.paddlepaddle.org.cn/hub?filter=en_category&value=ImageClassification)。 + - Step1: 定义数据预处理方式 + - ```python + import paddlehub.vision.transforms as T + transforms = T.Compose([T.Resize((256, 256)), + T.CenterCrop(224), + T.Normalize(mean=[0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225])], + to_rgb=True) + ``` -如果想尝试efficientnet模型,只需要更换Module中的'name'参数即可. 
-```python -import paddlehub as hub + - `transforms` 数据增强模块定义了丰富的数据预处理方式,用户可按照需求替换自己需要的数据预处理方式。 -# 更换name参数即可无缝切换efficientnet模型, 代码示例如下 -module = hub.Module(name="efficientnetb7_imagenet") -``` -**NOTE:**目前部分模型还没有完全升级到2.0版本,敬请期待。 + - Step2: 下载数据集并使用 + - ```python + from paddlehub.datasets import Flowers + flowers = Flowers(transforms) + flowers_validate = Flowers(transforms, mode='val') + ``` -### Step4: 选择优化策略和运行配置 + * `transforms`: 数据预处理方式。 + * `mode`: 选择数据模式,可选项有 `train`, `test`, `val`, 默认为`train`。 -```python -import paddle -from paddlehub.finetune.trainer import Trainer + * 数据集的准备代码可以参考 [flowers.py](../../paddlehub/datasets/flowers.py)。`hub.datasets.Flowers()` 会自动从网络下载数据集并解压到用户目录下`$HOME/.paddlehub/dataset`目录。 -optimizer = paddle.optimizer.Adam(learning_rate=0.001, parameters=model.parameters()) -trainer = Trainer(model, optimizer, checkpoint_dir='img_classification_ckpt') -trainer.train(flowers, epochs=100, batch_size=32, eval_dataset=flowers_validate, save_interval=1) -``` + - Step3: 加载预训练模型 -#### 优化策略 + - ```python + model = hub.Module(name="ghostnet_x1_3_imagenet_ssld", label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"]) + ``` + * `name`: 选择预训练模型的名字。 + * `label_list`: 设置输出分类类别,默认为Imagenet2012类别。 -Paddle2.0rc提供了多种优化器选择,如'SGD', 'Adam', 'Adamax'等,详细参见[策略](https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0-rc/api/paddle/optimizer/optimizer/Optimizer_cn.html)。 + - Step4: 选择优化策略和运行配置 -其中'Adam': + ```python + optimizer = paddle.optimizer.Adam(learning_rate=0.001, parameters=model.parameters()) + trainer = Trainer(model, optimizer, checkpoint_dir='img_classification_ckpt') + trainer.train(flowers, epochs=100, batch_size=32, eval_dataset=flowers_validate, save_interval=1) + ``` -* learning_rate: 全局学习率。默认为1e-3; -* parameters: 待优化模型参数。 -#### 运行配置 -'Trainer' 主要控制Fine-tune的训练,包含以下可控制的参数: + - 运行配置 -* model: 被优化模型; -* optimizer: 优化器选择; -* use_vdl: 是否使用vdl可视化训练过程; -* checkpoint_dir: 保存模型参数的地址; -* compare_metrics: 保存最优模型的衡量指标; + - `Trainer` 主要控制Fine-tune的训练,包含以下可控制的参数: -'trainer.train' 主要控制具体的训练过程,包含以下可控制的参数: + * `model`: 被优化模型; + * `optimizer`: 优化器选择; + * `use_vdl`: 是否使用vdl可视化训练过程; + * `checkpoint_dir`: 保存模型参数的地址; + * `compare_metrics`: 保存最优模型的衡量指标; -* train_dataset: 训练时所用的数据集; -* epochs: 训练轮数; -* batch_size: 训练的批大小,如果使用GPU,请根据实际情况调整batch_size; -* num_workers: works的数量,默认为0; -* eval_dataset: 验证集; -* log_interval: 打印日志的间隔, 单位为执行批训练的次数。 -* save_interval: 保存模型的间隔频次,单位为执行训练的轮数。 + - `trainer.train` 主要控制具体的训练过程,包含以下可控制的参数: -## 模型预测 + * `train_dataset`: 训练时所用的数据集; + * `epochs`: 训练轮数; + * `batch_size`: 训练的批大小,如果使用GPU,请根据实际情况调整batch_size; + * `num_workers`: works的数量,默认为0; + * `eval_dataset`: 验证集; + * `log_interval`: 打印日志的间隔, 单位为执行批训练的次数。 + * `save_interval`: 保存模型的间隔频次,单位为执行训练的轮数。 -当完成Fine-tune后,Fine-tune过程在验证集上表现最优的模型会被保存在'${CHECKPOINT_DIR}/best_model'目录下,其中'${CHECKPOINT_DIR}'目录为Fine-tune时所选择的保存checkpoint的目录。 -我们使用该模型来进行预测。predict.py脚本如下: + - 模型预测 -```python -import paddle -import paddlehub as hub + - 当完成Fine-tune后,Fine-tune过程在验证集上表现最优的模型会被保存在`${CHECKPOINT_DIR}/best_model`目录下,其中`${CHECKPOINT_DIR}`目录为Fine-tune时所选择的保存checkpoint的目录。 我们使用该模型来进行预测。predict.py脚本如下: -if __name__ == '__main__': + - ```python + import paddle + import paddlehub as hub + if __name__ == '__main__': + model = hub.Module(name='ghostnet_x1_3_imagenet_ssld', label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"], load_checkpoint='/PATH/TO/CHECKPOINT') + result = model.predict(['flower.jpg']) + ``` - model = hub.Module(name='ghostnet_x1_3_imagenet_ssld', label_list=["roses", "tulips", "daisy", "sunflowers", 
"dandelion"], load_checkpoint='/PATH/TO/CHECKPOINT') - result = model.predict(['flower.jpg']) -``` -参数配置正确后,请执行脚本'python predict.py', 加载模型具体可参见[加载](https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0-rc/api/paddle/framework/io/load_cn.html#load)。 + - **NOTE:** 进行预测时,所选择的module,checkpoint_dir,dataset必须和Fine-tune所用的一样。 -**NOTE:** 进行预测时,所选择的module,checkpoint_dir,dataset必须和Fine-tune所用的一样。 +## 四、服务部署 -## 服务部署 +- PaddleHub Serving可以部署一个在线分类任务服务。 -PaddleHub Serving可以部署一个在线分类任务服务 +- ### 第一步:启动PaddleHub Serving -## Step1: 启动PaddleHub Serving + - 运行启动命令: -运行启动命令: + - ```shell + $ hub serving start -m ghostnet_x1_3_imagenet_ssld + ``` -```shell -$ hub serving start -m ghostnet_x1_3_imagenet_ssld -``` + - 这样就完成了一个分类任务服务化API的部署,默认端口号为8866。 -这样就完成了一个分类任务服务化API的部署,默认端口号为8866。 + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 +- ### 第二步:发送预测请求 -## Step2: 发送预测请求 + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + ```python + import requests + import json + import cv2 + import base64 + import numpy as np + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + # 发送HTTP请求 + org_im = cv2.imread('/PATH/TO/IMAGE') + data = {'images':[cv2_to_base64(org_im)], 'top_k':2} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/ghostnet_x1_3_imagenet_ssld" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + data =r.json()["results"]['data'] + ``` +## 五、更新历史 -```python -import requests -import json -import cv2 -import base64 +* 1.0.0 -import numpy as np - - -def cv2_to_base64(image): - data = cv2.imencode('.jpg', image)[1] - return base64.b64encode(data.tostring()).decode('utf8') - -def base64_to_cv2(b64str): - data = base64.b64decode(b64str.encode('utf8')) - data = np.fromstring(data, np.uint8) - data = cv2.imdecode(data, cv2.IMREAD_COLOR) - return data - -# 发送HTTP请求 -org_im = cv2.imread('/PATH/TO/IMAGE') - -data = {'images':[cv2_to_base64(org_im)], 'top_k':2} -headers = {"Content-type": "application/json"} -url = "http://127.0.0.1:8866/predict/ghostnet_x1_3_imagenet_ssld" -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -data =r.json()["results"]['data'] -``` - -### 查看代码 - -[PaddleClas](https://github.com/PaddlePaddle/PaddleClas) - -### 依赖 - -paddlepaddle >= 2.0.0 - -paddlehub >= 2.0.0 + 初始发布 diff --git a/modules/image/classification/googlenet_imagenet/README.md b/modules/image/classification/googlenet_imagenet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7dec0850aea31f81e2488829ea20d7e80a8f6c3d --- /dev/null +++ b/modules/image/classification/googlenet_imagenet/README.md @@ -0,0 +1,84 @@ +# googlenet_imagenet + +|模型名称|googlenet_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|GoogleNet| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|否| +|模型大小|28MB| +|最新更新日期|-| +|数据指标|-| + + +## 一、模型基本信息 + + + +- ### 模型介绍 + + - GoogleNet是图像分类中的经典模型。由Christian Szegedy等人在2014年提出,并获得了2014年ILSVRC竞赛冠军。该PaddleHub Module结构为GoogleNet,基于ImageNet-2012数据集训练,接受输入图片大小为224 x 224 x 3,支持直接通过命令行或者Python接口进行预测。 + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + + +- ### 2、安装 + + - ```shell + $ hub install 
googlenet_imagenet + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run googlenet_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现图像分类模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="googlenet_imagenet") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - 分类接口API。 + - **参数** + - data:dict类型,key为image,str类型,value为待检测的图片路径,list类型。 + + - **返回** + - result:list类型,每个元素为对应输入图片的预测结果。预测结果为dict类型,key为该图片分类结果label,value为该label对应的概率。 + + + + + +## 四、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install googlenet_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/hrnet18_imagenet/README.md b/modules/image/classification/hrnet18_imagenet/README.md index d1530c8902706896bd01aa7c93a039109c7f3b2f..9cd941f141df7799f60d08294969d247ecab064b 100644 --- a/modules/image/classification/hrnet18_imagenet/README.md +++ b/modules/image/classification/hrnet18_imagenet/README.md @@ -1,192 +1,181 @@ -```shell -$ hub install hrnet18_imagenet==1.0.0 -``` +# hrnet18_imagenet -## 命令行预测 +|模型名称|hrnet18_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|HRNet| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|是| +|模型大小|124MB| +|指标|-| +|最新更新日期|2021-09-14| -```shell -$ hub run hrnet18_imagenet --input_path "/PATH/TO/IMAGE" --top_k 5 -``` -## 脚本预测 +## 一、模型基本信息 -```python -import paddle -import paddlehub as hub +- ### 模型介绍 -if __name__ == '__main__': + - HRNet是微软亚洲研究院在2019年提出的全新神经网络。与之前的卷积神经网络不同,这个网络在网络的深层依然可以保持高分辨率,所以预测的关键点的热图更加准确,而且在空间上也更加准确。此外,该网络在其他对分辨率敏感的视觉任务中表现特别好,例如检测和分割。 - model = hub.Module(name='hrnet18_imagenet',) - result = model.predict([PATH/TO/IMAGE]) -``` -## Fine-tune代码步骤 +## 二、安装 -使用PaddleHub Fine-tune API进行Fine-tune可以分为4个步骤。 +- ### 1、环境依赖 -### Step1: 定义数据预处理方式 -```python -import paddlehub.vision.transforms as T + - paddlepaddle >= 2.0.0 -transforms = T.Compose([T.Resize((256, 256)), - T.CenterCrop(224), - T.Normalize(mean=[0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225])], - to_rgb=True) -``` + - paddlehub >= 2.0.0 -'transforms' 数据增强模块定义了丰富的数据预处理方式,用户可按照需求替换自己需要的数据预处理方式。 +- ### 2、安装 + - ```shell + $ hub install hrnet18_imagenet + ``` -### Step2: 下载数据集并使用 -```python -from paddlehub.datasets import Flowers + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) -flowers = Flowers(transforms) -flowers_validate = Flowers(transforms, mode='val') -``` -* transforms(Callable): 数据预处理方式。 -* mode(str): 选择数据模式,可选项有 'train', 'test', 'val', 默认为'train'。 +## 三、模型API预测 -'hub.datasets.Flowers()' 会自动从网络下载数据集并解压到用户目录下'$HOME/.paddlehub/dataset'目录。 +- ### 1.命令行预测 + ```shell + $ hub run hrnet18_imagenet --input_path "/PATH/TO/IMAGE" --top_k 5 + ``` +- ### 2.预测代码示例 -### Step3: 加载预训练模型 + ```python + import paddle + import paddlehub as hub + if __name__ == '__main__': + model = hub.Module(name='hrnet18_imagenet') + result = model.predict(['flower.jpg']) + ``` +- ### 3.如何开始Fine-tune -```python -import paddlehub as hub + - 
在完成安装PaddlePaddle与PaddleHub后,通过执行`python train.py`即可开始使用hrnet18_imagenet对[Flowers](../../docs/reference/datasets.md#class-hubdatasetsflowers)等数据集进行Fine-tune。 -model = hub.Module(name='hrnet18_imagenet', - label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"], - load_checkpoint=None) -``` -* name(str): 选择预训练模型的名字。 -* label_list(list): 设置标签对应分类类别, 默认为Imagenet2012类别。 -* load _checkpoint(str): 模型参数地址。 + - 代码步骤 -PaddleHub提供许多图像分类预训练模型,如xception、mobilenet、efficientnet等,详细信息参见[图像分类模型](https://www.paddlepaddle.org.cn/hub?filter=en_category&value=ImageClassification)。 + - Step1: 定义数据预处理方式 + - ```python + import paddlehub.vision.transforms as T + transforms = T.Compose([T.Resize((256, 256)), + T.CenterCrop(224), + T.Normalize(mean=[0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225])], + to_rgb=True) + ``` -如果想尝试efficientnet模型,只需要更换Module中的'name'参数即可. -```python -import paddlehub as hub + - `transforms` 数据增强模块定义了丰富的数据预处理方式,用户可按照需求替换自己需要的数据预处理方式。 -# 更换name参数即可无缝切换efficientnet模型, 代码示例如下 -module = hub.Module(name="efficientnetb7_imagenet") -``` -**NOTE:**目前部分模型还没有完全升级到2.0版本,敬请期待。 + - Step2: 下载数据集并使用 + - ```python + from paddlehub.datasets import Flowers + flowers = Flowers(transforms) + flowers_validate = Flowers(transforms, mode='val') + ``` -### Step4: 选择优化策略和运行配置 + * `transforms`: 数据预处理方式。 + * `mode`: 选择数据模式,可选项有 `train`, `test`, `val`, 默认为`train`。 -```python -import paddle -from paddlehub.finetune.trainer import Trainer + * 数据集的准备代码可以参考 [flowers.py](../../paddlehub/datasets/flowers.py)。`hub.datasets.Flowers()` 会自动从网络下载数据集并解压到用户目录下`$HOME/.paddlehub/dataset`目录。 -optimizer = paddle.optimizer.Adam(learning_rate=0.001, parameters=model.parameters()) -trainer = Trainer(model, optimizer, checkpoint_dir='img_classification_ckpt') -trainer.train(flowers, epochs=100, batch_size=32, eval_dataset=flowers_validate, save_interval=1) -``` + - Step3: 加载预训练模型 -#### 优化策略 + - ```python + model = hub.Module(name="hrnet18_imagenet", label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"]) + ``` + * `name`: 选择预训练模型的名字。 + * `label_list`: 设置输出分类类别,默认为Imagenet2012类别。 -Paddle2.0rc提供了多种优化器选择,如'SGD', 'Adam', 'Adamax'等,详细参见[策略](https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0-rc/api/paddle/optimizer/optimizer/Optimizer_cn.html)。 + - Step4: 选择优化策略和运行配置 -其中'Adam': + ```python + optimizer = paddle.optimizer.Adam(learning_rate=0.001, parameters=model.parameters()) + trainer = Trainer(model, optimizer, checkpoint_dir='img_classification_ckpt') + trainer.train(flowers, epochs=100, batch_size=32, eval_dataset=flowers_validate, save_interval=1) + ``` -* learning_rate: 全局学习率。默认为1e-3; -* parameters: 待优化模型参数。 -#### 运行配置 -'Trainer' 主要控制Fine-tune的训练,包含以下可控制的参数: + - 运行配置 -* model: 被优化模型; -* optimizer: 优化器选择; -* use_vdl: 是否使用vdl可视化训练过程; -* checkpoint_dir: 保存模型参数的地址; -* compare_metrics: 保存最优模型的衡量指标; + - `Trainer` 主要控制Fine-tune的训练,包含以下可控制的参数: -'trainer.train' 主要控制具体的训练过程,包含以下可控制的参数: + * `model`: 被优化模型; + * `optimizer`: 优化器选择; + * `use_vdl`: 是否使用vdl可视化训练过程; + * `checkpoint_dir`: 保存模型参数的地址; + * `compare_metrics`: 保存最优模型的衡量指标; -* train_dataset: 训练时所用的数据集; -* epochs: 训练轮数; -* batch_size: 训练的批大小,如果使用GPU,请根据实际情况调整batch_size; -* num_workers: works的数量,默认为0; -* eval_dataset: 验证集; -* log_interval: 打印日志的间隔, 单位为执行批训练的次数。 -* save_interval: 保存模型的间隔频次,单位为执行训练的轮数。 + - `trainer.train` 主要控制具体的训练过程,包含以下可控制的参数: -## 模型预测 + * `train_dataset`: 训练时所用的数据集; + * `epochs`: 训练轮数; + * `batch_size`: 训练的批大小,如果使用GPU,请根据实际情况调整batch_size; + * `num_workers`: works的数量,默认为0; + * `eval_dataset`: 验证集; + * `log_interval`: 打印日志的间隔, 单位为执行批训练的次数。 + * 
`save_interval`: 保存模型的间隔频次,单位为执行训练的轮数。 -当完成Fine-tune后,Fine-tune过程在验证集上表现最优的模型会被保存在'${CHECKPOINT_DIR}/best_model'目录下,其中'${CHECKPOINT_DIR}'目录为Fine-tune时所选择的保存checkpoint的目录。 -我们使用该模型来进行预测。predict.py脚本如下: + - 模型预测 -```python -import paddle -import paddlehub as hub + - 当完成Fine-tune后,Fine-tune过程在验证集上表现最优的模型会被保存在`${CHECKPOINT_DIR}/best_model`目录下,其中`${CHECKPOINT_DIR}`目录为Fine-tune时所选择的保存checkpoint的目录。 我们使用该模型来进行预测。predict.py脚本如下: -if __name__ == '__main__': + - ```python + import paddle + import paddlehub as hub + if __name__ == '__main__': + model = hub.Module(name='hrnet18_imagenet', label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"], load_checkpoint='/PATH/TO/CHECKPOINT') + result = model.predict(['flower.jpg']) + ``` - model = hub.Module(name='hrnet18_imagenet', label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"], load_checkpoint='/PATH/TO/CHECKPOINT') - result = model.predict(['flower.jpg']) -``` -参数配置正确后,请执行脚本'python predict.py', 加载模型具体可参见[加载](https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0-rc/api/paddle/framework/io/load_cn.html#load)。 + - **NOTE:** 进行预测时,所选择的module,checkpoint_dir,dataset必须和Fine-tune所用的一样。 -**NOTE:** 进行预测时,所选择的module,checkpoint_dir,dataset必须和Fine-tune所用的一样。 +## 四、服务部署 -## 服务部署 +- PaddleHub Serving可以部署一个在线分类任务服务。 -PaddleHub Serving可以部署一个在线分类任务服务 +- ### 第一步:启动PaddleHub Serving -## Step1: 启动PaddleHub Serving + - 运行启动命令: -运行启动命令: + - ```shell + $ hub serving start -m hrnet18_imagenet + ``` -```shell -$ hub serving start -m hrnet18_imagenet -``` + - 这样就完成了一个分类任务服务化API的部署,默认端口号为8866。 -这样就完成了一个分类任务服务化API的部署,默认端口号为8866。 + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 +- ### 第二步:发送预测请求 -## Step2: 发送预测请求 + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + ```python + import requests + import json + import cv2 + import base64 + import numpy as np + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + # 发送HTTP请求 + org_im = cv2.imread('/PATH/TO/IMAGE') + data = {'images':[cv2_to_base64(org_im)], 'top_k':2} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/hrnet18_imagenet" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + data =r.json()["results"]['data'] + ``` +## 五、更新历史 -```python -import requests -import json -import cv2 -import base64 +* 1.0.0 -import numpy as np - - -def cv2_to_base64(image): - data = cv2.imencode('.jpg', image)[1] - return base64.b64encode(data.tostring()).decode('utf8') - -def base64_to_cv2(b64str): - data = base64.b64decode(b64str.encode('utf8')) - data = np.fromstring(data, np.uint8) - data = cv2.imdecode(data, cv2.IMREAD_COLOR) - return data - -# 发送HTTP请求 -org_im = cv2.imread('/PATH/TO/IMAGE') - -data = {'images':[cv2_to_base64(org_im)], 'top_k':2} -headers = {"Content-type": "application/json"} -url = "http://127.0.0.1:8866/predict/hrnet18_imagenet" -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -data =r.json()["results"]['data'] -``` - -### 查看代码 - -[PaddleClas](https://github.com/PaddlePaddle/PaddleClas) - -### 依赖 - -paddlepaddle >= 2.0.0 - -paddlehub >= 2.0.0 + 初始发布 diff --git a/modules/image/classification/hrnet18_imagenet_ssld/README.md 
b/modules/image/classification/hrnet18_imagenet_ssld/README.md index 5a33c39fd31ff02b838716d374d0dcc85a7b548c..305ac09b28d3c1d2e895fa3968d0dc80aca40a90 100644 --- a/modules/image/classification/hrnet18_imagenet_ssld/README.md +++ b/modules/image/classification/hrnet18_imagenet_ssld/README.md @@ -1,192 +1,181 @@ -```shell -$ hub install hrnet18_imagenet_ssld==1.0.0 -``` +# hrnet18_imagenet_ssld -## 命令行预测 +|模型名称|hrnet18_imagenet_ssld| +| :--- | :---: | +|类别|图像-图像分类| +|网络|HRNet| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|是| +|模型大小|124MB| +|指标|-| +|最新更新日期|2021-09-14| -```shell -$ hub run hrnet18_imagenet_ssld --input_path "/PATH/TO/IMAGE" --top_k 5 -``` -## 脚本预测 +## 一、模型基本信息 -```python -import paddle -import paddlehub as hub +- ### 模型介绍 -if __name__ == '__main__': + - HRNet是微软亚洲研究院在2019年提出的全新神经网络。与之前的卷积神经网络不同,这个网络在网络的深层依然可以保持高分辨率,所以预测的关键点的热图更加准确,而且在空间上也更加准确。此外,该网络在其他对分辨率敏感的视觉任务中表现特别好,例如检测和分割。 - model = hub.Module(name='hrnet18_imagenet_ssld',) - result = model.predict([PATH/TO/IMAGE]) -``` -## Fine-tune代码步骤 +## 二、安装 -使用PaddleHub Fine-tune API进行Fine-tune可以分为4个步骤。 +- ### 1、环境依赖 -### Step1: 定义数据预处理方式 -```python -import paddlehub.vision.transforms as T + - paddlepaddle >= 2.0.0 -transforms = T.Compose([T.Resize((256, 256)), - T.CenterCrop(224), - T.Normalize(mean=[0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225])], - to_rgb=True) -``` + - paddlehub >= 2.0.0 -'transforms' 数据增强模块定义了丰富的数据预处理方式,用户可按照需求替换自己需要的数据预处理方式。 +- ### 2、安装 + - ```shell + $ hub install hrnet18_imagenet_ssld + ``` -### Step2: 下载数据集并使用 -```python -from paddlehub.datasets import Flowers + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) -flowers = Flowers(transforms) -flowers_validate = Flowers(transforms, mode='val') -``` -* transforms(Callable): 数据预处理方式。 -* mode(str): 选择数据模式,可选项有 'train', 'test', 'val', 默认为'train'。 +## 三、模型API预测 -'hub.datasets.Flowers()' 会自动从网络下载数据集并解压到用户目录下'$HOME/.paddlehub/dataset'目录。 +- ### 1.命令行预测 + ```shell + $ hub run hrnet18_imagenet_ssld --input_path "/PATH/TO/IMAGE" --top_k 5 + ``` +- ### 2.预测代码示例 -### Step3: 加载预训练模型 + ```python + import paddle + import paddlehub as hub + if __name__ == '__main__': + model = hub.Module(name='hrnet18_imagenet_ssld') + result = model.predict(['flower.jpg']) + ``` +- ### 3.如何开始Fine-tune -```python -import paddlehub as hub + - 在完成安装PaddlePaddle与PaddleHub后,通过执行`python train.py`即可开始使用hrnet18_imagenet_ssld对[Flowers](../../docs/reference/datasets.md#class-hubdatasetsflowers)等数据集进行Fine-tune。 -model = hub.Module(name='hrnet18_imagenet_ssld', - label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"], - load_checkpoint=None) -``` -* name(str): 选择预训练模型的名字。 -* label_list(list): 设置标签对应分类类别, 默认为Imagenet2012类别。 -* load _checkpoint(str): 模型参数地址。 + - 代码步骤 -PaddleHub提供许多图像分类预训练模型,如xception、mobilenet、efficientnet等,详细信息参见[图像分类模型](https://www.paddlepaddle.org.cn/hub?filter=en_category&value=ImageClassification)。 + - Step1: 定义数据预处理方式 + - ```python + import paddlehub.vision.transforms as T + transforms = T.Compose([T.Resize((256, 256)), + T.CenterCrop(224), + T.Normalize(mean=[0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225])], + to_rgb=True) + ``` -如果想尝试efficientnet模型,只需要更换Module中的'name'参数即可. 
-```python -import paddlehub as hub + - `transforms` 数据增强模块定义了丰富的数据预处理方式,用户可按照需求替换自己需要的数据预处理方式。 -# 更换name参数即可无缝切换efficientnet模型, 代码示例如下 -module = hub.Module(name="efficientnetb7_imagenet") -``` -**NOTE:**目前部分模型还没有完全升级到2.0版本,敬请期待。 + - Step2: 下载数据集并使用 + - ```python + from paddlehub.datasets import Flowers + flowers = Flowers(transforms) + flowers_validate = Flowers(transforms, mode='val') + ``` -### Step4: 选择优化策略和运行配置 + * `transforms`: 数据预处理方式。 + * `mode`: 选择数据模式,可选项有 `train`, `test`, `val`, 默认为`train`。 -```python -import paddle -from paddlehub.finetune.trainer import Trainer + * 数据集的准备代码可以参考 [flowers.py](../../paddlehub/datasets/flowers.py)。`hub.datasets.Flowers()` 会自动从网络下载数据集并解压到用户目录下`$HOME/.paddlehub/dataset`目录。 -optimizer = paddle.optimizer.Adam(learning_rate=0.001, parameters=model.parameters()) -trainer = Trainer(model, optimizer, checkpoint_dir='img_classification_ckpt') -trainer.train(flowers, epochs=100, batch_size=32, eval_dataset=flowers_validate, save_interval=1) -``` + - Step3: 加载预训练模型 -#### 优化策略 + - ```python + model = hub.Module(name="hrnet18_imagenet_ssld", label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"]) + ``` + * `name`: 选择预训练模型的名字。 + * `label_list`: 设置输出分类类别,默认为Imagenet2012类别。 -Paddle2.0rc提供了多种优化器选择,如'SGD', 'Adam', 'Adamax'等,详细参见[策略](https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0-rc/api/paddle/optimizer/optimizer/Optimizer_cn.html)。 + - Step4: 选择优化策略和运行配置 -其中'Adam': + ```python + optimizer = paddle.optimizer.Adam(learning_rate=0.001, parameters=model.parameters()) + trainer = Trainer(model, optimizer, checkpoint_dir='img_classification_ckpt') + trainer.train(flowers, epochs=100, batch_size=32, eval_dataset=flowers_validate, save_interval=1) + ``` -* learning_rate: 全局学习率。默认为1e-3; -* parameters: 待优化模型参数。 -#### 运行配置 -'Trainer' 主要控制Fine-tune的训练,包含以下可控制的参数: + - 运行配置 -* model: 被优化模型; -* optimizer: 优化器选择; -* use_vdl: 是否使用vdl可视化训练过程; -* checkpoint_dir: 保存模型参数的地址; -* compare_metrics: 保存最优模型的衡量指标; + - `Trainer` 主要控制Fine-tune的训练,包含以下可控制的参数: -'trainer.train' 主要控制具体的训练过程,包含以下可控制的参数: + * `model`: 被优化模型; + * `optimizer`: 优化器选择; + * `use_vdl`: 是否使用vdl可视化训练过程; + * `checkpoint_dir`: 保存模型参数的地址; + * `compare_metrics`: 保存最优模型的衡量指标; -* train_dataset: 训练时所用的数据集; -* epochs: 训练轮数; -* batch_size: 训练的批大小,如果使用GPU,请根据实际情况调整batch_size; -* num_workers: works的数量,默认为0; -* eval_dataset: 验证集; -* log_interval: 打印日志的间隔, 单位为执行批训练的次数。 -* save_interval: 保存模型的间隔频次,单位为执行训练的轮数。 + - `trainer.train` 主要控制具体的训练过程,包含以下可控制的参数: -## 模型预测 + * `train_dataset`: 训练时所用的数据集; + * `epochs`: 训练轮数; + * `batch_size`: 训练的批大小,如果使用GPU,请根据实际情况调整batch_size; + * `num_workers`: works的数量,默认为0; + * `eval_dataset`: 验证集; + * `log_interval`: 打印日志的间隔, 单位为执行批训练的次数。 + * `save_interval`: 保存模型的间隔频次,单位为执行训练的轮数。 -当完成Fine-tune后,Fine-tune过程在验证集上表现最优的模型会被保存在'${CHECKPOINT_DIR}/best_model'目录下,其中'${CHECKPOINT_DIR}'目录为Fine-tune时所选择的保存checkpoint的目录。 -我们使用该模型来进行预测。predict.py脚本如下: + - 模型预测 -```python -import paddle -import paddlehub as hub + - 当完成Fine-tune后,Fine-tune过程在验证集上表现最优的模型会被保存在`${CHECKPOINT_DIR}/best_model`目录下,其中`${CHECKPOINT_DIR}`目录为Fine-tune时所选择的保存checkpoint的目录。 我们使用该模型来进行预测。predict.py脚本如下: -if __name__ == '__main__': + - ```python + import paddle + import paddlehub as hub + if __name__ == '__main__': + model = hub.Module(name='hrnet18_imagenet_ssld', label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"], load_checkpoint='/PATH/TO/CHECKPOINT') + result = model.predict(['flower.jpg']) + ``` - model = hub.Module(name='hrnet18_imagenet_ssld', label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"], 
load_checkpoint='/PATH/TO/CHECKPOINT') - result = model.predict(['flower.jpg']) -``` -参数配置正确后,请执行脚本'python predict.py', 加载模型具体可参见[加载](https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0-rc/api/paddle/framework/io/load_cn.html#load)。 + - **NOTE:** 进行预测时,所选择的module,checkpoint_dir,dataset必须和Fine-tune所用的一样。 -**NOTE:** 进行预测时,所选择的module,checkpoint_dir,dataset必须和Fine-tune所用的一样。 +## 四、服务部署 -## 服务部署 +- PaddleHub Serving可以部署一个在线分类任务服务。 -PaddleHub Serving可以部署一个在线分类任务服务 +- ### 第一步:启动PaddleHub Serving -## Step1: 启动PaddleHub Serving + - 运行启动命令: -运行启动命令: + - ```shell + $ hub serving start -m hrnet18_imagenet_ssld + ``` -```shell -$ hub serving start -m hrnet18_imagenet_ssld -``` + - 这样就完成了一个分类任务服务化API的部署,默认端口号为8866。 -这样就完成了一个分类任务服务化API的部署,默认端口号为8866。 + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 +- ### 第二步:发送预测请求 -## Step2: 发送预测请求 + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + ```python + import requests + import json + import cv2 + import base64 + import numpy as np + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + # 发送HTTP请求 + org_im = cv2.imread('/PATH/TO/IMAGE') + data = {'images':[cv2_to_base64(org_im)], 'top_k':2} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/hrnet18_imagenet_ssld" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + data =r.json()["results"]['data'] + ``` +## 五、更新历史 -```python -import requests -import json -import cv2 -import base64 +* 1.0.0 -import numpy as np - - -def cv2_to_base64(image): - data = cv2.imencode('.jpg', image)[1] - return base64.b64encode(data.tostring()).decode('utf8') - -def base64_to_cv2(b64str): - data = base64.b64decode(b64str.encode('utf8')) - data = np.fromstring(data, np.uint8) - data = cv2.imdecode(data, cv2.IMREAD_COLOR) - return data - -# 发送HTTP请求 -org_im = cv2.imread('/PATH/TO/IMAGE') - -data = {'images':[cv2_to_base64(org_im)], 'top_k':2} -headers = {"Content-type": "application/json"} -url = "http://127.0.0.1:8866/predict/hrnet18_imagenet_ssld" -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -data =r.json()["results"]['data'] -``` - -### 查看代码 - -[PaddleClas](https://github.com/PaddlePaddle/PaddleClas) - -### 依赖 - -paddlepaddle >= 2.0.0 - -paddlehub >= 2.0.0 + 初始发布 diff --git a/modules/image/classification/hrnet30_imagenet/README.md b/modules/image/classification/hrnet30_imagenet/README.md index 4b447d1f3c4eacc623f5287af2ea8bdf71e111ed..c64a0075696dc6896f63fd43ed27922631c88d1f 100644 --- a/modules/image/classification/hrnet30_imagenet/README.md +++ b/modules/image/classification/hrnet30_imagenet/README.md @@ -1,192 +1,181 @@ -```shell -$ hub install hrnet30_imagenet==1.0.0 -``` +# hrnet30_imagenet -## 命令行预测 +|模型名称|hrnet30_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|HRNet| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|是| +|模型大小|218MB| +|指标|-| +|最新更新日期|2021-09-14| -```shell -$ hub run hrnet30_imagenet --input_path "/PATH/TO/IMAGE" --top_k 5 -``` -## 脚本预测 +## 一、模型基本信息 -```python -import paddle -import paddlehub as hub +- ### 模型介绍 -if __name__ == '__main__': + - HRNet是微软亚洲研究院在2019年提出的全新神经网络。与之前的卷积神经网络不同,这个网络在网络的深层依然可以保持高分辨率,所以预测的关键点的热图更加准确,而且在空间上也更加准确。此外,该网络在其他对分辨率敏感的视觉任务中表现特别好,例如检测和分割。 - model = 
hub.Module(name='hrnet30_imagenet',) - result = model.predict([PATH/TO/IMAGE]) -``` -## Fine-tune代码步骤 +## 二、安装 -使用PaddleHub Fine-tune API进行Fine-tune可以分为4个步骤。 +- ### 1、环境依赖 -### Step1: 定义数据预处理方式 -```python -import paddlehub.vision.transforms as T + - paddlepaddle >= 2.0.0 -transforms = T.Compose([T.Resize((256, 256)), - T.CenterCrop(224), - T.Normalize(mean=[0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225])], - to_rgb=True) -``` + - paddlehub >= 2.0.0 -'transforms' 数据增强模块定义了丰富的数据预处理方式,用户可按照需求替换自己需要的数据预处理方式。 +- ### 2、安装 + - ```shell + $ hub install hrnet30_imagenet + ``` -### Step2: 下载数据集并使用 -```python -from paddlehub.datasets import Flowers + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) -flowers = Flowers(transforms) -flowers_validate = Flowers(transforms, mode='val') -``` -* transforms(Callable): 数据预处理方式。 -* mode(str): 选择数据模式,可选项有 'train', 'test', 'val', 默认为'train'。 +## 三、模型API预测 -'hub.datasets.Flowers()' 会自动从网络下载数据集并解压到用户目录下'$HOME/.paddlehub/dataset'目录。 +- ### 1.命令行预测 + ```shell + $ hub run hrnet30_imagenet --input_path "/PATH/TO/IMAGE" --top_k 5 + ``` +- ### 2.预测代码示例 -### Step3: 加载预训练模型 + ```python + import paddle + import paddlehub as hub + if __name__ == '__main__': + model = hub.Module(name='hrnet30_imagenet') + result = model.predict(['flower.jpg']) + ``` +- ### 3.如何开始Fine-tune -```python -import paddlehub as hub + - 在完成安装PaddlePaddle与PaddleHub后,通过执行`python train.py`即可开始使用hrnet30_imagenet对[Flowers](../../docs/reference/datasets.md#class-hubdatasetsflowers)等数据集进行Fine-tune。 -model = hub.Module(name='hrnet30_imagenet', - label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"], - load_checkpoint=None) -``` -* name(str): 选择预训练模型的名字。 -* label_list(list): 设置标签对应分类类别, 默认为Imagenet2012类别。 -* load _checkpoint(str): 模型参数地址。 + - 代码步骤 -PaddleHub提供许多图像分类预训练模型,如xception、mobilenet、efficientnet等,详细信息参见[图像分类模型](https://www.paddlepaddle.org.cn/hub?filter=en_category&value=ImageClassification)。 + - Step1: 定义数据预处理方式 + - ```python + import paddlehub.vision.transforms as T + transforms = T.Compose([T.Resize((256, 256)), + T.CenterCrop(224), + T.Normalize(mean=[0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225])], + to_rgb=True) + ``` -如果想尝试efficientnet模型,只需要更换Module中的'name'参数即可. 
-```python -import paddlehub as hub + - `transforms` 数据增强模块定义了丰富的数据预处理方式,用户可按照需求替换自己需要的数据预处理方式。 -# 更换name参数即可无缝切换efficientnet模型, 代码示例如下 -module = hub.Module(name="efficientnetb7_imagenet") -``` -**NOTE:**目前部分模型还没有完全升级到2.0版本,敬请期待。 + - Step2: 下载数据集并使用 + - ```python + from paddlehub.datasets import Flowers + flowers = Flowers(transforms) + flowers_validate = Flowers(transforms, mode='val') + ``` -### Step4: 选择优化策略和运行配置 + * `transforms`: 数据预处理方式。 + * `mode`: 选择数据模式,可选项有 `train`, `test`, `val`, 默认为`train`。 -```python -import paddle -from paddlehub.finetune.trainer import Trainer + * 数据集的准备代码可以参考 [flowers.py](../../paddlehub/datasets/flowers.py)。`hub.datasets.Flowers()` 会自动从网络下载数据集并解压到用户目录下`$HOME/.paddlehub/dataset`目录。 -optimizer = paddle.optimizer.Adam(learning_rate=0.001, parameters=model.parameters()) -trainer = Trainer(model, optimizer, checkpoint_dir='img_classification_ckpt') -trainer.train(flowers, epochs=100, batch_size=32, eval_dataset=flowers_validate, save_interval=1) -``` + - Step3: 加载预训练模型 -#### 优化策略 + - ```python + model = hub.Module(name="hrnet30_imagenet", label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"]) + ``` + * `name`: 选择预训练模型的名字。 + * `label_list`: 设置输出分类类别,默认为Imagenet2012类别。 -Paddle2.0rc提供了多种优化器选择,如'SGD', 'Adam', 'Adamax'等,详细参见[策略](https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0-rc/api/paddle/optimizer/optimizer/Optimizer_cn.html)。 + - Step4: 选择优化策略和运行配置 -其中'Adam': + ```python + optimizer = paddle.optimizer.Adam(learning_rate=0.001, parameters=model.parameters()) + trainer = Trainer(model, optimizer, checkpoint_dir='img_classification_ckpt') + trainer.train(flowers, epochs=100, batch_size=32, eval_dataset=flowers_validate, save_interval=1) + ``` -* learning_rate: 全局学习率。默认为1e-3; -* parameters: 待优化模型参数。 -#### 运行配置 -'Trainer' 主要控制Fine-tune的训练,包含以下可控制的参数: + - 运行配置 -* model: 被优化模型; -* optimizer: 优化器选择; -* use_vdl: 是否使用vdl可视化训练过程; -* checkpoint_dir: 保存模型参数的地址; -* compare_metrics: 保存最优模型的衡量指标; + - `Trainer` 主要控制Fine-tune的训练,包含以下可控制的参数: -'trainer.train' 主要控制具体的训练过程,包含以下可控制的参数: + * `model`: 被优化模型; + * `optimizer`: 优化器选择; + * `use_vdl`: 是否使用vdl可视化训练过程; + * `checkpoint_dir`: 保存模型参数的地址; + * `compare_metrics`: 保存最优模型的衡量指标; -* train_dataset: 训练时所用的数据集; -* epochs: 训练轮数; -* batch_size: 训练的批大小,如果使用GPU,请根据实际情况调整batch_size; -* num_workers: works的数量,默认为0; -* eval_dataset: 验证集; -* log_interval: 打印日志的间隔, 单位为执行批训练的次数。 -* save_interval: 保存模型的间隔频次,单位为执行训练的轮数。 + - `trainer.train` 主要控制具体的训练过程,包含以下可控制的参数: -## 模型预测 + * `train_dataset`: 训练时所用的数据集; + * `epochs`: 训练轮数; + * `batch_size`: 训练的批大小,如果使用GPU,请根据实际情况调整batch_size; + * `num_workers`: works的数量,默认为0; + * `eval_dataset`: 验证集; + * `log_interval`: 打印日志的间隔, 单位为执行批训练的次数。 + * `save_interval`: 保存模型的间隔频次,单位为执行训练的轮数。 -当完成Fine-tune后,Fine-tune过程在验证集上表现最优的模型会被保存在'${CHECKPOINT_DIR}/best_model'目录下,其中'${CHECKPOINT_DIR}'目录为Fine-tune时所选择的保存checkpoint的目录。 -我们使用该模型来进行预测。predict.py脚本如下: + - 模型预测 -```python -import paddle -import paddlehub as hub + - 当完成Fine-tune后,Fine-tune过程在验证集上表现最优的模型会被保存在`${CHECKPOINT_DIR}/best_model`目录下,其中`${CHECKPOINT_DIR}`目录为Fine-tune时所选择的保存checkpoint的目录。 我们使用该模型来进行预测。predict.py脚本如下: -if __name__ == '__main__': + - ```python + import paddle + import paddlehub as hub + if __name__ == '__main__': + model = hub.Module(name='hrnet30_imagenet', label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"], load_checkpoint='/PATH/TO/CHECKPOINT') + result = model.predict(['flower.jpg']) + ``` - model = hub.Module(name='hrnet30_imagenet', label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"], 
load_checkpoint='/PATH/TO/CHECKPOINT') - result = model.predict(['flower.jpg']) -``` -参数配置正确后,请执行脚本'python predict.py', 加载模型具体可参见[加载](https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0-rc/api/paddle/framework/io/load_cn.html#load)。 + - **NOTE:** 进行预测时,所选择的module,checkpoint_dir,dataset必须和Fine-tune所用的一样。 -**NOTE:** 进行预测时,所选择的module,checkpoint_dir,dataset必须和Fine-tune所用的一样。 +## 四、服务部署 -## 服务部署 +- PaddleHub Serving可以部署一个在线分类任务服务。 -PaddleHub Serving可以部署一个在线分类任务服务 +- ### 第一步:启动PaddleHub Serving -## Step1: 启动PaddleHub Serving + - 运行启动命令: -运行启动命令: + - ```shell + $ hub serving start -m hrnet30_imagenet + ``` -```shell -$ hub serving start -m hrnet30_imagenet -``` + - 这样就完成了一个分类任务服务化API的部署,默认端口号为8866。 -这样就完成了一个分类任务服务化API的部署,默认端口号为8866。 + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 +- ### 第二步:发送预测请求 -## Step2: 发送预测请求 + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + ```python + import requests + import json + import cv2 + import base64 + import numpy as np + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + # 发送HTTP请求 + org_im = cv2.imread('/PATH/TO/IMAGE') + data = {'images':[cv2_to_base64(org_im)], 'top_k':2} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/hrnet30_imagenet" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + data =r.json()["results"]['data'] + ``` +## 五、更新历史 -```python -import requests -import json -import cv2 -import base64 +* 1.0.0 -import numpy as np - - -def cv2_to_base64(image): - data = cv2.imencode('.jpg', image)[1] - return base64.b64encode(data.tostring()).decode('utf8') - -def base64_to_cv2(b64str): - data = base64.b64decode(b64str.encode('utf8')) - data = np.fromstring(data, np.uint8) - data = cv2.imdecode(data, cv2.IMREAD_COLOR) - return data - -# 发送HTTP请求 -org_im = cv2.imread('/PATH/TO/IMAGE') - -data = {'images':[cv2_to_base64(org_im)], 'top_k':2} -headers = {"Content-type": "application/json"} -url = "http://127.0.0.1:8866/predict/hrnet30_imagenet" -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -data =r.json()["results"]['data'] -``` - -### 查看代码 - -[PaddleClas](https://github.com/PaddlePaddle/PaddleClas) - -### 依赖 - -paddlepaddle >= 2.0.0 - -paddlehub >= 2.0.0 + 初始发布 diff --git a/modules/image/classification/hrnet32_imagenet/README.md b/modules/image/classification/hrnet32_imagenet/README.md index af97dc62a0c95cea46adad5db8ae987eefc29ff4..69b3c82d8f8eb38c988d045c30fa1e5139e8e7d0 100644 --- a/modules/image/classification/hrnet32_imagenet/README.md +++ b/modules/image/classification/hrnet32_imagenet/README.md @@ -1,192 +1,181 @@ -```shell -$ hub install hrnet32_imagenet==1.0.0 -``` +# hrnet32_imagenet -## 命令行预测 +|模型名称|hrnet32_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|HRNet| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|是| +|模型大小|238MB| +|指标|-| +|最新更新日期|2021-09-14| -```shell -$ hub run hrnet32_imagenet --input_path "/PATH/TO/IMAGE" --top_k 5 -``` -## 脚本预测 +## 一、模型基本信息 -```python -import paddle -import paddlehub as hub +- ### 模型介绍 -if __name__ == '__main__': + - HRNet是微软亚洲研究院在2019年提出的全新神经网络。与之前的卷积神经网络不同,这个网络在网络的深层依然可以保持高分辨率,所以预测的关键点的热图更加准确,而且在空间上也更加准确。此外,该网络在其他对分辨率敏感的视觉任务中表现特别好,例如检测和分割。 - model = hub.Module(name='hrnet32_imagenet',) 
- result = model.predict([PATH/TO/IMAGE]) -``` -## Fine-tune代码步骤 +## 二、安装 -使用PaddleHub Fine-tune API进行Fine-tune可以分为4个步骤。 +- ### 1、环境依赖 -### Step1: 定义数据预处理方式 -```python -import paddlehub.vision.transforms as T + - paddlepaddle >= 2.0.0 -transforms = T.Compose([T.Resize((256, 256)), - T.CenterCrop(224), - T.Normalize(mean=[0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225])], - to_rgb=True) -``` + - paddlehub >= 2.0.0 -'transforms' 数据增强模块定义了丰富的数据预处理方式,用户可按照需求替换自己需要的数据预处理方式。 +- ### 2、安装 + - ```shell + $ hub install hrnet32_imagenet + ``` -### Step2: 下载数据集并使用 -```python -from paddlehub.datasets import Flowers + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) -flowers = Flowers(transforms) -flowers_validate = Flowers(transforms, mode='val') -``` -* transforms(Callable): 数据预处理方式。 -* mode(str): 选择数据模式,可选项有 'train', 'test', 'val', 默认为'train'。 +## 三、模型API预测 -'hub.datasets.Flowers()' 会自动从网络下载数据集并解压到用户目录下'$HOME/.paddlehub/dataset'目录。 +- ### 1.命令行预测 + ```shell + $ hub run hrnet32_imagenet --input_path "/PATH/TO/IMAGE" --top_k 5 + ``` +- ### 2.预测代码示例 -### Step3: 加载预训练模型 + ```python + import paddle + import paddlehub as hub + if __name__ == '__main__': + model = hub.Module(name='hrnet32_imagenet') + result = model.predict(['flower.jpg']) + ``` +- ### 3.如何开始Fine-tune -```python -import paddlehub as hub + - 在完成安装PaddlePaddle与PaddleHub后,通过执行`python train.py`即可开始使用hrnet32_imagenet对[Flowers](../../docs/reference/datasets.md#class-hubdatasetsflowers)等数据集进行Fine-tune。 -model = hub.Module(name='hrnet32_imagenet', - label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"], - load_checkpoint=None) -``` -* name(str): 选择预训练模型的名字。 -* label_list(list): 设置标签对应分类类别, 默认为Imagenet2012类别。 -* load _checkpoint(str): 模型参数地址。 + - 代码步骤 -PaddleHub提供许多图像分类预训练模型,如xception、mobilenet、efficientnet等,详细信息参见[图像分类模型](https://www.paddlepaddle.org.cn/hub?filter=en_category&value=ImageClassification)。 + - Step1: 定义数据预处理方式 + - ```python + import paddlehub.vision.transforms as T + transforms = T.Compose([T.Resize((256, 256)), + T.CenterCrop(224), + T.Normalize(mean=[0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225])], + to_rgb=True) + ``` -如果想尝试efficientnet模型,只需要更换Module中的'name'参数即可. 
-```python -import paddlehub as hub + - `transforms` 数据增强模块定义了丰富的数据预处理方式,用户可按照需求替换自己需要的数据预处理方式。 -# 更换name参数即可无缝切换efficientnet模型, 代码示例如下 -module = hub.Module(name="efficientnetb7_imagenet") -``` -**NOTE:**目前部分模型还没有完全升级到2.0版本,敬请期待。 + - Step2: 下载数据集并使用 + - ```python + from paddlehub.datasets import Flowers + flowers = Flowers(transforms) + flowers_validate = Flowers(transforms, mode='val') + ``` -### Step4: 选择优化策略和运行配置 + * `transforms`: 数据预处理方式。 + * `mode`: 选择数据模式,可选项有 `train`, `test`, `val`, 默认为`train`。 -```python -import paddle -from paddlehub.finetune.trainer import Trainer + * 数据集的准备代码可以参考 [flowers.py](../../paddlehub/datasets/flowers.py)。`hub.datasets.Flowers()` 会自动从网络下载数据集并解压到用户目录下`$HOME/.paddlehub/dataset`目录。 -optimizer = paddle.optimizer.Adam(learning_rate=0.001, parameters=model.parameters()) -trainer = Trainer(model, optimizer, checkpoint_dir='img_classification_ckpt') -trainer.train(flowers, epochs=100, batch_size=32, eval_dataset=flowers_validate, save_interval=1) -``` + - Step3: 加载预训练模型 -#### 优化策略 + - ```python + model = hub.Module(name="hrnet32_imagenet", label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"]) + ``` + * `name`: 选择预训练模型的名字。 + * `label_list`: 设置输出分类类别,默认为Imagenet2012类别。 -Paddle2.0rc提供了多种优化器选择,如'SGD', 'Adam', 'Adamax'等,详细参见[策略](https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0-rc/api/paddle/optimizer/optimizer/Optimizer_cn.html)。 + - Step4: 选择优化策略和运行配置 -其中'Adam': + ```python + optimizer = paddle.optimizer.Adam(learning_rate=0.001, parameters=model.parameters()) + trainer = Trainer(model, optimizer, checkpoint_dir='img_classification_ckpt') + trainer.train(flowers, epochs=100, batch_size=32, eval_dataset=flowers_validate, save_interval=1) + ``` -* learning_rate: 全局学习率。默认为1e-3; -* parameters: 待优化模型参数。 -#### 运行配置 -'Trainer' 主要控制Fine-tune的训练,包含以下可控制的参数: + - 运行配置 -* model: 被优化模型; -* optimizer: 优化器选择; -* use_vdl: 是否使用vdl可视化训练过程; -* checkpoint_dir: 保存模型参数的地址; -* compare_metrics: 保存最优模型的衡量指标; + - `Trainer` 主要控制Fine-tune的训练,包含以下可控制的参数: -'trainer.train' 主要控制具体的训练过程,包含以下可控制的参数: + * `model`: 被优化模型; + * `optimizer`: 优化器选择; + * `use_vdl`: 是否使用vdl可视化训练过程; + * `checkpoint_dir`: 保存模型参数的地址; + * `compare_metrics`: 保存最优模型的衡量指标; -* train_dataset: 训练时所用的数据集; -* epochs: 训练轮数; -* batch_size: 训练的批大小,如果使用GPU,请根据实际情况调整batch_size; -* num_workers: works的数量,默认为0; -* eval_dataset: 验证集; -* log_interval: 打印日志的间隔, 单位为执行批训练的次数。 -* save_interval: 保存模型的间隔频次,单位为执行训练的轮数。 + - `trainer.train` 主要控制具体的训练过程,包含以下可控制的参数: -## 模型预测 + * `train_dataset`: 训练时所用的数据集; + * `epochs`: 训练轮数; + * `batch_size`: 训练的批大小,如果使用GPU,请根据实际情况调整batch_size; + * `num_workers`: works的数量,默认为0; + * `eval_dataset`: 验证集; + * `log_interval`: 打印日志的间隔, 单位为执行批训练的次数。 + * `save_interval`: 保存模型的间隔频次,单位为执行训练的轮数。 -当完成Fine-tune后,Fine-tune过程在验证集上表现最优的模型会被保存在'${CHECKPOINT_DIR}/best_model'目录下,其中'${CHECKPOINT_DIR}'目录为Fine-tune时所选择的保存checkpoint的目录。 -我们使用该模型来进行预测。predict.py脚本如下: + - 模型预测 -```python -import paddle -import paddlehub as hub + - 当完成Fine-tune后,Fine-tune过程在验证集上表现最优的模型会被保存在`${CHECKPOINT_DIR}/best_model`目录下,其中`${CHECKPOINT_DIR}`目录为Fine-tune时所选择的保存checkpoint的目录。 我们使用该模型来进行预测。predict.py脚本如下: -if __name__ == '__main__': + - ```python + import paddle + import paddlehub as hub + if __name__ == '__main__': + model = hub.Module(name='hrnet32_imagenet', label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"], load_checkpoint='/PATH/TO/CHECKPOINT') + result = model.predict(['flower.jpg']) + ``` - model = hub.Module(name='hrnet32_imagenet', label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"], 
load_checkpoint='/PATH/TO/CHECKPOINT') - result = model.predict(['flower.jpg']) -``` -参数配置正确后,请执行脚本'python predict.py', 加载模型具体可参见[加载](https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0-rc/api/paddle/framework/io/load_cn.html#load)。 + - **NOTE:** 进行预测时,所选择的module,checkpoint_dir,dataset必须和Fine-tune所用的一样。 -**NOTE:** 进行预测时,所选择的module,checkpoint_dir,dataset必须和Fine-tune所用的一样。 +## 四、服务部署 -## 服务部署 +- PaddleHub Serving可以部署一个在线分类任务服务。 -PaddleHub Serving可以部署一个在线分类任务服务 +- ### 第一步:启动PaddleHub Serving -## Step1: 启动PaddleHub Serving + - 运行启动命令: -运行启动命令: + - ```shell + $ hub serving start -m hrnet32_imagenet + ``` -```shell -$ hub serving start -m hrnet32_imagenet -``` + - 这样就完成了一个分类任务服务化API的部署,默认端口号为8866。 -这样就完成了一个分类任务服务化API的部署,默认端口号为8866。 + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 +- ### 第二步:发送预测请求 -## Step2: 发送预测请求 + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + ```python + import requests + import json + import cv2 + import base64 + import numpy as np + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + # 发送HTTP请求 + org_im = cv2.imread('/PATH/TO/IMAGE') + data = {'images':[cv2_to_base64(org_im)], 'top_k':2} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/hrnet32_imagenet" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + data =r.json()["results"]['data'] + ``` +## 五、更新历史 -```python -import requests -import json -import cv2 -import base64 +* 1.0.0 -import numpy as np - - -def cv2_to_base64(image): - data = cv2.imencode('.jpg', image)[1] - return base64.b64encode(data.tostring()).decode('utf8') - -def base64_to_cv2(b64str): - data = base64.b64decode(b64str.encode('utf8')) - data = np.fromstring(data, np.uint8) - data = cv2.imdecode(data, cv2.IMREAD_COLOR) - return data - -# 发送HTTP请求 -org_im = cv2.imread('/PATH/TO/IMAGE') - -data = {'images':[cv2_to_base64(org_im)], 'top_k':2} -headers = {"Content-type": "application/json"} -url = "http://127.0.0.1:8866/predict/hrnet32_imagenet" -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -data =r.json()["results"]['data'] -``` - -### 查看代码 - -[PaddleClas](https://github.com/PaddlePaddle/PaddleClas) - -### 依赖 - -paddlepaddle >= 2.0.0 - -paddlehub >= 2.0.0 + 初始发布 diff --git a/modules/image/classification/hrnet40_imagenet/README.md b/modules/image/classification/hrnet40_imagenet/README.md index d09944004c371022fbc896710e16ee3d65a7db52..3c6efc2c6a61e6a48a092ea1b996e29d1335531c 100644 --- a/modules/image/classification/hrnet40_imagenet/README.md +++ b/modules/image/classification/hrnet40_imagenet/README.md @@ -1,192 +1,181 @@ -```shell -$ hub install hrnet40_imagenet==1.0.0 -``` +# hrnet40_imagenet -## 命令行预测 +|模型名称|hrnet40_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|HRNet| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|是| +|模型大小|333MB| +|指标|-| +|最新更新日期|2021-09-14| -```shell -$ hub run hrnet40_imagenet --input_path "/PATH/TO/IMAGE" --top_k 5 -``` -## 脚本预测 +## 一、模型基本信息 -```python -import paddle -import paddlehub as hub +- ### 模型介绍 -if __name__ == '__main__': + - HRNet是微软亚洲研究院在2019年提出的全新神经网络。与之前的卷积神经网络不同,这个网络在网络的深层依然可以保持高分辨率,所以预测的关键点的热图更加准确,而且在空间上也更加准确。此外,该网络在其他对分辨率敏感的视觉任务中表现特别好,例如检测和分割。 - model = hub.Module(name='hrnet40_imagenet',) 
- result = model.predict([PATH/TO/IMAGE]) -``` -## Fine-tune代码步骤 +## 二、安装 -使用PaddleHub Fine-tune API进行Fine-tune可以分为4个步骤。 +- ### 1、环境依赖 -### Step1: 定义数据预处理方式 -```python -import paddlehub.vision.transforms as T + - paddlepaddle >= 2.0.0 -transforms = T.Compose([T.Resize((256, 256)), - T.CenterCrop(224), - T.Normalize(mean=[0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225])], - to_rgb=True) -``` + - paddlehub >= 2.0.0 -'transforms' 数据增强模块定义了丰富的数据预处理方式,用户可按照需求替换自己需要的数据预处理方式。 +- ### 2、安装 + - ```shell + $ hub install hrnet40_imagenet + ``` -### Step2: 下载数据集并使用 -```python -from paddlehub.datasets import Flowers + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) -flowers = Flowers(transforms) -flowers_validate = Flowers(transforms, mode='val') -``` -* transforms(Callable): 数据预处理方式。 -* mode(str): 选择数据模式,可选项有 'train', 'test', 'val', 默认为'train'。 +## 三、模型API预测 -'hub.datasets.Flowers()' 会自动从网络下载数据集并解压到用户目录下'$HOME/.paddlehub/dataset'目录。 +- ### 1.命令行预测 + ```shell + $ hub run hrnet40_imagenet --input_path "/PATH/TO/IMAGE" --top_k 5 + ``` +- ### 2.预测代码示例 -### Step3: 加载预训练模型 + ```python + import paddle + import paddlehub as hub + if __name__ == '__main__': + model = hub.Module(name='hrnet40_imagenet') + result = model.predict(['flower.jpg']) + ``` +- ### 3.如何开始Fine-tune -```python -import paddlehub as hub + - 在完成安装PaddlePaddle与PaddleHub后,通过执行`python train.py`即可开始使用hrnet40_imagenet对[Flowers](../../docs/reference/datasets.md#class-hubdatasetsflowers)等数据集进行Fine-tune。 -model = hub.Module(name='hrnet40_imagenet', - label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"], - load_checkpoint=None) -``` -* name(str): 选择预训练模型的名字。 -* label_list(list): 设置标签对应分类类别, 默认为Imagenet2012类别。 -* load _checkpoint(str): 模型参数地址。 + - 代码步骤 -PaddleHub提供许多图像分类预训练模型,如xception、mobilenet、efficientnet等,详细信息参见[图像分类模型](https://www.paddlepaddle.org.cn/hub?filter=en_category&value=ImageClassification)。 + - Step1: 定义数据预处理方式 + - ```python + import paddlehub.vision.transforms as T + transforms = T.Compose([T.Resize((256, 256)), + T.CenterCrop(224), + T.Normalize(mean=[0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225])], + to_rgb=True) + ``` -如果想尝试efficientnet模型,只需要更换Module中的'name'参数即可. 
-```python -import paddlehub as hub + - `transforms` 数据增强模块定义了丰富的数据预处理方式,用户可按照需求替换自己需要的数据预处理方式。 -# 更换name参数即可无缝切换efficientnet模型, 代码示例如下 -module = hub.Module(name="efficientnetb7_imagenet") -``` -**NOTE:**目前部分模型还没有完全升级到2.0版本,敬请期待。 + - Step2: 下载数据集并使用 + - ```python + from paddlehub.datasets import Flowers + flowers = Flowers(transforms) + flowers_validate = Flowers(transforms, mode='val') + ``` -### Step4: 选择优化策略和运行配置 + * `transforms`: 数据预处理方式。 + * `mode`: 选择数据模式,可选项有 `train`, `test`, `val`, 默认为`train`。 -```python -import paddle -from paddlehub.finetune.trainer import Trainer + * 数据集的准备代码可以参考 [flowers.py](../../paddlehub/datasets/flowers.py)。`hub.datasets.Flowers()` 会自动从网络下载数据集并解压到用户目录下`$HOME/.paddlehub/dataset`目录。 -optimizer = paddle.optimizer.Adam(learning_rate=0.001, parameters=model.parameters()) -trainer = Trainer(model, optimizer, checkpoint_dir='img_classification_ckpt') -trainer.train(flowers, epochs=100, batch_size=32, eval_dataset=flowers_validate, save_interval=1) -``` + - Step3: 加载预训练模型 -#### 优化策略 + - ```python + model = hub.Module(name="hrnet40_imagenet", label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"]) + ``` + * `name`: 选择预训练模型的名字。 + * `label_list`: 设置输出分类类别,默认为Imagenet2012类别。 -Paddle2.0rc提供了多种优化器选择,如'SGD', 'Adam', 'Adamax'等,详细参见[策略](https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0-rc/api/paddle/optimizer/optimizer/Optimizer_cn.html)。 + - Step4: 选择优化策略和运行配置 -其中'Adam': + ```python + optimizer = paddle.optimizer.Adam(learning_rate=0.001, parameters=model.parameters()) + trainer = Trainer(model, optimizer, checkpoint_dir='img_classification_ckpt') + trainer.train(flowers, epochs=100, batch_size=32, eval_dataset=flowers_validate, save_interval=1) + ``` -* learning_rate: 全局学习率。默认为1e-3; -* parameters: 待优化模型参数。 -#### 运行配置 -'Trainer' 主要控制Fine-tune的训练,包含以下可控制的参数: + - 运行配置 -* model: 被优化模型; -* optimizer: 优化器选择; -* use_vdl: 是否使用vdl可视化训练过程; -* checkpoint_dir: 保存模型参数的地址; -* compare_metrics: 保存最优模型的衡量指标; + - `Trainer` 主要控制Fine-tune的训练,包含以下可控制的参数: -'trainer.train' 主要控制具体的训练过程,包含以下可控制的参数: + * `model`: 被优化模型; + * `optimizer`: 优化器选择; + * `use_vdl`: 是否使用vdl可视化训练过程; + * `checkpoint_dir`: 保存模型参数的地址; + * `compare_metrics`: 保存最优模型的衡量指标; -* train_dataset: 训练时所用的数据集; -* epochs: 训练轮数; -* batch_size: 训练的批大小,如果使用GPU,请根据实际情况调整batch_size; -* num_workers: works的数量,默认为0; -* eval_dataset: 验证集; -* log_interval: 打印日志的间隔, 单位为执行批训练的次数。 -* save_interval: 保存模型的间隔频次,单位为执行训练的轮数。 + - `trainer.train` 主要控制具体的训练过程,包含以下可控制的参数: -## 模型预测 + * `train_dataset`: 训练时所用的数据集; + * `epochs`: 训练轮数; + * `batch_size`: 训练的批大小,如果使用GPU,请根据实际情况调整batch_size; + * `num_workers`: works的数量,默认为0; + * `eval_dataset`: 验证集; + * `log_interval`: 打印日志的间隔, 单位为执行批训练的次数。 + * `save_interval`: 保存模型的间隔频次,单位为执行训练的轮数。 -当完成Fine-tune后,Fine-tune过程在验证集上表现最优的模型会被保存在'${CHECKPOINT_DIR}/best_model'目录下,其中'${CHECKPOINT_DIR}'目录为Fine-tune时所选择的保存checkpoint的目录。 -我们使用该模型来进行预测。predict.py脚本如下: + - 模型预测 -```python -import paddle -import paddlehub as hub + - 当完成Fine-tune后,Fine-tune过程在验证集上表现最优的模型会被保存在`${CHECKPOINT_DIR}/best_model`目录下,其中`${CHECKPOINT_DIR}`目录为Fine-tune时所选择的保存checkpoint的目录。 我们使用该模型来进行预测。predict.py脚本如下: -if __name__ == '__main__': + - ```python + import paddle + import paddlehub as hub + if __name__ == '__main__': + model = hub.Module(name='hrnet40_imagenet', label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"], load_checkpoint='/PATH/TO/CHECKPOINT') + result = model.predict(['flower.jpg']) + ``` - model = hub.Module(name='hrnet40_imagenet', label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"], 
load_checkpoint='/PATH/TO/CHECKPOINT') - result = model.predict(['flower.jpg']) -``` -参数配置正确后,请执行脚本'python predict.py', 加载模型具体可参见[加载](https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0-rc/api/paddle/framework/io/load_cn.html#load)。 + - **NOTE:** 进行预测时,所选择的module,checkpoint_dir,dataset必须和Fine-tune所用的一样。 -**NOTE:** 进行预测时,所选择的module,checkpoint_dir,dataset必须和Fine-tune所用的一样。 +## 四、服务部署 -## 服务部署 +- PaddleHub Serving可以部署一个在线分类任务服务。 -PaddleHub Serving可以部署一个在线分类任务服务 +- ### 第一步:启动PaddleHub Serving -## Step1: 启动PaddleHub Serving + - 运行启动命令: -运行启动命令: + - ```shell + $ hub serving start -m hrnet40_imagenet + ``` -```shell -$ hub serving start -m hrnet40_imagenet -``` + - 这样就完成了一个分类任务服务化API的部署,默认端口号为8866。 -这样就完成了一个分类任务服务化API的部署,默认端口号为8866。 + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 +- ### 第二步:发送预测请求 -## Step2: 发送预测请求 + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + ```python + import requests + import json + import cv2 + import base64 + import numpy as np + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + # 发送HTTP请求 + org_im = cv2.imread('/PATH/TO/IMAGE') + data = {'images':[cv2_to_base64(org_im)], 'top_k':2} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/hrnet40_imagenet" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + data =r.json()["results"]['data'] + ``` +## 五、更新历史 -```python -import requests -import json -import cv2 -import base64 +* 1.0.0 -import numpy as np - - -def cv2_to_base64(image): - data = cv2.imencode('.jpg', image)[1] - return base64.b64encode(data.tostring()).decode('utf8') - -def base64_to_cv2(b64str): - data = base64.b64decode(b64str.encode('utf8')) - data = np.fromstring(data, np.uint8) - data = cv2.imdecode(data, cv2.IMREAD_COLOR) - return data - -# 发送HTTP请求 -org_im = cv2.imread('/PATH/TO/IMAGE') - -data = {'images':[cv2_to_base64(org_im)], 'top_k':2} -headers = {"Content-type": "application/json"} -url = "http://127.0.0.1:8866/predict/hrnet40_imagenet" -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -data =r.json()["results"]['data'] -``` - -### 查看代码 - -[PaddleClas](https://github.com/PaddlePaddle/PaddleClas) - -### 依赖 - -paddlepaddle >= 2.0.0 - -paddlehub >= 2.0.0 + 初始发布 diff --git a/modules/image/classification/hrnet44_imagenet/README.md b/modules/image/classification/hrnet44_imagenet/README.md index 78031d1cacce6b85e3dee97957fba8326b00b608..9c2c6ccbb85a43620ad2641a6e2a615f46ee86ea 100644 --- a/modules/image/classification/hrnet44_imagenet/README.md +++ b/modules/image/classification/hrnet44_imagenet/README.md @@ -1,192 +1,181 @@ -```shell -$ hub install hrnet44_imagenet==1.0.0 -``` +# hrnet44_imagenet -## 命令行预测 +|模型名称|hrnet44_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|HRNet| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|是| +|模型大小|388MB| +|指标|-| +|最新更新日期|2021-09-14| -```shell -$ hub run hrnet44_imagenet --input_path "/PATH/TO/IMAGE" --top_k 5 -``` -## 脚本预测 +## 一、模型基本信息 -```python -import paddle -import paddlehub as hub +- ### 模型介绍 -if __name__ == '__main__': + - HRNet是微软亚洲研究院在2019年提出的全新神经网络。与之前的卷积神经网络不同,这个网络在网络的深层依然可以保持高分辨率,所以预测的关键点的热图更加准确,而且在空间上也更加准确。此外,该网络在其他对分辨率敏感的视觉任务中表现特别好,例如检测和分割。 - model = hub.Module(name='hrnet44_imagenet',) 
- result = model.predict([PATH/TO/IMAGE]) -``` -## Fine-tune代码步骤 +## 二、安装 -使用PaddleHub Fine-tune API进行Fine-tune可以分为4个步骤。 +- ### 1、环境依赖 -### Step1: 定义数据预处理方式 -```python -import paddlehub.vision.transforms as T + - paddlepaddle >= 2.0.0 -transforms = T.Compose([T.Resize((256, 256)), - T.CenterCrop(224), - T.Normalize(mean=[0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225])], - to_rgb=True) -``` + - paddlehub >= 2.0.0 -'transforms' 数据增强模块定义了丰富的数据预处理方式,用户可按照需求替换自己需要的数据预处理方式。 +- ### 2、安装 + - ```shell + $ hub install hrnet44_imagenet + ``` -### Step2: 下载数据集并使用 -```python -from paddlehub.datasets import Flowers + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) -flowers = Flowers(transforms) -flowers_validate = Flowers(transforms, mode='val') -``` -* transforms(Callable): 数据预处理方式。 -* mode(str): 选择数据模式,可选项有 'train', 'test', 'val', 默认为'train'。 +## 三、模型API预测 -'hub.datasets.Flowers()' 会自动从网络下载数据集并解压到用户目录下'$HOME/.paddlehub/dataset'目录。 +- ### 1.命令行预测 + ```shell + $ hub run hrnet44_imagenet --input_path "/PATH/TO/IMAGE" --top_k 5 + ``` +- ### 2.预测代码示例 -### Step3: 加载预训练模型 + ```python + import paddle + import paddlehub as hub + if __name__ == '__main__': + model = hub.Module(name='hrnet44_imagenet') + result = model.predict(['flower.jpg']) + ``` +- ### 3.如何开始Fine-tune -```python -import paddlehub as hub + - 在完成安装PaddlePaddle与PaddleHub后,通过执行`python train.py`即可开始使用hrnet44_imagenet对[Flowers](../../docs/reference/datasets.md#class-hubdatasetsflowers)等数据集进行Fine-tune。 -model = hub.Module(name='hrnet44_imagenet', - label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"], - load_checkpoint=None) -``` -* name(str): 选择预训练模型的名字。 -* label_list(list): 设置标签对应分类类别, 默认为Imagenet2012类别。 -* load _checkpoint(str): 模型参数地址。 + - 代码步骤 -PaddleHub提供许多图像分类预训练模型,如xception、mobilenet、efficientnet等,详细信息参见[图像分类模型](https://www.paddlepaddle.org.cn/hub?filter=en_category&value=ImageClassification)。 + - Step1: 定义数据预处理方式 + - ```python + import paddlehub.vision.transforms as T + transforms = T.Compose([T.Resize((256, 256)), + T.CenterCrop(224), + T.Normalize(mean=[0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225])], + to_rgb=True) + ``` -如果想尝试efficientnet模型,只需要更换Module中的'name'参数即可. 
-```python -import paddlehub as hub + - `transforms` 数据增强模块定义了丰富的数据预处理方式,用户可按照需求替换自己需要的数据预处理方式。 -# 更换name参数即可无缝切换efficientnet模型, 代码示例如下 -module = hub.Module(name="efficientnetb7_imagenet") -``` -**NOTE:**目前部分模型还没有完全升级到2.0版本,敬请期待。 + - Step2: 下载数据集并使用 + - ```python + from paddlehub.datasets import Flowers + flowers = Flowers(transforms) + flowers_validate = Flowers(transforms, mode='val') + ``` -### Step4: 选择优化策略和运行配置 + * `transforms`: 数据预处理方式。 + * `mode`: 选择数据模式,可选项有 `train`, `test`, `val`, 默认为`train`。 -```python -import paddle -from paddlehub.finetune.trainer import Trainer + * 数据集的准备代码可以参考 [flowers.py](../../paddlehub/datasets/flowers.py)。`hub.datasets.Flowers()` 会自动从网络下载数据集并解压到用户目录下`$HOME/.paddlehub/dataset`目录。 -optimizer = paddle.optimizer.Adam(learning_rate=0.001, parameters=model.parameters()) -trainer = Trainer(model, optimizer, checkpoint_dir='img_classification_ckpt') -trainer.train(flowers, epochs=100, batch_size=32, eval_dataset=flowers_validate, save_interval=1) -``` + - Step3: 加载预训练模型 -#### 优化策略 + - ```python + model = hub.Module(name="hrnet44_imagenet", label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"]) + ``` + * `name`: 选择预训练模型的名字。 + * `label_list`: 设置输出分类类别,默认为Imagenet2012类别。 -Paddle2.0rc提供了多种优化器选择,如'SGD', 'Adam', 'Adamax'等,详细参见[策略](https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0-rc/api/paddle/optimizer/optimizer/Optimizer_cn.html)。 + - Step4: 选择优化策略和运行配置 -其中'Adam': + ```python + optimizer = paddle.optimizer.Adam(learning_rate=0.001, parameters=model.parameters()) + trainer = Trainer(model, optimizer, checkpoint_dir='img_classification_ckpt') + trainer.train(flowers, epochs=100, batch_size=32, eval_dataset=flowers_validate, save_interval=1) + ``` -* learning_rate: 全局学习率。默认为1e-3; -* parameters: 待优化模型参数。 -#### 运行配置 -'Trainer' 主要控制Fine-tune的训练,包含以下可控制的参数: + - 运行配置 -* model: 被优化模型; -* optimizer: 优化器选择; -* use_vdl: 是否使用vdl可视化训练过程; -* checkpoint_dir: 保存模型参数的地址; -* compare_metrics: 保存最优模型的衡量指标; + - `Trainer` 主要控制Fine-tune的训练,包含以下可控制的参数: -'trainer.train' 主要控制具体的训练过程,包含以下可控制的参数: + * `model`: 被优化模型; + * `optimizer`: 优化器选择; + * `use_vdl`: 是否使用vdl可视化训练过程; + * `checkpoint_dir`: 保存模型参数的地址; + * `compare_metrics`: 保存最优模型的衡量指标; -* train_dataset: 训练时所用的数据集; -* epochs: 训练轮数; -* batch_size: 训练的批大小,如果使用GPU,请根据实际情况调整batch_size; -* num_workers: works的数量,默认为0; -* eval_dataset: 验证集; -* log_interval: 打印日志的间隔, 单位为执行批训练的次数。 -* save_interval: 保存模型的间隔频次,单位为执行训练的轮数。 + - `trainer.train` 主要控制具体的训练过程,包含以下可控制的参数: -## 模型预测 + * `train_dataset`: 训练时所用的数据集; + * `epochs`: 训练轮数; + * `batch_size`: 训练的批大小,如果使用GPU,请根据实际情况调整batch_size; + * `num_workers`: works的数量,默认为0; + * `eval_dataset`: 验证集; + * `log_interval`: 打印日志的间隔, 单位为执行批训练的次数。 + * `save_interval`: 保存模型的间隔频次,单位为执行训练的轮数。 -当完成Fine-tune后,Fine-tune过程在验证集上表现最优的模型会被保存在'${CHECKPOINT_DIR}/best_model'目录下,其中'${CHECKPOINT_DIR}'目录为Fine-tune时所选择的保存checkpoint的目录。 -我们使用该模型来进行预测。predict.py脚本如下: + - 模型预测 -```python -import paddle -import paddlehub as hub + - 当完成Fine-tune后,Fine-tune过程在验证集上表现最优的模型会被保存在`${CHECKPOINT_DIR}/best_model`目录下,其中`${CHECKPOINT_DIR}`目录为Fine-tune时所选择的保存checkpoint的目录。 我们使用该模型来进行预测。predict.py脚本如下: -if __name__ == '__main__': + - ```python + import paddle + import paddlehub as hub + if __name__ == '__main__': + model = hub.Module(name='hrnet44_imagenet', label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"], load_checkpoint='/PATH/TO/CHECKPOINT') + result = model.predict(['flower.jpg']) + ``` - model = hub.Module(name='hrnet44_imagenet', label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"], 
load_checkpoint='/PATH/TO/CHECKPOINT') - result = model.predict(['flower.jpg']) -``` -参数配置正确后,请执行脚本'python predict.py', 加载模型具体可参见[加载](https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0-rc/api/paddle/framework/io/load_cn.html#load)。 + - **NOTE:** 进行预测时,所选择的module,checkpoint_dir,dataset必须和Fine-tune所用的一样。 -**NOTE:** 进行预测时,所选择的module,checkpoint_dir,dataset必须和Fine-tune所用的一样。 +## 四、服务部署 -## 服务部署 +- PaddleHub Serving可以部署一个在线分类任务服务。 -PaddleHub Serving可以部署一个在线分类任务服务 +- ### 第一步:启动PaddleHub Serving -## Step1: 启动PaddleHub Serving + - 运行启动命令: -运行启动命令: + - ```shell + $ hub serving start -m hrnet44_imagenet + ``` -```shell -$ hub serving start -m hrnet44_imagenet -``` + - 这样就完成了一个分类任务服务化API的部署,默认端口号为8866。 -这样就完成了一个分类任务服务化API的部署,默认端口号为8866。 + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 +- ### 第二步:发送预测请求 -## Step2: 发送预测请求 + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + ```python + import requests + import json + import cv2 + import base64 + import numpy as np + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + # 发送HTTP请求 + org_im = cv2.imread('/PATH/TO/IMAGE') + data = {'images':[cv2_to_base64(org_im)], 'top_k':2} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/hrnet44_imagenet" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + data =r.json()["results"]['data'] + ``` +## 五、更新历史 -```python -import requests -import json -import cv2 -import base64 +* 1.0.0 -import numpy as np - - -def cv2_to_base64(image): - data = cv2.imencode('.jpg', image)[1] - return base64.b64encode(data.tostring()).decode('utf8') - -def base64_to_cv2(b64str): - data = base64.b64decode(b64str.encode('utf8')) - data = np.fromstring(data, np.uint8) - data = cv2.imdecode(data, cv2.IMREAD_COLOR) - return data - -# 发送HTTP请求 -org_im = cv2.imread('/PATH/TO/IMAGE') - -data = {'images':[cv2_to_base64(org_im)], 'top_k':2} -headers = {"Content-type": "application/json"} -url = "http://127.0.0.1:8866/predict/hrnet44_imagenet" -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -data =r.json()["results"]['data'] -``` - -### 查看代码 - -[PaddleClas](https://github.com/PaddlePaddle/PaddleClas) - -### 依赖 - -paddlepaddle >= 2.0.0 - -paddlehub >= 2.0.0 + 初始发布 diff --git a/modules/image/classification/hrnet48_imagenet/README.md b/modules/image/classification/hrnet48_imagenet/README.md index 1b82779201fb4ec941d9f31ed380e536a63c96a3..6c480ce1fc730b79b8ce890d7239032a839d84e6 100644 --- a/modules/image/classification/hrnet48_imagenet/README.md +++ b/modules/image/classification/hrnet48_imagenet/README.md @@ -1,192 +1,181 @@ -```shell -$ hub install hrnet48_imagenet==1.0.0 -``` +# hrnet48_imagenet -## 命令行预测 +|模型名称|hrnet48_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|HRNet| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|是| +|模型大小|448MB| +|指标|-| +|最新更新日期|2021-09-14| -```shell -$ hub run hrnet48_imagenet --input_path "/PATH/TO/IMAGE" --top_k 5 -``` -## 脚本预测 +## 一、模型基本信息 -```python -import paddle -import paddlehub as hub +- ### 模型介绍 -if __name__ == '__main__': + - HRNet是微软亚洲研究院在2019年提出的全新神经网络。与之前的卷积神经网络不同,这个网络在网络的深层依然可以保持高分辨率,所以预测的关键点的热图更加准确,而且在空间上也更加准确。此外,该网络在其他对分辨率敏感的视觉任务中表现特别好,例如检测和分割。 - model = hub.Module(name='hrnet48_imagenet',) 
- result = model.predict([PATH/TO/IMAGE]) -``` -## Fine-tune代码步骤 +## 二、安装 -使用PaddleHub Fine-tune API进行Fine-tune可以分为4个步骤。 +- ### 1、环境依赖 -### Step1: 定义数据预处理方式 -```python -import paddlehub.vision.transforms as T + - paddlepaddle >= 2.0.0 -transforms = T.Compose([T.Resize((256, 256)), - T.CenterCrop(224), - T.Normalize(mean=[0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225])], - to_rgb=True) -``` + - paddlehub >= 2.0.0 -'transforms' 数据增强模块定义了丰富的数据预处理方式,用户可按照需求替换自己需要的数据预处理方式。 +- ### 2、安装 + - ```shell + $ hub install hrnet48_imagenet + ``` -### Step2: 下载数据集并使用 -```python -from paddlehub.datasets import Flowers + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) -flowers = Flowers(transforms) -flowers_validate = Flowers(transforms, mode='val') -``` -* transforms(Callable): 数据预处理方式。 -* mode(str): 选择数据模式,可选项有 'train', 'test', 'val', 默认为'train'。 +## 三、模型API预测 -'hub.datasets.Flowers()' 会自动从网络下载数据集并解压到用户目录下'$HOME/.paddlehub/dataset'目录。 +- ### 1.命令行预测 + ```shell + $ hub run hrnet48_imagenet --input_path "/PATH/TO/IMAGE" --top_k 5 + ``` +- ### 2.预测代码示例 -### Step3: 加载预训练模型 + ```python + import paddle + import paddlehub as hub + if __name__ == '__main__': + model = hub.Module(name='hrnet48_imagenet') + result = model.predict(['flower.jpg']) + ``` +- ### 3.如何开始Fine-tune -```python -import paddlehub as hub + - 在完成安装PaddlePaddle与PaddleHub后,通过执行`python train.py`即可开始使用hrnet48_imagenet对[Flowers](../../docs/reference/datasets.md#class-hubdatasetsflowers)等数据集进行Fine-tune。 -model = hub.Module(name='hrnet48_imagenet', - label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"], - load_checkpoint=None) -``` -* name(str): 选择预训练模型的名字。 -* label_list(list): 设置标签对应分类类别, 默认为Imagenet2012类别。 -* load _checkpoint(str): 模型参数地址。 + - 代码步骤 -PaddleHub提供许多图像分类预训练模型,如xception、mobilenet、efficientnet等,详细信息参见[图像分类模型](https://www.paddlepaddle.org.cn/hub?filter=en_category&value=ImageClassification)。 + - Step1: 定义数据预处理方式 + - ```python + import paddlehub.vision.transforms as T + transforms = T.Compose([T.Resize((256, 256)), + T.CenterCrop(224), + T.Normalize(mean=[0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225])], + to_rgb=True) + ``` -如果想尝试efficientnet模型,只需要更换Module中的'name'参数即可. 
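`Trainer` and `trainer.train` accept several optional arguments that the parameter lists in this guide describe only in prose (`use_vdl`, `num_workers`, `log_interval`, `save_interval`). The sketch below passes them explicitly; the keyword names come from those lists, while the concrete values are illustrative only and not recommendations.

```python
# Sketch only: keyword names follow the parameter lists in this guide; values are placeholders.
import paddle
import paddlehub as hub
import paddlehub.vision.transforms as T
from paddlehub.datasets import Flowers
from paddlehub.finetune.trainer import Trainer

transforms = T.Compose([T.Resize((256, 256)),
                        T.CenterCrop(224),
                        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])],
                       to_rgb=True)
flowers = Flowers(transforms)
flowers_validate = Flowers(transforms, mode='val')

model = hub.Module(name='hrnet48_imagenet',
                   label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"])
optimizer = paddle.optimizer.Adam(learning_rate=0.001, parameters=model.parameters())

# use_vdl enables VisualDL visualization of the training process (see the Trainer parameter list)
trainer = Trainer(model, optimizer,
                  use_vdl=True,
                  checkpoint_dir='img_classification_ckpt')
trainer.train(flowers,
              epochs=100,
              batch_size=32,
              num_workers=2,       # data-loading workers, 0 by default
              eval_dataset=flowers_validate,
              log_interval=10,     # print a log line every 10 training batches
              save_interval=1)     # save a checkpoint every epoch
```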
-```python -import paddlehub as hub + - `transforms` 数据增强模块定义了丰富的数据预处理方式,用户可按照需求替换自己需要的数据预处理方式。 -# 更换name参数即可无缝切换efficientnet模型, 代码示例如下 -module = hub.Module(name="efficientnetb7_imagenet") -``` -**NOTE:**目前部分模型还没有完全升级到2.0版本,敬请期待。 + - Step2: 下载数据集并使用 + - ```python + from paddlehub.datasets import Flowers + flowers = Flowers(transforms) + flowers_validate = Flowers(transforms, mode='val') + ``` -### Step4: 选择优化策略和运行配置 + * `transforms`: 数据预处理方式。 + * `mode`: 选择数据模式,可选项有 `train`, `test`, `val`, 默认为`train`。 -```python -import paddle -from paddlehub.finetune.trainer import Trainer + * 数据集的准备代码可以参考 [flowers.py](../../paddlehub/datasets/flowers.py)。`hub.datasets.Flowers()` 会自动从网络下载数据集并解压到用户目录下`$HOME/.paddlehub/dataset`目录。 -optimizer = paddle.optimizer.Adam(learning_rate=0.001, parameters=model.parameters()) -trainer = Trainer(model, optimizer, checkpoint_dir='img_classification_ckpt') -trainer.train(flowers, epochs=100, batch_size=32, eval_dataset=flowers_validate, save_interval=1) -``` + - Step3: 加载预训练模型 -#### 优化策略 + - ```python + model = hub.Module(name="hrnet48_imagenet", label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"]) + ``` + * `name`: 选择预训练模型的名字。 + * `label_list`: 设置输出分类类别,默认为Imagenet2012类别。 -Paddle2.0rc提供了多种优化器选择,如'SGD', 'Adam', 'Adamax'等,详细参见[策略](https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0-rc/api/paddle/optimizer/optimizer/Optimizer_cn.html)。 + - Step4: 选择优化策略和运行配置 -其中'Adam': + ```python + optimizer = paddle.optimizer.Adam(learning_rate=0.001, parameters=model.parameters()) + trainer = Trainer(model, optimizer, checkpoint_dir='img_classification_ckpt') + trainer.train(flowers, epochs=100, batch_size=32, eval_dataset=flowers_validate, save_interval=1) + ``` -* learning_rate: 全局学习率。默认为1e-3; -* parameters: 待优化模型参数。 -#### 运行配置 -'Trainer' 主要控制Fine-tune的训练,包含以下可控制的参数: + - 运行配置 -* model: 被优化模型; -* optimizer: 优化器选择; -* use_vdl: 是否使用vdl可视化训练过程; -* checkpoint_dir: 保存模型参数的地址; -* compare_metrics: 保存最优模型的衡量指标; + - `Trainer` 主要控制Fine-tune的训练,包含以下可控制的参数: -'trainer.train' 主要控制具体的训练过程,包含以下可控制的参数: + * `model`: 被优化模型; + * `optimizer`: 优化器选择; + * `use_vdl`: 是否使用vdl可视化训练过程; + * `checkpoint_dir`: 保存模型参数的地址; + * `compare_metrics`: 保存最优模型的衡量指标; -* train_dataset: 训练时所用的数据集; -* epochs: 训练轮数; -* batch_size: 训练的批大小,如果使用GPU,请根据实际情况调整batch_size; -* num_workers: works的数量,默认为0; -* eval_dataset: 验证集; -* log_interval: 打印日志的间隔, 单位为执行批训练的次数。 -* save_interval: 保存模型的间隔频次,单位为执行训练的轮数。 + - `trainer.train` 主要控制具体的训练过程,包含以下可控制的参数: -## 模型预测 + * `train_dataset`: 训练时所用的数据集; + * `epochs`: 训练轮数; + * `batch_size`: 训练的批大小,如果使用GPU,请根据实际情况调整batch_size; + * `num_workers`: works的数量,默认为0; + * `eval_dataset`: 验证集; + * `log_interval`: 打印日志的间隔, 单位为执行批训练的次数。 + * `save_interval`: 保存模型的间隔频次,单位为执行训练的轮数。 -当完成Fine-tune后,Fine-tune过程在验证集上表现最优的模型会被保存在'${CHECKPOINT_DIR}/best_model'目录下,其中'${CHECKPOINT_DIR}'目录为Fine-tune时所选择的保存checkpoint的目录。 -我们使用该模型来进行预测。predict.py脚本如下: + - 模型预测 -```python -import paddle -import paddlehub as hub + - 当完成Fine-tune后,Fine-tune过程在验证集上表现最优的模型会被保存在`${CHECKPOINT_DIR}/best_model`目录下,其中`${CHECKPOINT_DIR}`目录为Fine-tune时所选择的保存checkpoint的目录。 我们使用该模型来进行预测。predict.py脚本如下: -if __name__ == '__main__': + - ```python + import paddle + import paddlehub as hub + if __name__ == '__main__': + model = hub.Module(name='hrnet48_imagenet', label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"], load_checkpoint='/PATH/TO/CHECKPOINT') + result = model.predict(['flower.jpg']) + ``` - model = hub.Module(name='hrnet48_imagenet', label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"], 
load_checkpoint='/PATH/TO/CHECKPOINT') - result = model.predict(['flower.jpg']) -``` -参数配置正确后,请执行脚本'python predict.py', 加载模型具体可参见[加载](https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0-rc/api/paddle/framework/io/load_cn.html#load)。 + - **NOTE:** 进行预测时,所选择的module,checkpoint_dir,dataset必须和Fine-tune所用的一样。 -**NOTE:** 进行预测时,所选择的module,checkpoint_dir,dataset必须和Fine-tune所用的一样。 +## 四、服务部署 -## 服务部署 +- PaddleHub Serving可以部署一个在线分类任务服务。 -PaddleHub Serving可以部署一个在线分类任务服务 +- ### 第一步:启动PaddleHub Serving -## Step1: 启动PaddleHub Serving + - 运行启动命令: -运行启动命令: + - ```shell + $ hub serving start -m hrnet48_imagenet + ``` -```shell -$ hub serving start -m hrnet48_imagenet -``` + - 这样就完成了一个分类任务服务化API的部署,默认端口号为8866。 -这样就完成了一个分类任务服务化API的部署,默认端口号为8866。 + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 +- ### 第二步:发送预测请求 -## Step2: 发送预测请求 + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + ```python + import requests + import json + import cv2 + import base64 + import numpy as np + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + # 发送HTTP请求 + org_im = cv2.imread('/PATH/TO/IMAGE') + data = {'images':[cv2_to_base64(org_im)], 'top_k':2} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/hrnet48_imagenet" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + data =r.json()["results"]['data'] + ``` +## 五、更新历史 -```python -import requests -import json -import cv2 -import base64 +* 1.0.0 -import numpy as np - - -def cv2_to_base64(image): - data = cv2.imencode('.jpg', image)[1] - return base64.b64encode(data.tostring()).decode('utf8') - -def base64_to_cv2(b64str): - data = base64.b64decode(b64str.encode('utf8')) - data = np.fromstring(data, np.uint8) - data = cv2.imdecode(data, cv2.IMREAD_COLOR) - return data - -# 发送HTTP请求 -org_im = cv2.imread('/PATH/TO/IMAGE') - -data = {'images':[cv2_to_base64(org_im)], 'top_k':2} -headers = {"Content-type": "application/json"} -url = "http://127.0.0.1:8866/predict/hrnet48_imagenet" -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -data =r.json()["results"]['data'] -``` - -### 查看代码 - -[PaddleClas](https://github.com/PaddlePaddle/PaddleClas) - -### 依赖 - -paddlepaddle >= 2.0.0 - -paddlehub >= 2.0.0 + 初始发布 diff --git a/modules/image/classification/hrnet48_imagenet_ssld/README.md b/modules/image/classification/hrnet48_imagenet_ssld/README.md index 4b9fa929af3a9df2e95e2b93189818e4f162fa9b..ff49aa29797cf659f6b350b2670b66d6d5b2821e 100644 --- a/modules/image/classification/hrnet48_imagenet_ssld/README.md +++ b/modules/image/classification/hrnet48_imagenet_ssld/README.md @@ -1,192 +1,181 @@ -```shell -$ hub install hrnet48_imagenet_ssld==1.0.0 -``` +# hrnet48_imagenet_ssld -## 命令行预测 +|模型名称|hrnet48_imagenet_ssld| +| :--- | :---: | +|类别|图像-图像分类| +|网络|HRNet| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|是| +|模型大小|446MB| +|指标|-| +|最新更新日期|2021-09-14| -```shell -$ hub run hrnet48_imagenet_ssld --input_path "/PATH/TO/IMAGE" --top_k 5 -``` -## 脚本预测 +## 一、模型基本信息 -```python -import paddle -import paddlehub as hub +- ### 模型介绍 -if __name__ == '__main__': + - HRNet是微软亚洲研究院在2019年提出的全新神经网络。与之前的卷积神经网络不同,这个网络在网络的深层依然可以保持高分辨率,所以预测的关键点的热图更加准确,而且在空间上也更加准确。此外,该网络在其他对分辨率敏感的视觉任务中表现特别好,例如检测和分割。 - 
model = hub.Module(name='hrnet48_imagenet_ssld',) - result = model.predict([PATH/TO/IMAGE]) -``` -## Fine-tune代码步骤 +## 二、安装 -使用PaddleHub Fine-tune API进行Fine-tune可以分为4个步骤。 +- ### 1、环境依赖 -### Step1: 定义数据预处理方式 -```python -import paddlehub.vision.transforms as T + - paddlepaddle >= 2.0.0 -transforms = T.Compose([T.Resize((256, 256)), - T.CenterCrop(224), - T.Normalize(mean=[0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225])], - to_rgb=True) -``` + - paddlehub >= 2.0.0 -'transforms' 数据增强模块定义了丰富的数据预处理方式,用户可按照需求替换自己需要的数据预处理方式。 +- ### 2、安装 + - ```shell + $ hub install hrnet48_imagenet_ssld + ``` -### Step2: 下载数据集并使用 -```python -from paddlehub.datasets import Flowers + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) -flowers = Flowers(transforms) -flowers_validate = Flowers(transforms, mode='val') -``` -* transforms(Callable): 数据预处理方式。 -* mode(str): 选择数据模式,可选项有 'train', 'test', 'val', 默认为'train'。 +## 三、模型API预测 -'hub.datasets.Flowers()' 会自动从网络下载数据集并解压到用户目录下'$HOME/.paddlehub/dataset'目录。 +- ### 1.命令行预测 + ```shell + $ hub run hrnet48_imagenet_ssld --input_path "/PATH/TO/IMAGE" --top_k 5 + ``` +- ### 2.预测代码示例 -### Step3: 加载预训练模型 + ```python + import paddle + import paddlehub as hub + if __name__ == '__main__': + model = hub.Module(name='hrnet48_imagenet_ssld') + result = model.predict(['flower.jpg']) + ``` +- ### 3.如何开始Fine-tune -```python -import paddlehub as hub + - 在完成安装PaddlePaddle与PaddleHub后,通过执行`python train.py`即可开始使用hrnet48_imagenet_ssld对[Flowers](../../docs/reference/datasets.md#class-hubdatasetsflowers)等数据集进行Fine-tune。 -model = hub.Module(name='hrnet48_imagenet_ssld', - label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"], - load_checkpoint=None) -``` -* name(str): 选择预训练模型的名字。 -* label_list(list): 设置标签对应分类类别, 默认为Imagenet2012类别。 -* load _checkpoint(str): 模型参数地址。 + - 代码步骤 -PaddleHub提供许多图像分类预训练模型,如xception、mobilenet、efficientnet等,详细信息参见[图像分类模型](https://www.paddlepaddle.org.cn/hub?filter=en_category&value=ImageClassification)。 + - Step1: 定义数据预处理方式 + - ```python + import paddlehub.vision.transforms as T + transforms = T.Compose([T.Resize((256, 256)), + T.CenterCrop(224), + T.Normalize(mean=[0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225])], + to_rgb=True) + ``` -如果想尝试efficientnet模型,只需要更换Module中的'name'参数即可. 
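The serving example in this README issues a raw HTTP request; the sketch below only wraps that same request in a small helper and prints the response. It assumes the service was started with `hub serving start -m hrnet48_imagenet_ssld` on the default port 8866 and that predictions come back under `results` → `data`, as in the example shown in this guide; the image path is a placeholder.

```python
# Client-side sketch for the PaddleHub Serving API of hrnet48_imagenet_ssld.
import base64
import json

import cv2
import requests


def cv2_to_base64(image):
    # tobytes() is the non-deprecated equivalent of the tostring() call used elsewhere in this README
    data = cv2.imencode('.jpg', image)[1]
    return base64.b64encode(data.tobytes()).decode('utf8')


def classify(image_path, top_k=2,
             url='http://127.0.0.1:8866/predict/hrnet48_imagenet_ssld'):
    payload = {'images': [cv2_to_base64(cv2.imread(image_path))], 'top_k': top_k}
    headers = {"Content-type": "application/json"}
    r = requests.post(url=url, headers=headers, data=json.dumps(payload))
    # Assumes the response layout shown in this guide: results -> data
    return r.json()["results"]['data']


if __name__ == '__main__':
    print(classify('/PATH/TO/IMAGE'))  # path is a placeholder
```

Wrapping the request in a function keeps the base64 encoding and URL in one place when the client is reused for many images.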
-```python -import paddlehub as hub + - `transforms` 数据增强模块定义了丰富的数据预处理方式,用户可按照需求替换自己需要的数据预处理方式。 -# 更换name参数即可无缝切换efficientnet模型, 代码示例如下 -module = hub.Module(name="efficientnetb7_imagenet") -``` -**NOTE:**目前部分模型还没有完全升级到2.0版本,敬请期待。 + - Step2: 下载数据集并使用 + - ```python + from paddlehub.datasets import Flowers + flowers = Flowers(transforms) + flowers_validate = Flowers(transforms, mode='val') + ``` -### Step4: 选择优化策略和运行配置 + * `transforms`: 数据预处理方式。 + * `mode`: 选择数据模式,可选项有 `train`, `test`, `val`, 默认为`train`。 -```python -import paddle -from paddlehub.finetune.trainer import Trainer + * 数据集的准备代码可以参考 [flowers.py](../../paddlehub/datasets/flowers.py)。`hub.datasets.Flowers()` 会自动从网络下载数据集并解压到用户目录下`$HOME/.paddlehub/dataset`目录。 -optimizer = paddle.optimizer.Adam(learning_rate=0.001, parameters=model.parameters()) -trainer = Trainer(model, optimizer, checkpoint_dir='img_classification_ckpt') -trainer.train(flowers, epochs=100, batch_size=32, eval_dataset=flowers_validate, save_interval=1) -``` + - Step3: 加载预训练模型 -#### 优化策略 + - ```python + model = hub.Module(name="hrnet48_imagenet_ssld", label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"]) + ``` + * `name`: 选择预训练模型的名字。 + * `label_list`: 设置输出分类类别,默认为Imagenet2012类别。 -Paddle2.0rc提供了多种优化器选择,如'SGD', 'Adam', 'Adamax'等,详细参见[策略](https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0-rc/api/paddle/optimizer/optimizer/Optimizer_cn.html)。 + - Step4: 选择优化策略和运行配置 -其中'Adam': + ```python + optimizer = paddle.optimizer.Adam(learning_rate=0.001, parameters=model.parameters()) + trainer = Trainer(model, optimizer, checkpoint_dir='img_classification_ckpt') + trainer.train(flowers, epochs=100, batch_size=32, eval_dataset=flowers_validate, save_interval=1) + ``` -* learning_rate: 全局学习率。默认为1e-3; -* parameters: 待优化模型参数。 -#### 运行配置 -'Trainer' 主要控制Fine-tune的训练,包含以下可控制的参数: + - 运行配置 -* model: 被优化模型; -* optimizer: 优化器选择; -* use_vdl: 是否使用vdl可视化训练过程; -* checkpoint_dir: 保存模型参数的地址; -* compare_metrics: 保存最优模型的衡量指标; + - `Trainer` 主要控制Fine-tune的训练,包含以下可控制的参数: -'trainer.train' 主要控制具体的训练过程,包含以下可控制的参数: + * `model`: 被优化模型; + * `optimizer`: 优化器选择; + * `use_vdl`: 是否使用vdl可视化训练过程; + * `checkpoint_dir`: 保存模型参数的地址; + * `compare_metrics`: 保存最优模型的衡量指标; -* train_dataset: 训练时所用的数据集; -* epochs: 训练轮数; -* batch_size: 训练的批大小,如果使用GPU,请根据实际情况调整batch_size; -* num_workers: works的数量,默认为0; -* eval_dataset: 验证集; -* log_interval: 打印日志的间隔, 单位为执行批训练的次数。 -* save_interval: 保存模型的间隔频次,单位为执行训练的轮数。 + - `trainer.train` 主要控制具体的训练过程,包含以下可控制的参数: -## 模型预测 + * `train_dataset`: 训练时所用的数据集; + * `epochs`: 训练轮数; + * `batch_size`: 训练的批大小,如果使用GPU,请根据实际情况调整batch_size; + * `num_workers`: works的数量,默认为0; + * `eval_dataset`: 验证集; + * `log_interval`: 打印日志的间隔, 单位为执行批训练的次数。 + * `save_interval`: 保存模型的间隔频次,单位为执行训练的轮数。 -当完成Fine-tune后,Fine-tune过程在验证集上表现最优的模型会被保存在'${CHECKPOINT_DIR}/best_model'目录下,其中'${CHECKPOINT_DIR}'目录为Fine-tune时所选择的保存checkpoint的目录。 -我们使用该模型来进行预测。predict.py脚本如下: + - 模型预测 -```python -import paddle -import paddlehub as hub + - 当完成Fine-tune后,Fine-tune过程在验证集上表现最优的模型会被保存在`${CHECKPOINT_DIR}/best_model`目录下,其中`${CHECKPOINT_DIR}`目录为Fine-tune时所选择的保存checkpoint的目录。 我们使用该模型来进行预测。predict.py脚本如下: -if __name__ == '__main__': + - ```python + import paddle + import paddlehub as hub + if __name__ == '__main__': + model = hub.Module(name='hrnet48_imagenet_ssld', label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"], load_checkpoint='/PATH/TO/CHECKPOINT') + result = model.predict(['flower.jpg']) + ``` - model = hub.Module(name='hrnet48_imagenet_ssld', label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"], 
load_checkpoint='/PATH/TO/CHECKPOINT') - result = model.predict(['flower.jpg']) -``` -参数配置正确后,请执行脚本'python predict.py', 加载模型具体可参见[加载](https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0-rc/api/paddle/framework/io/load_cn.html#load)。 + - **NOTE:** 进行预测时,所选择的module,checkpoint_dir,dataset必须和Fine-tune所用的一样。 -**NOTE:** 进行预测时,所选择的module,checkpoint_dir,dataset必须和Fine-tune所用的一样。 +## 四、服务部署 -## 服务部署 +- PaddleHub Serving可以部署一个在线分类任务服务。 -PaddleHub Serving可以部署一个在线分类任务服务 +- ### 第一步:启动PaddleHub Serving -## Step1: 启动PaddleHub Serving + - 运行启动命令: -运行启动命令: + - ```shell + $ hub serving start -m hrnet48_imagenet_ssld + ``` -```shell -$ hub serving start -m hrnet48_imagenet_ssld -``` + - 这样就完成了一个分类任务服务化API的部署,默认端口号为8866。 -这样就完成了一个分类任务服务化API的部署,默认端口号为8866。 + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 +- ### 第二步:发送预测请求 -## Step2: 发送预测请求 + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + ```python + import requests + import json + import cv2 + import base64 + import numpy as np + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + # 发送HTTP请求 + org_im = cv2.imread('/PATH/TO/IMAGE') + data = {'images':[cv2_to_base64(org_im)], 'top_k':2} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/hrnet48_imagenet_ssld" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + data =r.json()["results"]['data'] + ``` +## 五、更新历史 -```python -import requests -import json -import cv2 -import base64 +* 1.0.0 -import numpy as np - - -def cv2_to_base64(image): - data = cv2.imencode('.jpg', image)[1] - return base64.b64encode(data.tostring()).decode('utf8') - -def base64_to_cv2(b64str): - data = base64.b64decode(b64str.encode('utf8')) - data = np.fromstring(data, np.uint8) - data = cv2.imdecode(data, cv2.IMREAD_COLOR) - return data - -# 发送HTTP请求 -org_im = cv2.imread('/PATH/TO/IMAGE') - -data = {'images':[cv2_to_base64(org_im)], 'top_k':2} -headers = {"Content-type": "application/json"} -url = "http://127.0.0.1:8866/predict/hrnet48_imagenet_ssld" -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -data =r.json()["results"]['data'] -``` - -### 查看代码 - -[PaddleClas](https://github.com/PaddlePaddle/PaddleClas) - -### 依赖 - -paddlepaddle >= 2.0.0 - -paddlehub >= 2.0.0 + 初始发布 diff --git a/modules/image/classification/hrnet64_imagenet/README.md b/modules/image/classification/hrnet64_imagenet/README.md index ed6f1b153bf5e1113e3f2aa588ff15fcc5bf9c87..7cf46cc66192040870e59a4a9712fa5ceb986f26 100644 --- a/modules/image/classification/hrnet64_imagenet/README.md +++ b/modules/image/classification/hrnet64_imagenet/README.md @@ -1,192 +1,181 @@ -```shell -$ hub install hrnet64_imagenet==1.0.0 -``` +# hrnet64_imagenet -## 命令行预测 +|模型名称|hrnet64_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|HRNet| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|是| +|模型大小|740MB| +|指标|-| +|最新更新日期|2021-09-14| -```shell -$ hub run hrnet64_imagenet --input_path "/PATH/TO/IMAGE" --top_k 5 -``` -## 脚本预测 +## 一、模型基本信息 -```python -import paddle -import paddlehub as hub +- ### 模型介绍 -if __name__ == '__main__': + - HRNet是微软亚洲研究院在2019年提出的全新神经网络。与之前的卷积神经网络不同,这个网络在网络的深层依然可以保持高分辨率,所以预测的关键点的热图更加准确,而且在空间上也更加准确。此外,该网络在其他对分辨率敏感的视觉任务中表现特别好,例如检测和分割。 - model = 
hub.Module(name='hrnet64_imagenet',) - result = model.predict([PATH/TO/IMAGE]) -``` -## Fine-tune代码步骤 +## 二、安装 -使用PaddleHub Fine-tune API进行Fine-tune可以分为4个步骤。 +- ### 1、环境依赖 -### Step1: 定义数据预处理方式 -```python -import paddlehub.vision.transforms as T + - paddlepaddle >= 2.0.0 -transforms = T.Compose([T.Resize((256, 256)), - T.CenterCrop(224), - T.Normalize(mean=[0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225])], - to_rgb=True) -``` + - paddlehub >= 2.0.0 -'transforms' 数据增强模块定义了丰富的数据预处理方式,用户可按照需求替换自己需要的数据预处理方式。 +- ### 2、安装 + - ```shell + $ hub install hrnet64_imagenet + ``` -### Step2: 下载数据集并使用 -```python -from paddlehub.datasets import Flowers + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) -flowers = Flowers(transforms) -flowers_validate = Flowers(transforms, mode='val') -``` -* transforms(Callable): 数据预处理方式。 -* mode(str): 选择数据模式,可选项有 'train', 'test', 'val', 默认为'train'。 +## 三、模型API预测 -'hub.datasets.Flowers()' 会自动从网络下载数据集并解压到用户目录下'$HOME/.paddlehub/dataset'目录。 +- ### 1.命令行预测 + ```shell + $ hub run hrnet64_imagenet --input_path "/PATH/TO/IMAGE" --top_k 5 + ``` +- ### 2.预测代码示例 -### Step3: 加载预训练模型 + ```python + import paddle + import paddlehub as hub + if __name__ == '__main__': + model = hub.Module(name='hrnet64_imagenet') + result = model.predict(['flower.jpg']) + ``` +- ### 3.如何开始Fine-tune -```python -import paddlehub as hub + - 在完成安装PaddlePaddle与PaddleHub后,通过执行`python train.py`即可开始使用hrnet64_imagenet对[Flowers](../../docs/reference/datasets.md#class-hubdatasetsflowers)等数据集进行Fine-tune。 -model = hub.Module(name='hrnet64_imagenet', - label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"], - load_checkpoint=None) -``` -* name(str): 选择预训练模型的名字。 -* label_list(list): 设置标签对应分类类别, 默认为Imagenet2012类别。 -* load _checkpoint(str): 模型参数地址。 + - 代码步骤 -PaddleHub提供许多图像分类预训练模型,如xception、mobilenet、efficientnet等,详细信息参见[图像分类模型](https://www.paddlepaddle.org.cn/hub?filter=en_category&value=ImageClassification)。 + - Step1: 定义数据预处理方式 + - ```python + import paddlehub.vision.transforms as T + transforms = T.Compose([T.Resize((256, 256)), + T.CenterCrop(224), + T.Normalize(mean=[0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225])], + to_rgb=True) + ``` -如果想尝试efficientnet模型,只需要更换Module中的'name'参数即可. 
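After fine-tuning, prediction needs only the module name, the same label list and the saved parameters (the best model is kept under `${CHECKPOINT_DIR}/best_model`, as noted in this guide). A minimal sketch follows; the checkpoint and image paths are placeholders, and passing several paths in one call is an assumption based on the list argument used in the examples above.

```python
# Prediction sketch: paths are placeholders; label_list must match the one used for fine-tuning.
import paddlehub as hub

if __name__ == '__main__':
    model = hub.Module(name='hrnet64_imagenet',
                       label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"],
                       load_checkpoint='/PATH/TO/CHECKPOINT')  # e.g. the parameters saved under best_model
    # The documented examples pass a list of image paths, so several images may be scored together.
    result = model.predict(['flower1.jpg', 'flower2.jpg'])
    print(result)
```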
-```python -import paddlehub as hub + - `transforms` 数据增强模块定义了丰富的数据预处理方式,用户可按照需求替换自己需要的数据预处理方式。 -# 更换name参数即可无缝切换efficientnet模型, 代码示例如下 -module = hub.Module(name="efficientnetb7_imagenet") -``` -**NOTE:**目前部分模型还没有完全升级到2.0版本,敬请期待。 + - Step2: 下载数据集并使用 + - ```python + from paddlehub.datasets import Flowers + flowers = Flowers(transforms) + flowers_validate = Flowers(transforms, mode='val') + ``` -### Step4: 选择优化策略和运行配置 + * `transforms`: 数据预处理方式。 + * `mode`: 选择数据模式,可选项有 `train`, `test`, `val`, 默认为`train`。 -```python -import paddle -from paddlehub.finetune.trainer import Trainer + * 数据集的准备代码可以参考 [flowers.py](../../paddlehub/datasets/flowers.py)。`hub.datasets.Flowers()` 会自动从网络下载数据集并解压到用户目录下`$HOME/.paddlehub/dataset`目录。 -optimizer = paddle.optimizer.Adam(learning_rate=0.001, parameters=model.parameters()) -trainer = Trainer(model, optimizer, checkpoint_dir='img_classification_ckpt') -trainer.train(flowers, epochs=100, batch_size=32, eval_dataset=flowers_validate, save_interval=1) -``` + - Step3: 加载预训练模型 -#### 优化策略 + - ```python + model = hub.Module(name="hrnet64_imagenet", label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"]) + ``` + * `name`: 选择预训练模型的名字。 + * `label_list`: 设置输出分类类别,默认为Imagenet2012类别。 -Paddle2.0rc提供了多种优化器选择,如'SGD', 'Adam', 'Adamax'等,详细参见[策略](https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0-rc/api/paddle/optimizer/optimizer/Optimizer_cn.html)。 + - Step4: 选择优化策略和运行配置 -其中'Adam': + ```python + optimizer = paddle.optimizer.Adam(learning_rate=0.001, parameters=model.parameters()) + trainer = Trainer(model, optimizer, checkpoint_dir='img_classification_ckpt') + trainer.train(flowers, epochs=100, batch_size=32, eval_dataset=flowers_validate, save_interval=1) + ``` -* learning_rate: 全局学习率。默认为1e-3; -* parameters: 待优化模型参数。 -#### 运行配置 -'Trainer' 主要控制Fine-tune的训练,包含以下可控制的参数: + - 运行配置 -* model: 被优化模型; -* optimizer: 优化器选择; -* use_vdl: 是否使用vdl可视化训练过程; -* checkpoint_dir: 保存模型参数的地址; -* compare_metrics: 保存最优模型的衡量指标; + - `Trainer` 主要控制Fine-tune的训练,包含以下可控制的参数: -'trainer.train' 主要控制具体的训练过程,包含以下可控制的参数: + * `model`: 被优化模型; + * `optimizer`: 优化器选择; + * `use_vdl`: 是否使用vdl可视化训练过程; + * `checkpoint_dir`: 保存模型参数的地址; + * `compare_metrics`: 保存最优模型的衡量指标; -* train_dataset: 训练时所用的数据集; -* epochs: 训练轮数; -* batch_size: 训练的批大小,如果使用GPU,请根据实际情况调整batch_size; -* num_workers: works的数量,默认为0; -* eval_dataset: 验证集; -* log_interval: 打印日志的间隔, 单位为执行批训练的次数。 -* save_interval: 保存模型的间隔频次,单位为执行训练的轮数。 + - `trainer.train` 主要控制具体的训练过程,包含以下可控制的参数: -## 模型预测 + * `train_dataset`: 训练时所用的数据集; + * `epochs`: 训练轮数; + * `batch_size`: 训练的批大小,如果使用GPU,请根据实际情况调整batch_size; + * `num_workers`: works的数量,默认为0; + * `eval_dataset`: 验证集; + * `log_interval`: 打印日志的间隔, 单位为执行批训练的次数。 + * `save_interval`: 保存模型的间隔频次,单位为执行训练的轮数。 -当完成Fine-tune后,Fine-tune过程在验证集上表现最优的模型会被保存在'${CHECKPOINT_DIR}/best_model'目录下,其中'${CHECKPOINT_DIR}'目录为Fine-tune时所选择的保存checkpoint的目录。 -我们使用该模型来进行预测。predict.py脚本如下: + - 模型预测 -```python -import paddle -import paddlehub as hub + - 当完成Fine-tune后,Fine-tune过程在验证集上表现最优的模型会被保存在`${CHECKPOINT_DIR}/best_model`目录下,其中`${CHECKPOINT_DIR}`目录为Fine-tune时所选择的保存checkpoint的目录。 我们使用该模型来进行预测。predict.py脚本如下: -if __name__ == '__main__': + - ```python + import paddle + import paddlehub as hub + if __name__ == '__main__': + model = hub.Module(name='hrnet64_imagenet', label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"], load_checkpoint='/PATH/TO/CHECKPOINT') + result = model.predict(['flower.jpg']) + ``` - model = hub.Module(name='hrnet64_imagenet', label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"], 
load_checkpoint='/PATH/TO/CHECKPOINT') - result = model.predict(['flower.jpg']) -``` -参数配置正确后,请执行脚本'python predict.py', 加载模型具体可参见[加载](https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0-rc/api/paddle/framework/io/load_cn.html#load)。 + - **NOTE:** 进行预测时,所选择的module,checkpoint_dir,dataset必须和Fine-tune所用的一样。 -**NOTE:** 进行预测时,所选择的module,checkpoint_dir,dataset必须和Fine-tune所用的一样。 +## 四、服务部署 -## 服务部署 +- PaddleHub Serving可以部署一个在线分类任务服务。 -PaddleHub Serving可以部署一个在线分类任务服务 +- ### 第一步:启动PaddleHub Serving -## Step1: 启动PaddleHub Serving + - 运行启动命令: -运行启动命令: + - ```shell + $ hub serving start -m hrnet64_imagenet + ``` -```shell -$ hub serving start -m hrnet64_imagenet -``` + - 这样就完成了一个分类任务服务化API的部署,默认端口号为8866。 -这样就完成了一个分类任务服务化API的部署,默认端口号为8866。 + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 +- ### 第二步:发送预测请求 -## Step2: 发送预测请求 + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + ```python + import requests + import json + import cv2 + import base64 + import numpy as np + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + # 发送HTTP请求 + org_im = cv2.imread('/PATH/TO/IMAGE') + data = {'images':[cv2_to_base64(org_im)], 'top_k':2} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/hrnet64_imagenet" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + data =r.json()["results"]['data'] + ``` +## 五、更新历史 -```python -import requests -import json -import cv2 -import base64 +* 1.0.0 -import numpy as np - - -def cv2_to_base64(image): - data = cv2.imencode('.jpg', image)[1] - return base64.b64encode(data.tostring()).decode('utf8') - -def base64_to_cv2(b64str): - data = base64.b64decode(b64str.encode('utf8')) - data = np.fromstring(data, np.uint8) - data = cv2.imdecode(data, cv2.IMREAD_COLOR) - return data - -# 发送HTTP请求 -org_im = cv2.imread('/PATH/TO/IMAGE') - -data = {'images':[cv2_to_base64(org_im)], 'top_k':2} -headers = {"Content-type": "application/json"} -url = "http://127.0.0.1:8866/predict/hrnet64_imagenet" -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -data =r.json()["results"]['data'] -``` - -### 查看代码 - -[PaddleClas](https://github.com/PaddlePaddle/PaddleClas) - -### 依赖 - -paddlepaddle >= 2.0.0 - -paddlehub >= 2.0.0 + 初始发布 diff --git a/modules/image/classification/inception_v4_imagenet/README.md b/modules/image/classification/inception_v4_imagenet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..ca8d613ee382e526ab8b75c1a502639e8c0fec40 --- /dev/null +++ b/modules/image/classification/inception_v4_imagenet/README.md @@ -0,0 +1,84 @@ +# inception_v4_imagenet + +|模型名称|inception_v4_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|Inception_V4| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|否| +|模型大小|167MB| +|最新更新日期|-| +|数据指标|-| + + +## 一、模型基本信息 + + + +- ### 模型介绍 + + - Inception 结构最初由 GoogLeNet 引入,因此 GoogLeNet 也被称为 Inception-v1,通过在 Inception-v1 的基础上引入Batch Normalization、分解、残差连接等技术,设计出了Inception-v4。 + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + + +- ### 2、安装 + + - ```shell + $ hub install inception_v4_imagenet + ``` + - 
如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run inception_v4_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现图像分类模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="inception_v4_imagenet") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - 分类接口API。 + - **参数** + - data:dict类型,key为image,str类型,value为待检测的图片路径,list类型。 + + - **返回** + - result:list类型,每个元素为对应输入图片的预测结果。预测结果为dict类型,key为该图片分类结果label,value为该label对应的概率。 + + + + + +## 四、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install inception_v4_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/marine_biometrics/README.md b/modules/image/classification/marine_biometrics/README.md new file mode 100644 index 0000000000000000000000000000000000000000..797288aee8ce47c102dc0bf2973bd57fa8d473d1 --- /dev/null +++ b/modules/image/classification/marine_biometrics/README.md @@ -0,0 +1,85 @@ +# marine_biometrics + +|模型名称|marine_biometrics| +| :--- | :---: | +|类别|图像-图像分类| +|网络|ResNet50_vd_ssld| +|数据集|Fish4Knowledge| +|是否支持Fine-tuning|否| +|模型大小|84MB| +|最新更新日期|-| +|数据指标|-| + + +## 一、模型基本信息 + + + +- ### 模型介绍 + + - 海洋生物识别(marine_biometrics),该模型可准确识别鱼的种类。该PaddleHub Module支持API预测及命令行预测。 + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + + +- ### 2、安装 + + - ```shell + $ hub install marine_biometrics + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run marine_biometrics --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现图像分类模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="marine_biometrics") + images = [cv2.imread('/PATH/TO/IMAGE')] + results = classifier.predict(images=images) + for result in results: + print(result) + ``` + +- ### 3、API + + - ```python + def predict(images) + ``` + - 分类接口API。 + - **参数** + - images:list类型,待检测的图像。 + + - **返回** + - result:list类型,每个元素为对应输入图片的预测结果。预测结果为dict类型,key为该图片分类结果label,value为该label对应的概率 + + + + + +## 四、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install marine_biometrics==1.0.0 + ``` diff --git a/modules/thirdparty/video/Video_editing/SkyAR/__init__.py b/modules/image/classification/marine_biometrics/__init__.py similarity index 100% rename from modules/thirdparty/video/Video_editing/SkyAR/__init__.py rename to modules/image/classification/marine_biometrics/__init__.py diff --git a/modules/thirdparty/image/classification/marine_biometrics/module.py b/modules/image/classification/marine_biometrics/module.py similarity index 100% rename from modules/thirdparty/image/classification/marine_biometrics/module.py rename to modules/image/classification/marine_biometrics/module.py diff --git 
a/modules/image/classification/marine_biometrics/requirements.txt b/modules/image/classification/marine_biometrics/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..307c5de765a9bb322c4deebf2bcee55109e7ce74 --- /dev/null +++ b/modules/image/classification/marine_biometrics/requirements.txt @@ -0,0 +1 @@ +paddlex==1.3.7 diff --git a/modules/thirdparty/image/classification/marine_biometrics/serving_client_demo.py b/modules/image/classification/marine_biometrics/serving_client_demo.py similarity index 100% rename from modules/thirdparty/image/classification/marine_biometrics/serving_client_demo.py rename to modules/image/classification/marine_biometrics/serving_client_demo.py diff --git a/modules/image/classification/mobilenet_v2_animals/README.md b/modules/image/classification/mobilenet_v2_animals/README.md index f1824d6536d8e8e5ea5400df5486f0ff8ec2268d..e1ba58dcdce89cfc89cf33108716e903d6458d54 100644 --- a/modules/image/classification/mobilenet_v2_animals/README.md +++ b/modules/image/classification/mobilenet_v2_animals/README.md @@ -1,159 +1,134 @@ -```shell -$ hub install mobilenet_v2_animals==1.0.0 -``` +# mobilenet_v2_animals -

-MobileNet 系列的网络结构
+|模型名称|mobilenet_v2_animals| +| :--- | :---: | +|类别|图像-图像分类| +|网络|MobileNet_v2| +|数据集|百度自建动物数据集| +|是否支持Fine-tuning|否| +|模型大小|50MB| +|最新更新日期|-| +|数据指标|-| -模型的详情可参考[论文](https://arxiv.org/pdf/1801.04381.pdf) -## 命令行预测 +## 一、模型基本信息 -``` -hub run mobilenet_v2_animals --input_path "/PATH/TO/IMAGE" -``` -## API -```python -def get_expected_image_width() -``` +- ### 模型介绍 -返回预处理的图片宽度,也就是224。 + - MobileNet V2 是一个轻量化的卷积神经网络,它在 MobileNet 的基础上,做了 Inverted Residuals 和 Linear bottlenecks 这两大改进。该 PaddleHub Module 是在百度自建动物数据集上训练得到的,可用于图像分类和特征提取,当前已支持7978种动物的分类识别。模型的详情可参考[论文](https://arxiv.org/pdf/1801.04381.pdf)。 -```python -def get_expected_image_height() -``` -返回预处理的图片高度,也就是224。 -```python -def get_pretrained_images_mean() -``` +## 二、安装 -返回预处理的图片均值,也就是 \[0.485, 0.456, 0.406\]。 +- ### 1、环境依赖 -```python -def get_pretrained_images_std() -``` + - paddlepaddle >= 1.6.2 -返回预处理的图片标准差,也就是 \[0.229, 0.224, 0.225\]。 + - paddlehub >= 1.6.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) -```python -def context(trainable=True, pretrained=True) -``` +- ### 2、安装 -**参数** + - ```shell + $ hub install mobilenet_v2_animals + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) -* trainable (bool): 计算图的参数是否为可训练的; -* pretrained (bool): 是否加载默认的预训练模型。 +## 三、模型API预测 -**返回** +- ### 1、命令行预测 -* inputs (dict): 计算图的输入,key 为 'image', value 为图片的张量; -* outputs (dict): 计算图的输出,key 为 'classification' 和 'feature_map',其相应的值为: - * classification (paddle.fluid.framework.Variable): 分类结果,也就是全连接层的输出; - * feature\_map (paddle.fluid.framework.Variable): 特征匹配,全连接层前面的那个张量。 -* context\_prog(fluid.Program): 计算图,用于迁移学习。 + - ```shell + $ hub run mobilenet_v2_animals --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现分类模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) -```python -def classification(images=None, - paths=None, - batch_size=1, - use_gpu=False, - top_k=1): -``` +- ### 2、预测代码示例 -**参数** + - ```python + import paddlehub as hub + import cv2 -* images (list\[numpy.ndarray\]): 图片数据,每一个图片数据的shape 均为 \[H, W, C\],颜色空间为 BGR; -* paths (list\[str\]): 图片的路径; -* batch\_size (int): batch 的大小; -* use\_gpu (bool): 是否使用 GPU 来预测; -* top\_k (int): 返回预测结果的前 k 个。 + classifier = hub.Module(name="mobilenet_v2_animals") + result = classifier.classification(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = classifier.classification(paths=['/PATH/TO/IMAGE']) + ``` -**返回** +- ### 3、API -res (list\[dict\]): 分类结果,列表的每一个元素均为字典,其中 key 为识别动物的类别,value为置信度。 -```python -def save_inference_model(dirname, - model_filename=None, - params_filename=None, - combined=True) -``` + - ```python + def classification(images=None, + paths=None, + batch_size=1, + use_gpu=False, + top_k=1): + ``` + - 分类接口API。 + - **参数** -将模型保存到指定路径。 + - images (list\[numpy.ndarray\]): 图片数据,每一个图片数据的shape 均为 \[H, W, C\],颜色空间为 BGR;
+ - paths (list\[str\]): 图片的路径;
+ - batch\_size (int): batch 的大小;
+ - use\_gpu (bool): 是否使用 GPU;**若使用GPU,请先设置CUDA_VISIBLE_DEVICES环境变量**
+ - top\_k (int): 返回预测结果的前 k 个。 -**参数** + - **返回** -* dirname: 存在模型的目录名称 -* model_filename: 模型文件名称,默认为\_\_model\_\_ -* params_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效) -* combined: 是否将参数保存到统一的一个文件中 + - res (list\[dict\]): 分类结果,列表的每一个元素均为字典,其中 key 为识别的菜品类别,value为置信度。 -## 代码示例 -```python -import paddlehub as hub -import cv2 -classifier = hub.Module(name="mobilenet_v2_animals") +## 四、服务部署 -result = classifier.classification(images=[cv2.imread('/PATH/TO/IMAGE')]) -# or -# result = classifier.classification(paths=['/PATH/TO/IMAGE']) -``` +- PaddleHub Serving可以部署一个动物识别的在线服务。 -## 服务部署 +- ### 第一步:启动PaddleHub Serving -PaddleHub Serving可以部署一个在线动物识别服务。 + - 运行启动命令: + - ```shell + $ hub serving start -m mobilenet_v2_animals + ``` -## 第一步:启动PaddleHub Serving + - 这样就完成了一个动物识别的在线服务的部署,默认端口号为8866。 -运行启动命令: -```shell -$ hub serving start -m mobilenet_v2_animals -``` + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 -这样就完成了一个在线动物识别服务化API的部署,默认端口号为8866。 +- ### 第二步:发送预测请求 -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 -## 第二步:发送预测请求 + - ```python + import requests + import json + import cv2 + import base64 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') -```python -import requests -import json -import cv2 -import base64 + # 发送HTTP请求 + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/mobilenet_v2_animals" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + # 打印预测结果 + print(r.json()["results"]) + ``` -def cv2_to_base64(image): - data = cv2.imencode('.jpg', image)[1] - return base64.b64encode(data.tostring()).decode('utf8') +## 五、更新历史 -# 发送HTTP请求 -data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} -headers = {"Content-type": "application/json"} -url = "http://127.0.0.1:8866/predict/mobilenet_v2_animals" -r = requests.post(url=url, headers=headers, data=json.dumps(data)) +* 1.0.0 -# 打印预测结果 -print(r.json()["results"]) -``` - -### 查看代码 - -[PaddlePaddle/models 图像分类](https://github.com/PaddlePaddle/models/tree/develop/PaddleCV/image_classification) - -### 依赖 - -paddlepaddle >= 1.6.2 - -paddlehub >= 1.6.0 + 初始发布 + - ```shell + $ hub install mobilenet_v2_animals==1.0.0 + ``` diff --git a/modules/image/classification/mobilenet_v2_dishes/README.md b/modules/image/classification/mobilenet_v2_dishes/README.md index cdbd1c048620f88c5b2564c642912de839ce1230..aad927459be227f693f107e47e61655a934ce95b 100644 --- a/modules/image/classification/mobilenet_v2_dishes/README.md +++ b/modules/image/classification/mobilenet_v2_dishes/README.md @@ -1,159 +1,139 @@ -```shell -$ hub install mobilenet_v2_dishes==1.0.0 -``` +# mobilenet_v2_dishes -

-MobileNet 系列的网络结构
- -模型的详情可参考[论文](https://arxiv.org/pdf/1801.04381.pdf) - -## 命令行预测 +|模型名称|mobilenet_v2_dishes| +| :--- | :---: | +|类别|图像-图像分类| +|网络|MobileNet_v2| +|数据集|百度自建菜品数据集| +|是否支持Fine-tuning|否| +|模型大小|52MB| +|最新更新日期|-| +|数据指标|-| -``` -hub run mobilenet_v2_dishes --input_path "/PATH/TO/IMAGE" -``` -## API +## 一、模型基本信息 -```python -def get_expected_image_width() -``` -返回预处理的图片宽度,也就是224。 -```python -def get_expected_image_height() -``` +- ### 模型介绍 -返回预处理的图片高度,也就是224。 + - MobileNet V2 是一个轻量化的卷积神经网络,它在 MobileNet 的基础上,做了 Inverted Residuals 和 Linear bottlenecks 这两大改进。该 PaddleHub Module 是在百度自建菜品数据集上训练得到的,可用于图像分类和特征提取,当前已支持8416种菜品的分类识别。 -```python -def get_pretrained_images_mean() -``` - -返回预处理的图片均值,也就是 \[0.485, 0.456, 0.406\]。 - -```python -def get_pretrained_images_std() -``` +

-返回预处理的图片标准差,也就是 \[0.229, 0.224, 0.225\]。 + - 更多详情参考:[MobileNetV2: Inverted Residuals and Linear Bottlenecks](https://arxiv.org/pdf/1801.04381.pdf) +## 二、安装 -```python -def context(trainable=True, pretrained=True) -``` +- ### 1、环境依赖 -**参数** + - paddlepaddle >= 1.6.2 -* trainable (bool): 计算图的参数是否为可训练的; -* pretrained (bool): 是否加载默认的预训练模型。 + - paddlehub >= 1.6.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) -**返回** -* inputs (dict): 计算图的输入,key 为 'image', value 为图片的张量; -* outputs (dict): 计算图的输出,key 为 'classification' 和 'feature_map',其相应的值为: - * classification (paddle.fluid.framework.Variable): 分类结果,也就是全连接层的输出; - * feature\_map (paddle.fluid.framework.Variable): 特征匹配,全连接层前面的那个张量。 -* context\_prog(fluid.Program): 计算图,用于迁移学习。 +- ### 2、安装 -```python -def classification(images=None, - paths=None, - batch_size=1, - use_gpu=False, - top_k=1): -``` + - ```shell + $ hub install mobilenet_v2_dishes + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) -**参数** +## 三、模型API预测 -* images (list\[numpy.ndarray\]): 图片数据,每一个图片数据的shape 均为 \[H, W, C\],颜色空间为 BGR; -* paths (list\[str\]): 图片的路径; -* batch\_size (int): batch 的大小; -* use\_gpu (bool): 是否使用 GPU 来预测; -* top\_k (int): 返回预测结果的前 k 个。 +- ### 1、命令行预测 -**返回** + - ```shell + $ hub run mobilenet_v2_dishes --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现菜品分类模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) -res (list\[dict\]): 分类结果,列表的每一个元素均为字典,其中 key 为识别的菜品类别,value为置信度。 +- ### 2、预测代码示例 -```python -def save_inference_model(dirname, - model_filename=None, - params_filename=None, - combined=True) -``` + - ```python + import paddlehub as hub + import cv2 -将模型保存到指定路径。 + classifier = hub.Module(name="mobilenet_v2_dishes") + result = classifier.classification(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = classifier.classification(paths=['/PATH/TO/IMAGE']) + ``` -**参数** +- ### 3、API -* dirname: 存在模型的目录名称 -* model_filename: 模型文件名称,默认为\_\_model\_\_ -* params_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效) -* combined: 是否将参数保存到统一的一个文件中 + - ```python + def classification(images=None, + paths=None, + batch_size=1, + use_gpu=False, + top_k=1): + ``` + - 分类接口API。 + - **参数** -## 代码示例 + - images (list\[numpy.ndarray\]): 图片数据,每一个图片数据的shape 均为 \[H, W, C\],颜色空间为 BGR;
+ - paths (list\[str\]): 图片的路径;
+ - batch\_size (int): batch 的大小;
+ - use\_gpu (bool): 是否使用 GPU;**若使用GPU,请先设置CUDA_VISIBLE_DEVICES环境变量**
+ - top\_k (int): 返回预测结果的前 k 个。 -```python -import paddlehub as hub -import cv2 + - **返回** -classifier = hub.Module(name="mobilenet_v2_dishes") + - res (list\[dict\]): 分类结果,列表的每一个元素均为字典,其中 key 为识别的菜品类别,value为置信度。 -result = classifier.classification(images=[cv2.imread('/PATH/TO/IMAGE')]) -# or -# result = classifier.classification(paths=['/PATH/TO/IMAGE']) -``` -## 服务部署 -PaddleHub Serving可以部署一个菜品分类的在线服务。 -## 第一步:启动PaddleHub Serving +## 四、服务部署 -运行启动命令: -```shell -$ hub serving start -m mobilenet_v2_dishes -``` +- PaddleHub Serving可以部署一个菜品分类的在线服务。 -这样就完成了一个菜品分类的在线服务的部署,默认端口号为8866。 +- ### 第一步:启动PaddleHub Serving -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + - 运行启动命令: + - ```shell + $ hub serving start -m mobilenet_v2_dishes + ``` -## 第二步:发送预测请求 + - 这样就完成了一个菜品分类的在线服务的部署,默认端口号为8866。 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 -```python -import requests -import json -import cv2 -import base64 +- ### 第二步:发送预测请求 + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 -def cv2_to_base64(image): - data = cv2.imencode('.jpg', image)[1] - return base64.b64encode(data.tostring()).decode('utf8') + - ```python + import requests + import json + import cv2 + import base64 + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') -# 发送HTTP请求 -data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} -headers = {"Content-type": "application/json"} -url = "http://127.0.0.1:8866/predict/mobilenet_v2_dishes" -r = requests.post(url=url, headers=headers, data=json.dumps(data)) + # 发送HTTP请求 + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/mobilenet_v2_dishes" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) -# 打印预测结果 -print(r.json()["results"]) -``` + # 打印预测结果 + print(r.json()["results"]) + ``` -### 查看代码 -[PaddlePaddle/models 图像分类](https://github.com/PaddlePaddle/models/tree/develop/PaddleCV/image_classification) +## 五、更新历史 -### 依赖 +* 1.0.0 -paddlepaddle >= 1.6.2 + 初始发布 -paddlehub >= 1.6.0 + - ```shell + $ hub install mobilenet_v2_dishes==1.0.0 + ``` diff --git a/modules/image/classification/mobilenet_v2_imagenet/README.md b/modules/image/classification/mobilenet_v2_imagenet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7b9bb0f7e5a0bcfcab35910ad88189e5bce756b3 --- /dev/null +++ b/modules/image/classification/mobilenet_v2_imagenet/README.md @@ -0,0 +1,88 @@ +# mobilenet_v2_imagenet + +|模型名称|mobilenet_v2_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|Mobilenet_v2| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|否| +|模型大小|15MB| +|最新更新日期|-| +|数据指标|-| + + +## 一、模型基本信息 + + + +- ### 模型介绍 + + - MobileNet V2是Mark Sandler, Andrew Howard等人在2018年提出的一个图像分类模型,该系列模型(MobileNet)是为移动和嵌入式设备提出的高效模型,在模型参数较少的情况下仍然保持了较高的分类准确率。该PaddleHub Module基于ImageNet-2012数据集训练,接受输入图片大小为224 x 224 x 3,支持直接通过命令行或者Python接口进行预测。 + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + + +- ### 2、安装 + + - ```shell + $ hub install mobilenet_v2_imagenet + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run mobilenet_v2_imagenet --input_path 
"/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现图像分类模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="mobilenet_v2_imagenet") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - 分类接口API。 + - **参数** + - data:dict类型,key为image,str类型,value为待检测的图片路径,list类型。 + + - **返回** + - result:list类型,每个元素为对应输入图片的预测结果。预测结果为dict类型,key为该图片分类结果label,value为该label对应的概率 + + + + + +## 四、更新历史 + +* 1.0.0 + + 初始发布 + +* 1.0.1 + + 修复python2中编码问题 + + - ```shell + $ hub install mobilenet_v2_imagenet==1.0.1 + ``` diff --git a/modules/image/classification/mobilenet_v2_imagenet_ssld/README.md b/modules/image/classification/mobilenet_v2_imagenet_ssld/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4529275acdfd69eb40ff4f1133adde685aef7014 --- /dev/null +++ b/modules/image/classification/mobilenet_v2_imagenet_ssld/README.md @@ -0,0 +1,133 @@ +# mobilenet_v2_imagenet_ssld + +|模型名称|mobilenet_v2_imagenet_ssld| +| :--- | :---: | +|类别|图像-图像分类| +|网络|Mobilenet_v2| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|否| +|模型大小|15MB| +|最新更新日期|-| +|数据指标|-| + + +## 一、模型基本信息 + + + +- ### 模型介绍 + + - MobileNet V2是Mark Sandler, Andrew Howard等人在2018年提出的一个图像分类模型,该系列模型(MobileNet)是为移动和嵌入式设备提出的高效模型,在模型参数较少的情况下仍然保持了较高的分类准确率。该PaddleHub Module基于ImageNet-2012数据集并采用PaddleClas提供的SSLD蒸馏方法训练得到,接受输入图片大小为224 x 224 x 3,支持finetune,也可以直接通过命令行或者Python接口进行预测。 + + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 1.6.2 + + - paddlehub >= 1.6.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + + +- ### 2、安装 + + - ```shell + $ hub install mobilenet_v2_imagenet_ssld + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run mobilenet_v2_imagenet_ssld --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现分类模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="mobilenet_v2_imagenet_ssld") + result = classifier.classification(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = classifier.classification(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 3、API + + + - ```python + def classification(images=None, + paths=None, + batch_size=1, + use_gpu=False, + top_k=1): + ``` + - 分类接口API。 + - **参数** + + - images (list\[numpy.ndarray\]): 图片数据,每一个图片数据的shape 均为 \[H, W, C\],颜色空间为 BGR;
+ - paths (list\[str\]): 图片的路径;
+ - batch\_size (int): batch 的大小;
+ - use\_gpu (bool): 是否使用 GPU;**若使用GPU,请先设置CUDA_VISIBLE_DEVICES环境变量**
+ - top\_k (int): 返回预测结果的前 k 个。 + + - **返回** + + - res (list\[dict\]): 分类结果,列表的每一个元素均为字典,其中 key 为识别的菜品类别,value为置信度。 + + +## 四、服务部署 + +- PaddleHub Serving可以部署一个图像识别的在线服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + - ```shell + $ hub serving start -m mobilenet_v2_imagenet_ssld + ``` + + - 这样就完成了一个图像识别的在线服务的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + import cv2 + import base64 + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # 发送HTTP请求 + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/mobilenet_v2_imagenet_ssld" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # 打印预测结果 + print(r.json()["results"]) + ``` + + +## 五、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install mobilenet_v2_imagenet_ssld==1.0.0 + ``` diff --git a/modules/image/classification/mobilenet_v3_large_imagenet_ssld/README.md b/modules/image/classification/mobilenet_v3_large_imagenet_ssld/README.md new file mode 100644 index 0000000000000000000000000000000000000000..03cf9d75fd085c41addccf1758b04151ffccf76e --- /dev/null +++ b/modules/image/classification/mobilenet_v3_large_imagenet_ssld/README.md @@ -0,0 +1,135 @@ +# mobilenet_v3_large_imagenet_ssld + +|模型名称|mobilenet_v3_large_imagenet_ssld| +| :--- | :---: | +|类别|图像-图像分类| +|网络|Mobilenet_v3_large| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|否| +|模型大小|23MB| +|最新更新日期|-| +|数据指标|-| + + +## 一、模型基本信息 + + + +- ### 模型介绍 + + - MobileNetV3是Google在2019年发布的新模型,作者通过结合NAS与NetAdapt进行搜索得到该网络结构,提供了Large和Small两个版本,分别适用于对资源不同要求的情况。对比于MobileNetV2,新的模型在速度和精度方面均有提升。该PaddleHubModule的模型结构为MobileNetV3 Large,基于ImageNet-2012数据集并采用PaddleClas提供的SSLD蒸馏方法训练得到,接受输入图片大小为224 x 224 x 3,支持finetune,也可以直接通过命令行或者Python接口进行预测。 + + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 1.6.2 + + - paddlehub >= 1.6.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + + +- ### 2、安装 + + - ```shell + $ hub install mobilenet_v3_large_imagenet_ssld + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run mobilenet_v3_large_imagenet_ssld --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现分类模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="mobilenet_v3_large_imagenet_ssld") + result = classifier.classification(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = classifier.classification(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 3、API + + + - ```python + def classification(images=None, + paths=None, + batch_size=1, + use_gpu=False, + top_k=1): + ``` + - 分类接口API。 + - **参数** + + - images (list\[numpy.ndarray\]): 图片数据,每一个图片数据的shape 均为 \[H, W, C\],颜色空间为 BGR;
+ - paths (list\[str\]): 图片的路径;
+ - batch\_size (int): batch 的大小;
+ - use\_gpu (bool): 是否使用 GPU;**若使用GPU,请先设置CUDA_VISIBLE_DEVICES环境变量**
+ - top\_k (int): 返回预测结果的前 k 个。 + + - **返回** + + - res (list\[dict\]): 分类结果,列表的每一个元素均为字典,其中 key 为识别的菜品类别,value为置信度。 + + + + +## 四、服务部署 + +- PaddleHub Serving可以部署一个图像识别的在线服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + - ```shell + $ hub serving start -m mobilenet_v3_large_imagenet_ssld + ``` + + - 这样就完成了一个图像识别的在线服务的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + import cv2 + import base64 + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # 发送HTTP请求 + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/mobilenet_v3_large_imagenet_ssld" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # 打印预测结果 + print(r.json()["results"]) + ``` + + +## 五、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install mobilenet_v3_large_imagenet_ssld==1.0.0 + ``` diff --git a/modules/image/classification/mobilenet_v3_small_imagenet_ssld/README.md b/modules/image/classification/mobilenet_v3_small_imagenet_ssld/README.md new file mode 100644 index 0000000000000000000000000000000000000000..bdcd475885d3ac0b8181978af50a3d0ead7fcaf7 --- /dev/null +++ b/modules/image/classification/mobilenet_v3_small_imagenet_ssld/README.md @@ -0,0 +1,134 @@ +# mobilenet_v3_small_imagenet_ssld + +|模型名称|mobilenet_v3_small_imagenet_ssld| +| :--- | :---: | +|类别|图像-图像分类| +|网络|Mobilenet_v3_Small| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|否| +|模型大小|13MB| +|最新更新日期|-| +|数据指标|-| + + +## 一、模型基本信息 + + + +- ### 模型介绍 + + - MobileNetV3是Google在2019年发布的新模型,作者通过结合NAS与NetAdapt进行搜索得到该网络结构,提供了Large和Small两个版本,分别适用于对资源不同要求的情况。对比于MobileNetV2,新的模型在速度和精度方面均有提升。该PaddleHubModule的模型结构为MobileNetV3 Small,基于ImageNet-2012数据集并采用PaddleClas提供的SSLD蒸馏方法训练得到,接受输入图片大小为224 x 224 x 3,支持finetune,也可以直接通过命令行或者Python接口进行预测。 + + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 1.6.2 + + - paddlehub >= 1.6.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + + +- ### 2、安装 + + - ```shell + $ hub install mobilenet_v3_small_imagenet_ssld + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run mobilenet_v3_small_imagenet_ssld --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现分类模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="mobilenet_v3_small_imagenet_ssld") + result = classifier.classification(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = classifier.classification(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 3、API + + + - ```python + def classification(images=None, + paths=None, + batch_size=1, + use_gpu=False, + top_k=1): + ``` + - 分类接口API。 + - **参数** + + - images (list\[numpy.ndarray\]): 图片数据,每一个图片数据的shape 均为 \[H, W, C\],颜色空间为 BGR;
+ - paths (list\[str\]): 图片的路径;
+ - batch\_size (int): batch 的大小,默认为 1;<br/>
+ - use\_gpu (bool): 是否使用 GPU,默认为 False;**若使用GPU,请先设置CUDA\_VISIBLE\_DEVICES环境变量**<br/>
+ - top\_k (int): 返回预测结果的前 k 个。 + + - **返回** + + - res (list\[dict\]): 分类结果,列表的每一个元素均为字典,其中 key 为识别的菜品类别,value为置信度。 + + + +## 四、服务部署 + +- PaddleHub Serving可以部署一个图像识别的在线服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + - ```shell + $ hub serving start -m mobilenet_v3_small_imagenet_ssld + ``` + + - 这样就完成了一个图像识别的在线服务的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + import cv2 + import base64 + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # 发送HTTP请求 + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/mobilenet_v3_small_imagenet_ssld" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # 打印预测结果 + print(r.json()["results"]) + ``` + + +## 五、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install mobilenet_v3_small_imagenet_ssld==1.0.0 + ``` diff --git a/modules/image/classification/nasnet_imagenet/README.md b/modules/image/classification/nasnet_imagenet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b9ca44eb5114e65433ecb398ba2acedc6634d5c7 --- /dev/null +++ b/modules/image/classification/nasnet_imagenet/README.md @@ -0,0 +1,87 @@ +# nasnet_imagenet + +|模型名称|nasnet_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|NASNet| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|否| +|模型大小|345MB| +|最新更新日期|-| +|数据指标|-| + + +## 一、模型基本信息 + + + +- ### 模型介绍 + + - NASNet是Google通过AutoML自动训练出来的图像分类模型。该PaddleHub Module基于ImageNet-2012数据集训练,接受输入图片大小为224 x 224 x 3,支持直接通过命令行或者Python接口进行预测。 + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + + +- ### 2、安装 + + - ```shell + $ hub install nasnet_imagenet + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run nasnet_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现图像分类模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="nasnet_imagenet") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - 分类接口API。 + - **参数** + - data:dict类型,key为image,str类型,value为待检测的图片路径,list类型。 + + - **返回** + - result:list类型,每个元素为对应输入图片的预测结果。预测结果为dict类型,key为该图片分类结果label,value为该label对应的概率 + + + + + +## 四、更新历史 + +* 1.0.0 + + 初始发布 + +* 1.0.1 + + 修复python2中编码问题 + - ```shell + $ hub install nasnet_imagenet==1.0.1 + ``` diff --git a/modules/image/classification/pnasnet_imagenet/README.md b/modules/image/classification/pnasnet_imagenet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..e87ff0721634bb6613bcac1b4a42035ff4fe1b55 --- /dev/null +++ b/modules/image/classification/pnasnet_imagenet/README.md @@ -0,0 +1,87 @@ +# pnasnet_imagenet + +|模型名称|pnasnet_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|PNASNet| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|否| +|模型大小|333MB| +|最新更新日期|-| +|数据指标|-| + + 
+## 一、模型基本信息 + + + +- ### 模型介绍 + + - PNASNet是Google通过AutoML自动训练出来的图像分类模型。该PaddleHub Module基于ImageNet-2012数据集训练,接受输入图片大小为224 x 224 x 3,支持直接通过命令行或者Python接口进行预测。 + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + + +- ### 2、安装 + + - ```shell + $ hub install pnasnet_imagenet + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run pnasnet_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现图像分类模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="pnasnet_imagenet") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - 分类接口API。 + - **参数** + - data:dict类型,key为image,str类型,value为待检测的图片路径,list类型。 + + - **返回** + - result:list类型,每个元素为对应输入图片的预测结果。预测结果为dict类型,key为该图片分类结果label,value为该label对应的概率 + + + + + +## 四、更新历史 + +* 1.0.0 + + 初始发布 + +* 1.0.1 + + 修复python2中编码问题 + - ```shell + $ hub install pnasnet_imagenet==1.0.1 + ``` diff --git a/modules/image/classification/repvgg_a0_imagenet/README.md b/modules/image/classification/repvgg_a0_imagenet/README.md index 826db2eae01b92b08972fa399114e396cec14c9c..c9639fe3c25344caaa50e3736166e4e641920187 100644 --- a/modules/image/classification/repvgg_a0_imagenet/README.md +++ b/modules/image/classification/repvgg_a0_imagenet/README.md @@ -1,192 +1,181 @@ -```shell -$ hub install repvgg_a0_imagenet==1.0.0 -``` +# repvgg_a0_imagenet -## 命令行预测 +|模型名称|repvgg_a0_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|RepVGG| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|是| +|模型大小|53MB| +|指标|-| +|最新更新日期|2021-09-14| -```shell -$ hub run repvgg_a0_imagenet --input_path "/PATH/TO/IMAGE" --top_k 5 -``` -## 脚本预测 +## 一、模型基本信息 -```python -import paddle -import paddlehub as hub +- ### 模型介绍 -if __name__ == '__main__': + - RepVGG(Making VGG-style ConvNets Great Again)系列模型是清华大学(丁桂光团队)、旷视科技(孙建等)、香港科技大学和阿伯里斯特威斯大学于2021年提出的一种简单但功能强大的卷积神经网络架构。有一个类似于 VGG 的推理时间代理。主体由3x3卷积和relu stack组成,而训练时间模型具有多分支拓扑。训练时间和推理时间的解耦是通过重新参数化技术实现的,因此该模型被称为repvgg。 - model = hub.Module(name='repvgg_a0_imagenet',) - result = model.predict([PATH/TO/IMAGE]) -``` -## Fine-tune代码步骤 +## 二、安装 -使用PaddleHub Fine-tune API进行Fine-tune可以分为4个步骤。 +- ### 1、环境依赖 -### Step1: 定义数据预处理方式 -```python -import paddlehub.vision.transforms as T + - paddlepaddle >= 2.0.0 -transforms = T.Compose([T.Resize((256, 256)), - T.CenterCrop(224), - T.Normalize(mean=[0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225])], - to_rgb=True) -``` + - paddlehub >= 2.0.0 -'transforms' 数据增强模块定义了丰富的数据预处理方式,用户可按照需求替换自己需要的数据预处理方式。 +- ### 2、安装 + - ```shell + $ hub install repvgg_a0_imagenet + ``` -### Step2: 下载数据集并使用 -```python -from paddlehub.datasets import Flowers + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) -flowers = Flowers(transforms) -flowers_validate = Flowers(transforms, mode='val') -``` -* transforms(Callable): 数据预处理方式。 -* mode(str): 选择数据模式,可选项有 'train', 'test', 'val', 
默认为'train'。 +## 三、模型API预测 -'hub.datasets.Flowers()' 会自动从网络下载数据集并解压到用户目录下'$HOME/.paddlehub/dataset'目录。 +- ### 1.命令行预测 + ```shell + $ hub run repvgg_a0_imagenet --input_path "/PATH/TO/IMAGE" --top_k 5 + ``` +- ### 2.预测代码示例 -### Step3: 加载预训练模型 + ```python + import paddle + import paddlehub as hub + if __name__ == '__main__': + model = hub.Module(name='repvgg_a0_imagenet') + result = model.predict(['flower.jpg']) + ``` +- ### 3.如何开始Fine-tune -```python -import paddlehub as hub + - 在完成安装PaddlePaddle与PaddleHub后,通过执行`python train.py`即可开始使用repvgg_a0_imagenet对[Flowers](../../docs/reference/datasets.md#class-hubdatasetsflowers)等数据集进行Fine-tune。 -model = hub.Module(name='repvgg_a0_imagenet', - label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"], - load_checkpoint=None) -``` -* name(str): 选择预训练模型的名字。 -* label_list(list): 设置标签对应分类类别, 默认为Imagenet2012类别。 -* load _checkpoint(str): 模型参数地址。 + - 代码步骤 -PaddleHub提供许多图像分类预训练模型,如xception、mobilenet、efficientnet等,详细信息参见[图像分类模型](https://www.paddlepaddle.org.cn/hub?filter=en_category&value=ImageClassification)。 + - Step1: 定义数据预处理方式 + - ```python + import paddlehub.vision.transforms as T + transforms = T.Compose([T.Resize((256, 256)), + T.CenterCrop(224), + T.Normalize(mean=[0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225])], + to_rgb=True) + ``` -如果想尝试efficientnet模型,只需要更换Module中的'name'参数即可. -```python -import paddlehub as hub + - `transforms` 数据增强模块定义了丰富的数据预处理方式,用户可按照需求替换自己需要的数据预处理方式。 -# 更换name参数即可无缝切换efficientnet模型, 代码示例如下 -module = hub.Module(name="efficientnetb7_imagenet") -``` -**NOTE:**目前部分模型还没有完全升级到2.0版本,敬请期待。 + - Step2: 下载数据集并使用 + - ```python + from paddlehub.datasets import Flowers + flowers = Flowers(transforms) + flowers_validate = Flowers(transforms, mode='val') + ``` -### Step4: 选择优化策略和运行配置 + * `transforms`: 数据预处理方式。 + * `mode`: 选择数据模式,可选项有 `train`, `test`, `val`, 默认为`train`。 -```python -import paddle -from paddlehub.finetune.trainer import Trainer + * 数据集的准备代码可以参考 [flowers.py](../../paddlehub/datasets/flowers.py)。`hub.datasets.Flowers()` 会自动从网络下载数据集并解压到用户目录下`$HOME/.paddlehub/dataset`目录。 -optimizer = paddle.optimizer.Adam(learning_rate=0.001, parameters=model.parameters()) -trainer = Trainer(model, optimizer, checkpoint_dir='img_classification_ckpt') -trainer.train(flowers, epochs=100, batch_size=32, eval_dataset=flowers_validate, save_interval=1) -``` + - Step3: 加载预训练模型 -#### 优化策略 + - ```python + model = hub.Module(name="repvgg_a0_imagenet", label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"]) + ``` + * `name`: 选择预训练模型的名字。 + * `label_list`: 设置输出分类类别,默认为Imagenet2012类别。 -Paddle2.0rc提供了多种优化器选择,如'SGD', 'Adam', 'Adamax'等,详细参见[策略](https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0-rc/api/paddle/optimizer/optimizer/Optimizer_cn.html)。 + - Step4: 选择优化策略和运行配置 -其中'Adam': + ```python + optimizer = paddle.optimizer.Adam(learning_rate=0.001, parameters=model.parameters()) + trainer = Trainer(model, optimizer, checkpoint_dir='img_classification_ckpt') + trainer.train(flowers, epochs=100, batch_size=32, eval_dataset=flowers_validate, save_interval=1) + ``` -* learning_rate: 全局学习率。默认为1e-3; -* parameters: 待优化模型参数。 -#### 运行配置 -'Trainer' 主要控制Fine-tune的训练,包含以下可控制的参数: + - 运行配置 -* model: 被优化模型; -* optimizer: 优化器选择; -* use_vdl: 是否使用vdl可视化训练过程; -* checkpoint_dir: 保存模型参数的地址; -* compare_metrics: 保存最优模型的衡量指标; + - `Trainer` 主要控制Fine-tune的训练,包含以下可控制的参数: -'trainer.train' 主要控制具体的训练过程,包含以下可控制的参数: + * `model`: 被优化模型; + * `optimizer`: 优化器选择; + * `use_vdl`: 是否使用vdl可视化训练过程; + * `checkpoint_dir`: 保存模型参数的地址; + * `compare_metrics`: 保存最优模型的衡量指标; -* 
train_dataset: 训练时所用的数据集; -* epochs: 训练轮数; -* batch_size: 训练的批大小,如果使用GPU,请根据实际情况调整batch_size; -* num_workers: works的数量,默认为0; -* eval_dataset: 验证集; -* log_interval: 打印日志的间隔, 单位为执行批训练的次数。 -* save_interval: 保存模型的间隔频次,单位为执行训练的轮数。 + - `trainer.train` 主要控制具体的训练过程,包含以下可控制的参数: -## 模型预测 + * `train_dataset`: 训练时所用的数据集; + * `epochs`: 训练轮数; + * `batch_size`: 训练的批大小,如果使用GPU,请根据实际情况调整batch_size; + * `num_workers`: works的数量,默认为0; + * `eval_dataset`: 验证集; + * `log_interval`: 打印日志的间隔, 单位为执行批训练的次数。 + * `save_interval`: 保存模型的间隔频次,单位为执行训练的轮数。 -当完成Fine-tune后,Fine-tune过程在验证集上表现最优的模型会被保存在'${CHECKPOINT_DIR}/best_model'目录下,其中'${CHECKPOINT_DIR}'目录为Fine-tune时所选择的保存checkpoint的目录。 -我们使用该模型来进行预测。predict.py脚本如下: + - 模型预测 -```python -import paddle -import paddlehub as hub + - 当完成Fine-tune后,Fine-tune过程在验证集上表现最优的模型会被保存在`${CHECKPOINT_DIR}/best_model`目录下,其中`${CHECKPOINT_DIR}`目录为Fine-tune时所选择的保存checkpoint的目录。 我们使用该模型来进行预测。predict.py脚本如下: -if __name__ == '__main__': + - ```python + import paddle + import paddlehub as hub + if __name__ == '__main__': + model = hub.Module(name='repvgg_a0_imagenet', label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"], load_checkpoint='/PATH/TO/CHECKPOINT') + result = model.predict(['flower.jpg']) + ``` - model = hub.Module(name='repvgg_a0_imagenet', label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"], load_checkpoint='/PATH/TO/CHECKPOINT') - result = model.predict(['flower.jpg']) -``` -参数配置正确后,请执行脚本'python predict.py', 加载模型具体可参见[加载](https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0-rc/api/paddle/framework/io/load_cn.html#load)。 + - **NOTE:** 进行预测时,所选择的module,checkpoint_dir,dataset必须和Fine-tune所用的一样。 -**NOTE:** 进行预测时,所选择的module,checkpoint_dir,dataset必须和Fine-tune所用的一样。 +## 四、服务部署 -## 服务部署 +- PaddleHub Serving可以部署一个在线分类任务服务。 -PaddleHub Serving可以部署一个在线分类任务服务 +- ### 第一步:启动PaddleHub Serving -## Step1: 启动PaddleHub Serving + - 运行启动命令: -运行启动命令: + - ```shell + $ hub serving start -m repvgg_a0_imagenet + ``` -```shell -$ hub serving start -m repvgg_a0_imagenet -``` + - 这样就完成了一个分类任务服务化API的部署,默认端口号为8866。 -这样就完成了一个分类任务服务化API的部署,默认端口号为8866。 + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 +- ### 第二步:发送预测请求 -## Step2: 发送预测请求 + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + ```python + import requests + import json + import cv2 + import base64 + import numpy as np + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + # 发送HTTP请求 + org_im = cv2.imread('/PATH/TO/IMAGE') + data = {'images':[cv2_to_base64(org_im)], 'top_k':2} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/repvgg_a0_imagenet" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + data =r.json()["results"]['data'] + ``` +## 五、更新历史 -```python -import requests -import json -import cv2 -import base64 +* 1.0.0 -import numpy as np - - -def cv2_to_base64(image): - data = cv2.imencode('.jpg', image)[1] - return base64.b64encode(data.tostring()).decode('utf8') - -def base64_to_cv2(b64str): - data = base64.b64decode(b64str.encode('utf8')) - data = np.fromstring(data, np.uint8) - data = cv2.imdecode(data, cv2.IMREAD_COLOR) - return data - -# 发送HTTP请求 -org_im = cv2.imread('/PATH/TO/IMAGE') - -data = 
{'images':[cv2_to_base64(org_im)], 'top_k':2} -headers = {"Content-type": "application/json"} -url = "http://127.0.0.1:8866/predict/repvgg_a0_imagenet" -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -data =r.json()["results"]['data'] -``` - -### 查看代码 - -[PaddleClas](https://github.com/PaddlePaddle/PaddleClas) - -### 依赖 - -paddlepaddle >= 2.0.0 - -paddlehub >= 2.0.0 + 初始发布 diff --git a/modules/image/classification/repvgg_a1_imagenet/README.md b/modules/image/classification/repvgg_a1_imagenet/README.md index 0bda4997214a7709ae35af22e637e4234023f7a6..0331a17d9dc86da5683c738d09a93c98e0cdfcaf 100644 --- a/modules/image/classification/repvgg_a1_imagenet/README.md +++ b/modules/image/classification/repvgg_a1_imagenet/README.md @@ -1,192 +1,181 @@ -```shell -$ hub install repvgg_a1_imagenet==1.0.0 -``` +# repvgg_a1_imagenet -## 命令行预测 +|模型名称|repvgg_a1_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|RepVGG| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|是| +|模型大小|82MB| +|指标|-| +|最新更新日期|2021-09-14| -```shell -$ hub run repvgg_a1_imagenet --input_path "/PATH/TO/IMAGE" --top_k 5 -``` -## 脚本预测 +## 一、模型基本信息 -```python -import paddle -import paddlehub as hub +- ### 模型介绍 -if __name__ == '__main__': + - RepVGG(Making VGG-style ConvNets Great Again)系列模型是清华大学(丁桂光团队)、旷视科技(孙建等)、香港科技大学和阿伯里斯特威斯大学于2021年提出的一种简单但功能强大的卷积神经网络架构。有一个类似于 VGG 的推理时间代理。主体由3x3卷积和relu stack组成,而训练时间模型具有多分支拓扑。训练时间和推理时间的解耦是通过重新参数化技术实现的,因此该模型被称为repvgg。 - model = hub.Module(name='repvgg_a1_imagenet',) - result = model.predict([PATH/TO/IMAGE]) -``` -## Fine-tune代码步骤 +## 二、安装 -使用PaddleHub Fine-tune API进行Fine-tune可以分为4个步骤。 +- ### 1、环境依赖 -### Step1: 定义数据预处理方式 -```python -import paddlehub.vision.transforms as T + - paddlepaddle >= 2.0.0 -transforms = T.Compose([T.Resize((256, 256)), - T.CenterCrop(224), - T.Normalize(mean=[0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225])], - to_rgb=True) -``` + - paddlehub >= 2.0.0 -'transforms' 数据增强模块定义了丰富的数据预处理方式,用户可按照需求替换自己需要的数据预处理方式。 +- ### 2、安装 + - ```shell + $ hub install repvgg_a1_imagenet + ``` -### Step2: 下载数据集并使用 -```python -from paddlehub.datasets import Flowers + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) -flowers = Flowers(transforms) -flowers_validate = Flowers(transforms, mode='val') -``` -* transforms(Callable): 数据预处理方式。 -* mode(str): 选择数据模式,可选项有 'train', 'test', 'val', 默认为'train'。 +## 三、模型API预测 -'hub.datasets.Flowers()' 会自动从网络下载数据集并解压到用户目录下'$HOME/.paddlehub/dataset'目录。 +- ### 1.命令行预测 + ```shell + $ hub run repvgg_a1_imagenet --input_path "/PATH/TO/IMAGE" --top_k 5 + ``` +- ### 2.预测代码示例 -### Step3: 加载预训练模型 + ```python + import paddle + import paddlehub as hub + if __name__ == '__main__': + model = hub.Module(name='repvgg_a1_imagenet') + result = model.predict(['flower.jpg']) + ``` +- ### 3.如何开始Fine-tune -```python -import paddlehub as hub + - 在完成安装PaddlePaddle与PaddleHub后,通过执行`python train.py`即可开始使用repvgg_a1_imagenet对[Flowers](../../docs/reference/datasets.md#class-hubdatasetsflowers)等数据集进行Fine-tune。 -model = hub.Module(name='repvgg_a1_imagenet', - label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"], - load_checkpoint=None) -``` -* name(str): 选择预训练模型的名字。 -* label_list(list): 设置标签对应分类类别, 默认为Imagenet2012类别。 -* load _checkpoint(str): 模型参数地址。 + - 代码步骤 
-PaddleHub提供许多图像分类预训练模型,如xception、mobilenet、efficientnet等,详细信息参见[图像分类模型](https://www.paddlepaddle.org.cn/hub?filter=en_category&value=ImageClassification)。 + - Step1: 定义数据预处理方式 + - ```python + import paddlehub.vision.transforms as T + transforms = T.Compose([T.Resize((256, 256)), + T.CenterCrop(224), + T.Normalize(mean=[0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225])], + to_rgb=True) + ``` -如果想尝试efficientnet模型,只需要更换Module中的'name'参数即可. -```python -import paddlehub as hub + - `transforms` 数据增强模块定义了丰富的数据预处理方式,用户可按照需求替换自己需要的数据预处理方式。 -# 更换name参数即可无缝切换efficientnet模型, 代码示例如下 -module = hub.Module(name="efficientnetb7_imagenet") -``` -**NOTE:**目前部分模型还没有完全升级到2.0版本,敬请期待。 + - Step2: 下载数据集并使用 + - ```python + from paddlehub.datasets import Flowers + flowers = Flowers(transforms) + flowers_validate = Flowers(transforms, mode='val') + ``` -### Step4: 选择优化策略和运行配置 + * `transforms`: 数据预处理方式。 + * `mode`: 选择数据模式,可选项有 `train`, `test`, `val`, 默认为`train`。 -```python -import paddle -from paddlehub.finetune.trainer import Trainer + * 数据集的准备代码可以参考 [flowers.py](../../paddlehub/datasets/flowers.py)。`hub.datasets.Flowers()` 会自动从网络下载数据集并解压到用户目录下`$HOME/.paddlehub/dataset`目录。 -optimizer = paddle.optimizer.Adam(learning_rate=0.001, parameters=model.parameters()) -trainer = Trainer(model, optimizer, checkpoint_dir='img_classification_ckpt') -trainer.train(flowers, epochs=100, batch_size=32, eval_dataset=flowers_validate, save_interval=1) -``` + - Step3: 加载预训练模型 -#### 优化策略 + - ```python + model = hub.Module(name="repvgg_a1_imagenet", label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"]) + ``` + * `name`: 选择预训练模型的名字。 + * `label_list`: 设置输出分类类别,默认为Imagenet2012类别。 -Paddle2.0rc提供了多种优化器选择,如'SGD', 'Adam', 'Adamax'等,详细参见[策略](https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0-rc/api/paddle/optimizer/optimizer/Optimizer_cn.html)。 + - Step4: 选择优化策略和运行配置 -其中'Adam': + ```python + optimizer = paddle.optimizer.Adam(learning_rate=0.001, parameters=model.parameters()) + trainer = Trainer(model, optimizer, checkpoint_dir='img_classification_ckpt') + trainer.train(flowers, epochs=100, batch_size=32, eval_dataset=flowers_validate, save_interval=1) + ``` -* learning_rate: 全局学习率。默认为1e-3; -* parameters: 待优化模型参数。 -#### 运行配置 -'Trainer' 主要控制Fine-tune的训练,包含以下可控制的参数: + - 运行配置 -* model: 被优化模型; -* optimizer: 优化器选择; -* use_vdl: 是否使用vdl可视化训练过程; -* checkpoint_dir: 保存模型参数的地址; -* compare_metrics: 保存最优模型的衡量指标; + - `Trainer` 主要控制Fine-tune的训练,包含以下可控制的参数: -'trainer.train' 主要控制具体的训练过程,包含以下可控制的参数: + * `model`: 被优化模型; + * `optimizer`: 优化器选择; + * `use_vdl`: 是否使用vdl可视化训练过程; + * `checkpoint_dir`: 保存模型参数的地址; + * `compare_metrics`: 保存最优模型的衡量指标; -* train_dataset: 训练时所用的数据集; -* epochs: 训练轮数; -* batch_size: 训练的批大小,如果使用GPU,请根据实际情况调整batch_size; -* num_workers: works的数量,默认为0; -* eval_dataset: 验证集; -* log_interval: 打印日志的间隔, 单位为执行批训练的次数。 -* save_interval: 保存模型的间隔频次,单位为执行训练的轮数。 + - `trainer.train` 主要控制具体的训练过程,包含以下可控制的参数: -## 模型预测 + * `train_dataset`: 训练时所用的数据集; + * `epochs`: 训练轮数; + * `batch_size`: 训练的批大小,如果使用GPU,请根据实际情况调整batch_size; + * `num_workers`: works的数量,默认为0; + * `eval_dataset`: 验证集; + * `log_interval`: 打印日志的间隔, 单位为执行批训练的次数。 + * `save_interval`: 保存模型的间隔频次,单位为执行训练的轮数。 -当完成Fine-tune后,Fine-tune过程在验证集上表现最优的模型会被保存在'${CHECKPOINT_DIR}/best_model'目录下,其中'${CHECKPOINT_DIR}'目录为Fine-tune时所选择的保存checkpoint的目录。 -我们使用该模型来进行预测。predict.py脚本如下: + - 模型预测 -```python -import paddle -import paddlehub as hub + - 当完成Fine-tune后,Fine-tune过程在验证集上表现最优的模型会被保存在`${CHECKPOINT_DIR}/best_model`目录下,其中`${CHECKPOINT_DIR}`目录为Fine-tune时所选择的保存checkpoint的目录。 我们使用该模型来进行预测。predict.py脚本如下: 
-if __name__ == '__main__': + - ```python + import paddle + import paddlehub as hub + if __name__ == '__main__': + model = hub.Module(name='repvgg_a1_imagenet', label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"], load_checkpoint='/PATH/TO/CHECKPOINT') + result = model.predict(['flower.jpg']) + ``` - model = hub.Module(name='repvgg_a1_imagenet', label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"], load_checkpoint='/PATH/TO/CHECKPOINT') - result = model.predict(['flower.jpg']) -``` -参数配置正确后,请执行脚本'python predict.py', 加载模型具体可参见[加载](https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0-rc/api/paddle/framework/io/load_cn.html#load)。 + - **NOTE:** 进行预测时,所选择的module,checkpoint_dir,dataset必须和Fine-tune所用的一样。 -**NOTE:** 进行预测时,所选择的module,checkpoint_dir,dataset必须和Fine-tune所用的一样。 +## 四、服务部署 -## 服务部署 +- PaddleHub Serving可以部署一个在线分类任务服务。 -PaddleHub Serving可以部署一个在线分类任务服务 +- ### 第一步:启动PaddleHub Serving -## Step1: 启动PaddleHub Serving + - 运行启动命令: -运行启动命令: + - ```shell + $ hub serving start -m repvgg_a1_imagenet + ``` -```shell -$ hub serving start -m repvgg_a1_imagenet -``` + - 这样就完成了一个分类任务服务化API的部署,默认端口号为8866。 -这样就完成了一个分类任务服务化API的部署,默认端口号为8866。 + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 +- ### 第二步:发送预测请求 -## Step2: 发送预测请求 + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + ```python + import requests + import json + import cv2 + import base64 + import numpy as np + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + # 发送HTTP请求 + org_im = cv2.imread('/PATH/TO/IMAGE') + data = {'images':[cv2_to_base64(org_im)], 'top_k':2} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/repvgg_a1_imagenet" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + data =r.json()["results"]['data'] + ``` +## 五、更新历史 -```python -import requests -import json -import cv2 -import base64 +* 1.0.0 -import numpy as np - - -def cv2_to_base64(image): - data = cv2.imencode('.jpg', image)[1] - return base64.b64encode(data.tostring()).decode('utf8') - -def base64_to_cv2(b64str): - data = base64.b64decode(b64str.encode('utf8')) - data = np.fromstring(data, np.uint8) - data = cv2.imdecode(data, cv2.IMREAD_COLOR) - return data - -# 发送HTTP请求 -org_im = cv2.imread('/PATH/TO/IMAGE') - -data = {'images':[cv2_to_base64(org_im)], 'top_k':2} -headers = {"Content-type": "application/json"} -url = "http://127.0.0.1:8866/predict/repvgg_a1_imagenet" -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -data =r.json()["results"]['data'] -``` - -### 查看代码 - -[PaddleClas](https://github.com/PaddlePaddle/PaddleClas) - -### 依赖 - -paddlepaddle >= 2.0.0 - -paddlehub >= 2.0.0 + 初始发布 diff --git a/modules/image/classification/repvgg_a2_imagenet/README.md b/modules/image/classification/repvgg_a2_imagenet/README.md index ee342fb033fdc00e6166d9074bc278293d22343e..f10d93eb5296d8f4491882f608a328d4e2e9cf72 100644 --- a/modules/image/classification/repvgg_a2_imagenet/README.md +++ b/modules/image/classification/repvgg_a2_imagenet/README.md @@ -1,192 +1,181 @@ -```shell -$ hub install repvgg_a2_imagenet==1.0.0 -``` +# repvgg_a2_imagenet -## 命令行预测 +|模型名称|repvgg_a2_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|RepVGG| 
+|数据集|ImageNet-2012| +|是否支持Fine-tuning|是| +|模型大小|163MB| +|指标|-| +|最新更新日期|2021-09-14| -```shell -$ hub run repvgg_a2_imagenet --input_path "/PATH/TO/IMAGE" --top_k 5 -``` -## 脚本预测 +## 一、模型基本信息 -```python -import paddle -import paddlehub as hub +- ### 模型介绍 -if __name__ == '__main__': + - RepVGG(Making VGG-style ConvNets Great Again)系列模型是清华大学(丁桂光团队)、旷视科技(孙建等)、香港科技大学和阿伯里斯特威斯大学于2021年提出的一种简单但功能强大的卷积神经网络架构。有一个类似于 VGG 的推理时间代理。主体由3x3卷积和relu stack组成,而训练时间模型具有多分支拓扑。训练时间和推理时间的解耦是通过重新参数化技术实现的,因此该模型被称为repvgg。 - model = hub.Module(name='repvgg_a2_imagenet',) - result = model.predict([PATH/TO/IMAGE]) -``` -## Fine-tune代码步骤 +## 二、安装 -使用PaddleHub Fine-tune API进行Fine-tune可以分为4个步骤。 +- ### 1、环境依赖 -### Step1: 定义数据预处理方式 -```python -import paddlehub.vision.transforms as T + - paddlepaddle >= 2.0.0 -transforms = T.Compose([T.Resize((256, 256)), - T.CenterCrop(224), - T.Normalize(mean=[0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225])], - to_rgb=True) -``` + - paddlehub >= 2.0.0 -'transforms' 数据增强模块定义了丰富的数据预处理方式,用户可按照需求替换自己需要的数据预处理方式。 +- ### 2、安装 + - ```shell + $ hub install repvgg_a2_imagenet + ``` -### Step2: 下载数据集并使用 -```python -from paddlehub.datasets import Flowers + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) -flowers = Flowers(transforms) -flowers_validate = Flowers(transforms, mode='val') -``` -* transforms(Callable): 数据预处理方式。 -* mode(str): 选择数据模式,可选项有 'train', 'test', 'val', 默认为'train'。 +## 三、模型API预测 -'hub.datasets.Flowers()' 会自动从网络下载数据集并解压到用户目录下'$HOME/.paddlehub/dataset'目录。 +- ### 1.命令行预测 + ```shell + $ hub run repvgg_a2_imagenet --input_path "/PATH/TO/IMAGE" --top_k 5 + ``` +- ### 2.预测代码示例 -### Step3: 加载预训练模型 + ```python + import paddle + import paddlehub as hub + if __name__ == '__main__': + model = hub.Module(name='repvgg_a2_imagenet') + result = model.predict(['flower.jpg']) + ``` +- ### 3.如何开始Fine-tune -```python -import paddlehub as hub + - 在完成安装PaddlePaddle与PaddleHub后,通过执行`python train.py`即可开始使用repvgg_a2_imagenet对[Flowers](../../docs/reference/datasets.md#class-hubdatasetsflowers)等数据集进行Fine-tune。 -model = hub.Module(name='repvgg_a2_imagenet', - label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"], - load_checkpoint=None) -``` -* name(str): 选择预训练模型的名字。 -* label_list(list): 设置标签对应分类类别, 默认为Imagenet2012类别。 -* load _checkpoint(str): 模型参数地址。 + - 代码步骤 -PaddleHub提供许多图像分类预训练模型,如xception、mobilenet、efficientnet等,详细信息参见[图像分类模型](https://www.paddlepaddle.org.cn/hub?filter=en_category&value=ImageClassification)。 + - Step1: 定义数据预处理方式 + - ```python + import paddlehub.vision.transforms as T + transforms = T.Compose([T.Resize((256, 256)), + T.CenterCrop(224), + T.Normalize(mean=[0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225])], + to_rgb=True) + ``` -如果想尝试efficientnet模型,只需要更换Module中的'name'参数即可. 
-```python -import paddlehub as hub + - `transforms` 数据增强模块定义了丰富的数据预处理方式,用户可按照需求替换自己需要的数据预处理方式。 -# 更换name参数即可无缝切换efficientnet模型, 代码示例如下 -module = hub.Module(name="efficientnetb7_imagenet") -``` -**NOTE:**目前部分模型还没有完全升级到2.0版本,敬请期待。 + - Step2: 下载数据集并使用 + - ```python + from paddlehub.datasets import Flowers + flowers = Flowers(transforms) + flowers_validate = Flowers(transforms, mode='val') + ``` -### Step4: 选择优化策略和运行配置 + * `transforms`: 数据预处理方式。 + * `mode`: 选择数据模式,可选项有 `train`, `test`, `val`, 默认为`train`。 -```python -import paddle -from paddlehub.finetune.trainer import Trainer + * 数据集的准备代码可以参考 [flowers.py](../../paddlehub/datasets/flowers.py)。`hub.datasets.Flowers()` 会自动从网络下载数据集并解压到用户目录下`$HOME/.paddlehub/dataset`目录。 -optimizer = paddle.optimizer.Adam(learning_rate=0.001, parameters=model.parameters()) -trainer = Trainer(model, optimizer, checkpoint_dir='img_classification_ckpt') -trainer.train(flowers, epochs=100, batch_size=32, eval_dataset=flowers_validate, save_interval=1) -``` + - Step3: 加载预训练模型 -#### 优化策略 + - ```python + model = hub.Module(name="repvgg_a2_imagenet", label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"]) + ``` + * `name`: 选择预训练模型的名字。 + * `label_list`: 设置输出分类类别,默认为Imagenet2012类别。 -Paddle2.0rc提供了多种优化器选择,如'SGD', 'Adam', 'Adamax'等,详细参见[策略](https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0-rc/api/paddle/optimizer/optimizer/Optimizer_cn.html)。 + - Step4: 选择优化策略和运行配置 -其中'Adam': + ```python + optimizer = paddle.optimizer.Adam(learning_rate=0.001, parameters=model.parameters()) + trainer = Trainer(model, optimizer, checkpoint_dir='img_classification_ckpt') + trainer.train(flowers, epochs=100, batch_size=32, eval_dataset=flowers_validate, save_interval=1) + ``` -* learning_rate: 全局学习率。默认为1e-3; -* parameters: 待优化模型参数。 -#### 运行配置 -'Trainer' 主要控制Fine-tune的训练,包含以下可控制的参数: + - 运行配置 -* model: 被优化模型; -* optimizer: 优化器选择; -* use_vdl: 是否使用vdl可视化训练过程; -* checkpoint_dir: 保存模型参数的地址; -* compare_metrics: 保存最优模型的衡量指标; + - `Trainer` 主要控制Fine-tune的训练,包含以下可控制的参数: -'trainer.train' 主要控制具体的训练过程,包含以下可控制的参数: + * `model`: 被优化模型; + * `optimizer`: 优化器选择; + * `use_vdl`: 是否使用vdl可视化训练过程; + * `checkpoint_dir`: 保存模型参数的地址; + * `compare_metrics`: 保存最优模型的衡量指标; -* train_dataset: 训练时所用的数据集; -* epochs: 训练轮数; -* batch_size: 训练的批大小,如果使用GPU,请根据实际情况调整batch_size; -* num_workers: works的数量,默认为0; -* eval_dataset: 验证集; -* log_interval: 打印日志的间隔, 单位为执行批训练的次数。 -* save_interval: 保存模型的间隔频次,单位为执行训练的轮数。 + - `trainer.train` 主要控制具体的训练过程,包含以下可控制的参数: -## 模型预测 + * `train_dataset`: 训练时所用的数据集; + * `epochs`: 训练轮数; + * `batch_size`: 训练的批大小,如果使用GPU,请根据实际情况调整batch_size; + * `num_workers`: works的数量,默认为0; + * `eval_dataset`: 验证集; + * `log_interval`: 打印日志的间隔, 单位为执行批训练的次数。 + * `save_interval`: 保存模型的间隔频次,单位为执行训练的轮数。 -当完成Fine-tune后,Fine-tune过程在验证集上表现最优的模型会被保存在'${CHECKPOINT_DIR}/best_model'目录下,其中'${CHECKPOINT_DIR}'目录为Fine-tune时所选择的保存checkpoint的目录。 -我们使用该模型来进行预测。predict.py脚本如下: + - 模型预测 -```python -import paddle -import paddlehub as hub + - 当完成Fine-tune后,Fine-tune过程在验证集上表现最优的模型会被保存在`${CHECKPOINT_DIR}/best_model`目录下,其中`${CHECKPOINT_DIR}`目录为Fine-tune时所选择的保存checkpoint的目录。 我们使用该模型来进行预测。predict.py脚本如下: -if __name__ == '__main__': + - ```python + import paddle + import paddlehub as hub + if __name__ == '__main__': + model = hub.Module(name='repvgg_a2_imagenet', label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"], load_checkpoint='/PATH/TO/CHECKPOINT') + result = model.predict(['flower.jpg']) + ``` - model = hub.Module(name='repvgg_a2_imagenet', label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"], 
load_checkpoint='/PATH/TO/CHECKPOINT') - result = model.predict(['flower.jpg']) -``` -参数配置正确后,请执行脚本'python predict.py', 加载模型具体可参见[加载](https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0-rc/api/paddle/framework/io/load_cn.html#load)。 + - **NOTE:** 进行预测时,所选择的module,checkpoint_dir,dataset必须和Fine-tune所用的一样。 -**NOTE:** 进行预测时,所选择的module,checkpoint_dir,dataset必须和Fine-tune所用的一样。 +## 四、服务部署 -## 服务部署 +- PaddleHub Serving可以部署一个在线分类任务服务。 -PaddleHub Serving可以部署一个在线分类任务服务 +- ### 第一步:启动PaddleHub Serving -## Step1: 启动PaddleHub Serving + - 运行启动命令: -运行启动命令: + - ```shell + $ hub serving start -m repvgg_a2_imagenet + ``` -```shell -$ hub serving start -m repvgg_a2_imagenet -``` + - 这样就完成了一个分类任务服务化API的部署,默认端口号为8866。 -这样就完成了一个分类任务服务化API的部署,默认端口号为8866。 + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 +- ### 第二步:发送预测请求 -## Step2: 发送预测请求 + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + ```python + import requests + import json + import cv2 + import base64 + import numpy as np + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + # 发送HTTP请求 + org_im = cv2.imread('/PATH/TO/IMAGE') + data = {'images':[cv2_to_base64(org_im)], 'top_k':2} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/repvgg_a2_imagenet" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + data =r.json()["results"]['data'] + ``` +## 五、更新历史 -```python -import requests -import json -import cv2 -import base64 +* 1.0.0 -import numpy as np - - -def cv2_to_base64(image): - data = cv2.imencode('.jpg', image)[1] - return base64.b64encode(data.tostring()).decode('utf8') - -def base64_to_cv2(b64str): - data = base64.b64decode(b64str.encode('utf8')) - data = np.fromstring(data, np.uint8) - data = cv2.imdecode(data, cv2.IMREAD_COLOR) - return data - -# 发送HTTP请求 -org_im = cv2.imread('/PATH/TO/IMAGE') - -data = {'images':[cv2_to_base64(org_im)], 'top_k':2} -headers = {"Content-type": "application/json"} -url = "http://127.0.0.1:8866/predict/repvgg_a2_imagenet" -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -data =r.json()["results"]['data'] -``` - -### 查看代码 - -[PaddleClas](https://github.com/PaddlePaddle/PaddleClas) - -### 依赖 - -paddlepaddle >= 2.0.0 - -paddlehub >= 2.0.0 + 初始发布 diff --git a/modules/image/classification/repvgg_b0_imagenet/README.md b/modules/image/classification/repvgg_b0_imagenet/README.md index 7a284dd6c937e7a78c2aebb574cf52961ccfdbc5..f8b9c6994853fed5a41df599f6812c1bed5ce1b0 100644 --- a/modules/image/classification/repvgg_b0_imagenet/README.md +++ b/modules/image/classification/repvgg_b0_imagenet/README.md @@ -1,192 +1,181 @@ -```shell -$ hub install repvgg_b0_imagenet==1.0.0 -``` +# repvgg_b0_imagenet -## 命令行预测 +|模型名称|repvgg_b0_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|RepVGG| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|是| +|模型大小|92MB| +|指标|-| +|最新更新日期|2021-09-14| -```shell -$ hub run repvgg_b0_imagenet --input_path "/PATH/TO/IMAGE" --top_k 5 -``` -## 脚本预测 +## 一、模型基本信息 -```python -import paddle -import paddlehub as hub +- ### 模型介绍 -if __name__ == '__main__': + - RepVGG(Making VGG-style ConvNets Great Again)系列模型是清华大学(丁桂光团队)、旷视科技(孙建等)、香港科技大学和阿伯里斯特威斯大学于2021年提出的一种简单但功能强大的卷积神经网络架构。有一个类似于 VGG 的推理时间代理。主体由3x3卷积和relu 
stack组成,而训练时间模型具有多分支拓扑。训练时间和推理时间的解耦是通过重新参数化技术实现的,因此该模型被称为repvgg。 - model = hub.Module(name='repvgg_b0_imagenet',) - result = model.predict([PATH/TO/IMAGE]) -``` -## Fine-tune代码步骤 +## 二、安装 -使用PaddleHub Fine-tune API进行Fine-tune可以分为4个步骤。 +- ### 1、环境依赖 -### Step1: 定义数据预处理方式 -```python -import paddlehub.vision.transforms as T + - paddlepaddle >= 2.0.0 -transforms = T.Compose([T.Resize((256, 256)), - T.CenterCrop(224), - T.Normalize(mean=[0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225])], - to_rgb=True) -``` + - paddlehub >= 2.0.0 -'transforms' 数据增强模块定义了丰富的数据预处理方式,用户可按照需求替换自己需要的数据预处理方式。 +- ### 2、安装 + - ```shell + $ hub install repvgg_b0_imagenet + ``` -### Step2: 下载数据集并使用 -```python -from paddlehub.datasets import Flowers + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) -flowers = Flowers(transforms) -flowers_validate = Flowers(transforms, mode='val') -``` -* transforms(Callable): 数据预处理方式。 -* mode(str): 选择数据模式,可选项有 'train', 'test', 'val', 默认为'train'。 +## 三、模型API预测 -'hub.datasets.Flowers()' 会自动从网络下载数据集并解压到用户目录下'$HOME/.paddlehub/dataset'目录。 +- ### 1.命令行预测 + ```shell + $ hub run repvgg_b0_imagenet --input_path "/PATH/TO/IMAGE" --top_k 5 + ``` +- ### 2.预测代码示例 -### Step3: 加载预训练模型 + ```python + import paddle + import paddlehub as hub + if __name__ == '__main__': + model = hub.Module(name='repvgg_b0_imagenet') + result = model.predict(['flower.jpg']) + ``` +- ### 3.如何开始Fine-tune -```python -import paddlehub as hub + - 在完成安装PaddlePaddle与PaddleHub后,通过执行`python train.py`即可开始使用repvgg_b0_imagenet对[Flowers](../../docs/reference/datasets.md#class-hubdatasetsflowers)等数据集进行Fine-tune。 -model = hub.Module(name='repvgg_b0_imagenet', - label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"], - load_checkpoint=None) -``` -* name(str): 选择预训练模型的名字。 -* label_list(list): 设置标签对应分类类别, 默认为Imagenet2012类别。 -* load _checkpoint(str): 模型参数地址。 + - 代码步骤 -PaddleHub提供许多图像分类预训练模型,如xception、mobilenet、efficientnet等,详细信息参见[图像分类模型](https://www.paddlepaddle.org.cn/hub?filter=en_category&value=ImageClassification)。 + - Step1: 定义数据预处理方式 + - ```python + import paddlehub.vision.transforms as T + transforms = T.Compose([T.Resize((256, 256)), + T.CenterCrop(224), + T.Normalize(mean=[0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225])], + to_rgb=True) + ``` -如果想尝试efficientnet模型,只需要更换Module中的'name'参数即可. 
-```python -import paddlehub as hub + - `transforms` 数据增强模块定义了丰富的数据预处理方式,用户可按照需求替换自己需要的数据预处理方式。 -# 更换name参数即可无缝切换efficientnet模型, 代码示例如下 -module = hub.Module(name="efficientnetb7_imagenet") -``` -**NOTE:**目前部分模型还没有完全升级到2.0版本,敬请期待。 + - Step2: 下载数据集并使用 + - ```python + from paddlehub.datasets import Flowers + flowers = Flowers(transforms) + flowers_validate = Flowers(transforms, mode='val') + ``` -### Step4: 选择优化策略和运行配置 + * `transforms`: 数据预处理方式。 + * `mode`: 选择数据模式,可选项有 `train`, `test`, `val`, 默认为`train`。 -```python -import paddle -from paddlehub.finetune.trainer import Trainer + * 数据集的准备代码可以参考 [flowers.py](../../paddlehub/datasets/flowers.py)。`hub.datasets.Flowers()` 会自动从网络下载数据集并解压到用户目录下`$HOME/.paddlehub/dataset`目录。 -optimizer = paddle.optimizer.Adam(learning_rate=0.001, parameters=model.parameters()) -trainer = Trainer(model, optimizer, checkpoint_dir='img_classification_ckpt') -trainer.train(flowers, epochs=100, batch_size=32, eval_dataset=flowers_validate, save_interval=1) -``` + - Step3: 加载预训练模型 -#### 优化策略 + - ```python + model = hub.Module(name="repvgg_b0_imagenet", label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"]) + ``` + * `name`: 选择预训练模型的名字。 + * `label_list`: 设置输出分类类别,默认为Imagenet2012类别。 -Paddle2.0rc提供了多种优化器选择,如'SGD', 'Adam', 'Adamax'等,详细参见[策略](https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0-rc/api/paddle/optimizer/optimizer/Optimizer_cn.html)。 + - Step4: 选择优化策略和运行配置 -其中'Adam': + ```python + optimizer = paddle.optimizer.Adam(learning_rate=0.001, parameters=model.parameters()) + trainer = Trainer(model, optimizer, checkpoint_dir='img_classification_ckpt') + trainer.train(flowers, epochs=100, batch_size=32, eval_dataset=flowers_validate, save_interval=1) + ``` -* learning_rate: 全局学习率。默认为1e-3; -* parameters: 待优化模型参数。 -#### 运行配置 -'Trainer' 主要控制Fine-tune的训练,包含以下可控制的参数: + - 运行配置 -* model: 被优化模型; -* optimizer: 优化器选择; -* use_vdl: 是否使用vdl可视化训练过程; -* checkpoint_dir: 保存模型参数的地址; -* compare_metrics: 保存最优模型的衡量指标; + - `Trainer` 主要控制Fine-tune的训练,包含以下可控制的参数: -'trainer.train' 主要控制具体的训练过程,包含以下可控制的参数: + * `model`: 被优化模型; + * `optimizer`: 优化器选择; + * `use_vdl`: 是否使用vdl可视化训练过程; + * `checkpoint_dir`: 保存模型参数的地址; + * `compare_metrics`: 保存最优模型的衡量指标; -* train_dataset: 训练时所用的数据集; -* epochs: 训练轮数; -* batch_size: 训练的批大小,如果使用GPU,请根据实际情况调整batch_size; -* num_workers: works的数量,默认为0; -* eval_dataset: 验证集; -* log_interval: 打印日志的间隔, 单位为执行批训练的次数。 -* save_interval: 保存模型的间隔频次,单位为执行训练的轮数。 + - `trainer.train` 主要控制具体的训练过程,包含以下可控制的参数: -## 模型预测 + * `train_dataset`: 训练时所用的数据集; + * `epochs`: 训练轮数; + * `batch_size`: 训练的批大小,如果使用GPU,请根据实际情况调整batch_size; + * `num_workers`: works的数量,默认为0; + * `eval_dataset`: 验证集; + * `log_interval`: 打印日志的间隔, 单位为执行批训练的次数。 + * `save_interval`: 保存模型的间隔频次,单位为执行训练的轮数。 -当完成Fine-tune后,Fine-tune过程在验证集上表现最优的模型会被保存在'${CHECKPOINT_DIR}/best_model'目录下,其中'${CHECKPOINT_DIR}'目录为Fine-tune时所选择的保存checkpoint的目录。 -我们使用该模型来进行预测。predict.py脚本如下: + - 模型预测 -```python -import paddle -import paddlehub as hub + - 当完成Fine-tune后,Fine-tune过程在验证集上表现最优的模型会被保存在`${CHECKPOINT_DIR}/best_model`目录下,其中`${CHECKPOINT_DIR}`目录为Fine-tune时所选择的保存checkpoint的目录。 我们使用该模型来进行预测。predict.py脚本如下: -if __name__ == '__main__': + - ```python + import paddle + import paddlehub as hub + if __name__ == '__main__': + model = hub.Module(name='repvgg_b0_imagenet', label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"], load_checkpoint='/PATH/TO/CHECKPOINT') + result = model.predict(['flower.jpg']) + ``` - model = hub.Module(name='repvgg_b0_imagenet', label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"], 
load_checkpoint='/PATH/TO/CHECKPOINT') - result = model.predict(['flower.jpg']) -``` -参数配置正确后,请执行脚本'python predict.py', 加载模型具体可参见[加载](https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0-rc/api/paddle/framework/io/load_cn.html#load)。 + - **NOTE:** 进行预测时,所选择的module,checkpoint_dir,dataset必须和Fine-tune所用的一样。 -**NOTE:** 进行预测时,所选择的module,checkpoint_dir,dataset必须和Fine-tune所用的一样。 +## 四、服务部署 -## 服务部署 +- PaddleHub Serving可以部署一个在线分类任务服务。 -PaddleHub Serving可以部署一个在线分类任务服务 +- ### 第一步:启动PaddleHub Serving -## Step1: 启动PaddleHub Serving + - 运行启动命令: -运行启动命令: + - ```shell + $ hub serving start -m repvgg_b0_imagenet + ``` -```shell -$ hub serving start -m repvgg_b0_imagenet -``` + - 这样就完成了一个分类任务服务化API的部署,默认端口号为8866。 -这样就完成了一个分类任务服务化API的部署,默认端口号为8866。 + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 +- ### 第二步:发送预测请求 -## Step2: 发送预测请求 + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + ```python + import requests + import json + import cv2 + import base64 + import numpy as np + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + # 发送HTTP请求 + org_im = cv2.imread('/PATH/TO/IMAGE') + data = {'images':[cv2_to_base64(org_im)], 'top_k':2} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/repvgg_b0_imagenet" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + data =r.json()["results"]['data'] + ``` +## 五、更新历史 -```python -import requests -import json -import cv2 -import base64 +* 1.0.0 -import numpy as np - - -def cv2_to_base64(image): - data = cv2.imencode('.jpg', image)[1] - return base64.b64encode(data.tostring()).decode('utf8') - -def base64_to_cv2(b64str): - data = base64.b64decode(b64str.encode('utf8')) - data = np.fromstring(data, np.uint8) - data = cv2.imdecode(data, cv2.IMREAD_COLOR) - return data - -# 发送HTTP请求 -org_im = cv2.imread('/PATH/TO/IMAGE') - -data = {'images':[cv2_to_base64(org_im)], 'top_k':2} -headers = {"Content-type": "application/json"} -url = "http://127.0.0.1:8866/predict/repvgg_b0_imagenet" -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -data =r.json()["results"]['data'] -``` - -### 查看代码 - -[PaddleClas](https://github.com/PaddlePaddle/PaddleClas) - -### 依赖 - -paddlepaddle >= 2.0.0 - -paddlehub >= 2.0.0 + 初始发布 diff --git a/modules/image/classification/repvgg_b1_imagenet/README.md b/modules/image/classification/repvgg_b1_imagenet/README.md index 1a1e4d05f33626856349bbeb48a20be4dea738d5..07578e568aea228b4765026a92a8163b727345bf 100644 --- a/modules/image/classification/repvgg_b1_imagenet/README.md +++ b/modules/image/classification/repvgg_b1_imagenet/README.md @@ -1,192 +1,181 @@ -```shell -$ hub install repvgg_b1_imagenet==1.0.0 -``` +# repvgg_b1_imagenet -## 命令行预测 +|模型名称|repvgg_b1_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|RepVGG| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|是| +|模型大小|332MB| +|指标|-| +|最新更新日期|2021-09-14| -```shell -$ hub run repvgg_b1_imagenet --input_path "/PATH/TO/IMAGE" --top_k 5 -``` -## 脚本预测 +## 一、模型基本信息 -```python -import paddle -import paddlehub as hub +- ### 模型介绍 -if __name__ == '__main__': + - RepVGG(Making VGG-style ConvNets Great Again)系列模型是清华大学(丁桂光团队)、旷视科技(孙建等)、香港科技大学和阿伯里斯特威斯大学于2021年提出的一种简单但功能强大的卷积神经网络架构。有一个类似于 VGG 的推理时间代理。主体由3x3卷积和relu 
stack组成,而训练时间模型具有多分支拓扑。训练时间和推理时间的解耦是通过重新参数化技术实现的,因此该模型被称为repvgg。 - model = hub.Module(name='repvgg_b1_imagenet',) - result = model.predict([PATH/TO/IMAGE]) -``` -## Fine-tune代码步骤 +## 二、安装 -使用PaddleHub Fine-tune API进行Fine-tune可以分为4个步骤。 +- ### 1、环境依赖 -### Step1: 定义数据预处理方式 -```python -import paddlehub.vision.transforms as T + - paddlepaddle >= 2.0.0 -transforms = T.Compose([T.Resize((256, 256)), - T.CenterCrop(224), - T.Normalize(mean=[0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225])], - to_rgb=True) -``` + - paddlehub >= 2.0.0 -'transforms' 数据增强模块定义了丰富的数据预处理方式,用户可按照需求替换自己需要的数据预处理方式。 +- ### 2、安装 + - ```shell + $ hub install repvgg_b1_imagenet + ``` -### Step2: 下载数据集并使用 -```python -from paddlehub.datasets import Flowers + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) -flowers = Flowers(transforms) -flowers_validate = Flowers(transforms, mode='val') -``` -* transforms(Callable): 数据预处理方式。 -* mode(str): 选择数据模式,可选项有 'train', 'test', 'val', 默认为'train'。 +## 三、模型API预测 -'hub.datasets.Flowers()' 会自动从网络下载数据集并解压到用户目录下'$HOME/.paddlehub/dataset'目录。 +- ### 1.命令行预测 + ```shell + $ hub run repvgg_b1_imagenet --input_path "/PATH/TO/IMAGE" --top_k 5 + ``` +- ### 2.预测代码示例 -### Step3: 加载预训练模型 + ```python + import paddle + import paddlehub as hub + if __name__ == '__main__': + model = hub.Module(name='repvgg_b1_imagenet') + result = model.predict(['flower.jpg']) + ``` +- ### 3.如何开始Fine-tune -```python -import paddlehub as hub + - 在完成安装PaddlePaddle与PaddleHub后,通过执行`python train.py`即可开始使用repvgg_b1_imagenet对[Flowers](../../docs/reference/datasets.md#class-hubdatasetsflowers)等数据集进行Fine-tune。 -model = hub.Module(name='repvgg_b1_imagenet', - label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"], - load_checkpoint=None) -``` -* name(str): 选择预训练模型的名字。 -* label_list(list): 设置标签对应分类类别, 默认为Imagenet2012类别。 -* load _checkpoint(str): 模型参数地址。 + - 代码步骤 -PaddleHub提供许多图像分类预训练模型,如xception、mobilenet、efficientnet等,详细信息参见[图像分类模型](https://www.paddlepaddle.org.cn/hub?filter=en_category&value=ImageClassification)。 + - Step1: 定义数据预处理方式 + - ```python + import paddlehub.vision.transforms as T + transforms = T.Compose([T.Resize((256, 256)), + T.CenterCrop(224), + T.Normalize(mean=[0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225])], + to_rgb=True) + ``` -如果想尝试efficientnet模型,只需要更换Module中的'name'参数即可. 
-```python -import paddlehub as hub + - `transforms` 数据增强模块定义了丰富的数据预处理方式,用户可按照需求替换自己需要的数据预处理方式。 -# 更换name参数即可无缝切换efficientnet模型, 代码示例如下 -module = hub.Module(name="efficientnetb7_imagenet") -``` -**NOTE:**目前部分模型还没有完全升级到2.0版本,敬请期待。 + - Step2: 下载数据集并使用 + - ```python + from paddlehub.datasets import Flowers + flowers = Flowers(transforms) + flowers_validate = Flowers(transforms, mode='val') + ``` -### Step4: 选择优化策略和运行配置 + * `transforms`: 数据预处理方式。 + * `mode`: 选择数据模式,可选项有 `train`, `test`, `val`, 默认为`train`。 -```python -import paddle -from paddlehub.finetune.trainer import Trainer + * 数据集的准备代码可以参考 [flowers.py](../../paddlehub/datasets/flowers.py)。`hub.datasets.Flowers()` 会自动从网络下载数据集并解压到用户目录下`$HOME/.paddlehub/dataset`目录。 -optimizer = paddle.optimizer.Adam(learning_rate=0.001, parameters=model.parameters()) -trainer = Trainer(model, optimizer, checkpoint_dir='img_classification_ckpt') -trainer.train(flowers, epochs=100, batch_size=32, eval_dataset=flowers_validate, save_interval=1) -``` + - Step3: 加载预训练模型 -#### 优化策略 + - ```python + model = hub.Module(name="repvgg_b1_imagenet", label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"]) + ``` + * `name`: 选择预训练模型的名字。 + * `label_list`: 设置输出分类类别,默认为Imagenet2012类别。 -Paddle2.0rc提供了多种优化器选择,如'SGD', 'Adam', 'Adamax'等,详细参见[策略](https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0-rc/api/paddle/optimizer/optimizer/Optimizer_cn.html)。 + - Step4: 选择优化策略和运行配置 -其中'Adam': + ```python + optimizer = paddle.optimizer.Adam(learning_rate=0.001, parameters=model.parameters()) + trainer = Trainer(model, optimizer, checkpoint_dir='img_classification_ckpt') + trainer.train(flowers, epochs=100, batch_size=32, eval_dataset=flowers_validate, save_interval=1) + ``` -* learning_rate: 全局学习率。默认为1e-3; -* parameters: 待优化模型参数。 -#### 运行配置 -'Trainer' 主要控制Fine-tune的训练,包含以下可控制的参数: + - 运行配置 -* model: 被优化模型; -* optimizer: 优化器选择; -* use_vdl: 是否使用vdl可视化训练过程; -* checkpoint_dir: 保存模型参数的地址; -* compare_metrics: 保存最优模型的衡量指标; + - `Trainer` 主要控制Fine-tune的训练,包含以下可控制的参数: -'trainer.train' 主要控制具体的训练过程,包含以下可控制的参数: + * `model`: 被优化模型; + * `optimizer`: 优化器选择; + * `use_vdl`: 是否使用vdl可视化训练过程; + * `checkpoint_dir`: 保存模型参数的地址; + * `compare_metrics`: 保存最优模型的衡量指标; -* train_dataset: 训练时所用的数据集; -* epochs: 训练轮数; -* batch_size: 训练的批大小,如果使用GPU,请根据实际情况调整batch_size; -* num_workers: works的数量,默认为0; -* eval_dataset: 验证集; -* log_interval: 打印日志的间隔, 单位为执行批训练的次数。 -* save_interval: 保存模型的间隔频次,单位为执行训练的轮数。 + - `trainer.train` 主要控制具体的训练过程,包含以下可控制的参数: -## 模型预测 + * `train_dataset`: 训练时所用的数据集; + * `epochs`: 训练轮数; + * `batch_size`: 训练的批大小,如果使用GPU,请根据实际情况调整batch_size; + * `num_workers`: works的数量,默认为0; + * `eval_dataset`: 验证集; + * `log_interval`: 打印日志的间隔, 单位为执行批训练的次数。 + * `save_interval`: 保存模型的间隔频次,单位为执行训练的轮数。 -当完成Fine-tune后,Fine-tune过程在验证集上表现最优的模型会被保存在'${CHECKPOINT_DIR}/best_model'目录下,其中'${CHECKPOINT_DIR}'目录为Fine-tune时所选择的保存checkpoint的目录。 -我们使用该模型来进行预测。predict.py脚本如下: + - 模型预测 -```python -import paddle -import paddlehub as hub + - 当完成Fine-tune后,Fine-tune过程在验证集上表现最优的模型会被保存在`${CHECKPOINT_DIR}/best_model`目录下,其中`${CHECKPOINT_DIR}`目录为Fine-tune时所选择的保存checkpoint的目录。 我们使用该模型来进行预测。predict.py脚本如下: -if __name__ == '__main__': + - ```python + import paddle + import paddlehub as hub + if __name__ == '__main__': + model = hub.Module(name='repvgg_b1_imagenet', label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"], load_checkpoint='/PATH/TO/CHECKPOINT') + result = model.predict(['flower.jpg']) + ``` - model = hub.Module(name='repvgg_b1_imagenet', label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"], 
load_checkpoint='/PATH/TO/CHECKPOINT') - result = model.predict(['flower.jpg']) -``` -参数配置正确后,请执行脚本'python predict.py', 加载模型具体可参见[加载](https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0-rc/api/paddle/framework/io/load_cn.html#load)。 + - **NOTE:** 进行预测时,所选择的module,checkpoint_dir,dataset必须和Fine-tune所用的一样。 -**NOTE:** 进行预测时,所选择的module,checkpoint_dir,dataset必须和Fine-tune所用的一样。 +## 四、服务部署 -## 服务部署 +- PaddleHub Serving可以部署一个在线分类任务服务。 -PaddleHub Serving可以部署一个在线分类任务服务 +- ### 第一步:启动PaddleHub Serving -## Step1: 启动PaddleHub Serving + - 运行启动命令: -运行启动命令: + - ```shell + $ hub serving start -m repvgg_b1_imagenet + ``` -```shell -$ hub serving start -m repvgg_b1_imagenet -``` + - 这样就完成了一个分类任务服务化API的部署,默认端口号为8866。 -这样就完成了一个分类任务服务化API的部署,默认端口号为8866。 + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 +- ### 第二步:发送预测请求 -## Step2: 发送预测请求 + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + ```python + import requests + import json + import cv2 + import base64 + import numpy as np + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + # 发送HTTP请求 + org_im = cv2.imread('/PATH/TO/IMAGE') + data = {'images':[cv2_to_base64(org_im)], 'top_k':2} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/repvgg_b1_imagenet" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + data =r.json()["results"]['data'] + ``` +## 五、更新历史 -```python -import requests -import json -import cv2 -import base64 +* 1.0.0 -import numpy as np - - -def cv2_to_base64(image): - data = cv2.imencode('.jpg', image)[1] - return base64.b64encode(data.tostring()).decode('utf8') - -def base64_to_cv2(b64str): - data = base64.b64decode(b64str.encode('utf8')) - data = np.fromstring(data, np.uint8) - data = cv2.imdecode(data, cv2.IMREAD_COLOR) - return data - -# 发送HTTP请求 -org_im = cv2.imread('/PATH/TO/IMAGE') - -data = {'images':[cv2_to_base64(org_im)], 'top_k':2} -headers = {"Content-type": "application/json"} -url = "http://127.0.0.1:8866/predict/repvgg_b1_imagenet" -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -data =r.json()["results"]['data'] -``` - -### 查看代码 - -[PaddleClas](https://github.com/PaddlePaddle/PaddleClas) - -### 依赖 - -paddlepaddle >= 2.0.0 - -paddlehub >= 2.0.0 + 初始发布 diff --git a/modules/image/classification/repvgg_b1g2_imagenet/README.md b/modules/image/classification/repvgg_b1g2_imagenet/README.md index 8fc6517eddc62157cd9ef506cc276c102fa1eb24..cf73a03852e4f85a3cf5f04b6844db40a1af6f8c 100644 --- a/modules/image/classification/repvgg_b1g2_imagenet/README.md +++ b/modules/image/classification/repvgg_b1g2_imagenet/README.md @@ -1,192 +1,181 @@ -```shell -$ hub install repvgg_b1g2_imagenet==1.0.0 -``` +# repvgg_b1g2_imagenet -## 命令行预测 +|模型名称|repvgg_b1g2_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|RepVGG| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|是| +|模型大小|264MB| +|指标|-| +|最新更新日期|2021-09-14| -```shell -$ hub run repvgg_b1g2_imagenet --input_path "/PATH/TO/IMAGE" --top_k 5 -``` -## 脚本预测 +## 一、模型基本信息 -```python -import paddle -import paddlehub as hub +- ### 模型介绍 -if __name__ == '__main__': + - RepVGG(Making VGG-style ConvNets Great Again)系列模型是清华大学(丁桂光团队)、旷视科技(孙建等)、香港科技大学和阿伯里斯特威斯大学于2021年提出的一种简单但功能强大的卷积神经网络架构。有一个类似于 VGG 
的推理时间代理。主体由3x3卷积和relu stack组成,而训练时间模型具有多分支拓扑。训练时间和推理时间的解耦是通过重新参数化技术实现的,因此该模型被称为repvgg。 - model = hub.Module(name='repvgg_b1g2_imagenet',) - result = model.predict([PATH/TO/IMAGE]) -``` -## Fine-tune代码步骤 +## 二、安装 -使用PaddleHub Fine-tune API进行Fine-tune可以分为4个步骤。 +- ### 1、环境依赖 -### Step1: 定义数据预处理方式 -```python -import paddlehub.vision.transforms as T + - paddlepaddle >= 2.0.0 -transforms = T.Compose([T.Resize((256, 256)), - T.CenterCrop(224), - T.Normalize(mean=[0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225])], - to_rgb=True) -``` + - paddlehub >= 2.0.0 -'transforms' 数据增强模块定义了丰富的数据预处理方式,用户可按照需求替换自己需要的数据预处理方式。 +- ### 2、安装 + - ```shell + $ hub install repvgg_b1g2_imagenet + ``` -### Step2: 下载数据集并使用 -```python -from paddlehub.datasets import Flowers + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) -flowers = Flowers(transforms) -flowers_validate = Flowers(transforms, mode='val') -``` -* transforms(Callable): 数据预处理方式。 -* mode(str): 选择数据模式,可选项有 'train', 'test', 'val', 默认为'train'。 +## 三、模型API预测 -'hub.datasets.Flowers()' 会自动从网络下载数据集并解压到用户目录下'$HOME/.paddlehub/dataset'目录。 +- ### 1.命令行预测 + ```shell + $ hub run repvgg_b1g2_imagenet --input_path "/PATH/TO/IMAGE" --top_k 5 + ``` +- ### 2.预测代码示例 -### Step3: 加载预训练模型 + ```python + import paddle + import paddlehub as hub + if __name__ == '__main__': + model = hub.Module(name='repvgg_b1g2_imagenet') + result = model.predict(['flower.jpg']) + ``` +- ### 3.如何开始Fine-tune -```python -import paddlehub as hub + - 在完成安装PaddlePaddle与PaddleHub后,通过执行`python train.py`即可开始使用repvgg_b1g2_imagenet对[Flowers](../../docs/reference/datasets.md#class-hubdatasetsflowers)等数据集进行Fine-tune。 -model = hub.Module(name='repvgg_b1g2_imagenet', - label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"], - load_checkpoint=None) -``` -* name(str): 选择预训练模型的名字。 -* label_list(list): 设置标签对应分类类别, 默认为Imagenet2012类别。 -* load _checkpoint(str): 模型参数地址。 + - 代码步骤 -PaddleHub提供许多图像分类预训练模型,如xception、mobilenet、efficientnet等,详细信息参见[图像分类模型](https://www.paddlepaddle.org.cn/hub?filter=en_category&value=ImageClassification)。 + - Step1: 定义数据预处理方式 + - ```python + import paddlehub.vision.transforms as T + transforms = T.Compose([T.Resize((256, 256)), + T.CenterCrop(224), + T.Normalize(mean=[0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225])], + to_rgb=True) + ``` -如果想尝试efficientnet模型,只需要更换Module中的'name'参数即可. 
-```python -import paddlehub as hub + - `transforms` 数据增强模块定义了丰富的数据预处理方式,用户可按照需求替换自己需要的数据预处理方式。 -# 更换name参数即可无缝切换efficientnet模型, 代码示例如下 -module = hub.Module(name="efficientnetb7_imagenet") -``` -**NOTE:**目前部分模型还没有完全升级到2.0版本,敬请期待。 + - Step2: 下载数据集并使用 + - ```python + from paddlehub.datasets import Flowers + flowers = Flowers(transforms) + flowers_validate = Flowers(transforms, mode='val') + ``` -### Step4: 选择优化策略和运行配置 + * `transforms`: 数据预处理方式。 + * `mode`: 选择数据模式,可选项有 `train`, `test`, `val`, 默认为`train`。 -```python -import paddle -from paddlehub.finetune.trainer import Trainer + * 数据集的准备代码可以参考 [flowers.py](../../paddlehub/datasets/flowers.py)。`hub.datasets.Flowers()` 会自动从网络下载数据集并解压到用户目录下`$HOME/.paddlehub/dataset`目录。 -optimizer = paddle.optimizer.Adam(learning_rate=0.001, parameters=model.parameters()) -trainer = Trainer(model, optimizer, checkpoint_dir='img_classification_ckpt') -trainer.train(flowers, epochs=100, batch_size=32, eval_dataset=flowers_validate, save_interval=1) -``` + - Step3: 加载预训练模型 -#### 优化策略 + - ```python + model = hub.Module(name="repvgg_b1g2_imagenet", label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"]) + ``` + * `name`: 选择预训练模型的名字。 + * `label_list`: 设置输出分类类别,默认为Imagenet2012类别。 -Paddle2.0rc提供了多种优化器选择,如'SGD', 'Adam', 'Adamax'等,详细参见[策略](https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0-rc/api/paddle/optimizer/optimizer/Optimizer_cn.html)。 + - Step4: 选择优化策略和运行配置 -其中'Adam': + ```python + optimizer = paddle.optimizer.Adam(learning_rate=0.001, parameters=model.parameters()) + trainer = Trainer(model, optimizer, checkpoint_dir='img_classification_ckpt') + trainer.train(flowers, epochs=100, batch_size=32, eval_dataset=flowers_validate, save_interval=1) + ``` -* learning_rate: 全局学习率。默认为1e-3; -* parameters: 待优化模型参数。 -#### 运行配置 -'Trainer' 主要控制Fine-tune的训练,包含以下可控制的参数: + - 运行配置 -* model: 被优化模型; -* optimizer: 优化器选择; -* use_vdl: 是否使用vdl可视化训练过程; -* checkpoint_dir: 保存模型参数的地址; -* compare_metrics: 保存最优模型的衡量指标; + - `Trainer` 主要控制Fine-tune的训练,包含以下可控制的参数: -'trainer.train' 主要控制具体的训练过程,包含以下可控制的参数: + * `model`: 被优化模型; + * `optimizer`: 优化器选择; + * `use_vdl`: 是否使用vdl可视化训练过程; + * `checkpoint_dir`: 保存模型参数的地址; + * `compare_metrics`: 保存最优模型的衡量指标; -* train_dataset: 训练时所用的数据集; -* epochs: 训练轮数; -* batch_size: 训练的批大小,如果使用GPU,请根据实际情况调整batch_size; -* num_workers: works的数量,默认为0; -* eval_dataset: 验证集; -* log_interval: 打印日志的间隔, 单位为执行批训练的次数。 -* save_interval: 保存模型的间隔频次,单位为执行训练的轮数。 + - `trainer.train` 主要控制具体的训练过程,包含以下可控制的参数: -## 模型预测 + * `train_dataset`: 训练时所用的数据集; + * `epochs`: 训练轮数; + * `batch_size`: 训练的批大小,如果使用GPU,请根据实际情况调整batch_size; + * `num_workers`: works的数量,默认为0; + * `eval_dataset`: 验证集; + * `log_interval`: 打印日志的间隔, 单位为执行批训练的次数。 + * `save_interval`: 保存模型的间隔频次,单位为执行训练的轮数。 -当完成Fine-tune后,Fine-tune过程在验证集上表现最优的模型会被保存在'${CHECKPOINT_DIR}/best_model'目录下,其中'${CHECKPOINT_DIR}'目录为Fine-tune时所选择的保存checkpoint的目录。 -我们使用该模型来进行预测。predict.py脚本如下: + - 模型预测 -```python -import paddle -import paddlehub as hub + - 当完成Fine-tune后,Fine-tune过程在验证集上表现最优的模型会被保存在`${CHECKPOINT_DIR}/best_model`目录下,其中`${CHECKPOINT_DIR}`目录为Fine-tune时所选择的保存checkpoint的目录。 我们使用该模型来进行预测。predict.py脚本如下: -if __name__ == '__main__': + - ```python + import paddle + import paddlehub as hub + if __name__ == '__main__': + model = hub.Module(name='repvgg_b1g2_imagenet', label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"], load_checkpoint='/PATH/TO/CHECKPOINT') + result = model.predict(['flower.jpg']) + ``` - model = hub.Module(name='repvgg_b1g2_imagenet', label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"], 
load_checkpoint='/PATH/TO/CHECKPOINT') - result = model.predict(['flower.jpg']) -``` -参数配置正确后,请执行脚本'python predict.py', 加载模型具体可参见[加载](https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0-rc/api/paddle/framework/io/load_cn.html#load)。 + - **NOTE:** 进行预测时,所选择的module,checkpoint_dir,dataset必须和Fine-tune所用的一样。 -**NOTE:** 进行预测时,所选择的module,checkpoint_dir,dataset必须和Fine-tune所用的一样。 +## 四、服务部署 -## 服务部署 +- PaddleHub Serving可以部署一个在线分类任务服务。 -PaddleHub Serving可以部署一个在线分类任务服务 +- ### 第一步:启动PaddleHub Serving -## Step1: 启动PaddleHub Serving + - 运行启动命令: -运行启动命令: + - ```shell + $ hub serving start -m repvgg_b1g2_imagenet + ``` -```shell -$ hub serving start -m repvgg_b1g2_imagenet -``` + - 这样就完成了一个分类任务服务化API的部署,默认端口号为8866。 -这样就完成了一个分类任务服务化API的部署,默认端口号为8866。 + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 +- ### 第二步:发送预测请求 -## Step2: 发送预测请求 + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + ```python + import requests + import json + import cv2 + import base64 + import numpy as np + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + # 发送HTTP请求 + org_im = cv2.imread('/PATH/TO/IMAGE') + data = {'images':[cv2_to_base64(org_im)], 'top_k':2} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/repvgg_b1g2_imagenet" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + data =r.json()["results"]['data'] + ``` +## 五、更新历史 -```python -import requests -import json -import cv2 -import base64 +* 1.0.0 -import numpy as np - - -def cv2_to_base64(image): - data = cv2.imencode('.jpg', image)[1] - return base64.b64encode(data.tostring()).decode('utf8') - -def base64_to_cv2(b64str): - data = base64.b64decode(b64str.encode('utf8')) - data = np.fromstring(data, np.uint8) - data = cv2.imdecode(data, cv2.IMREAD_COLOR) - return data - -# 发送HTTP请求 -org_im = cv2.imread('/PATH/TO/IMAGE') - -data = {'images':[cv2_to_base64(org_im)], 'top_k':2} -headers = {"Content-type": "application/json"} -url = "http://127.0.0.1:8866/predict/repvgg_b1g2_imagenet" -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -data =r.json()["results"]['data'] -``` - -### 查看代码 - -[PaddleClas](https://github.com/PaddlePaddle/PaddleClas) - -### 依赖 - -paddlepaddle >= 2.0.0 - -paddlehub >= 2.0.0 + 初始发布 diff --git a/modules/image/classification/repvgg_b1g4_imagenet/README.md b/modules/image/classification/repvgg_b1g4_imagenet/README.md index 568c4e0c236e276ca137a397b21510904ad4c04d..3ffe7e0cf5c5448b7b8bdbb2f5da403e9c29e389 100644 --- a/modules/image/classification/repvgg_b1g4_imagenet/README.md +++ b/modules/image/classification/repvgg_b1g4_imagenet/README.md @@ -1,192 +1,181 @@ -```shell -$ hub install repvgg_b1g4_imagenet==1.0.0 -``` +# repvgg_b1g4_imagenet -## 命令行预测 +|模型名称|repvgg_b1g4_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|RepVGG| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|是| +|模型大小|231MB| +|指标|-| +|最新更新日期|2021-09-14| -```shell -$ hub run repvgg_b1g4_imagenet --input_path "/PATH/TO/IMAGE" --top_k 5 -``` -## 脚本预测 +## 一、模型基本信息 -```python -import paddle -import paddlehub as hub +- ### 模型介绍 -if __name__ == '__main__': + - RepVGG(Making VGG-style ConvNets Great Again)系列模型是清华大学(丁桂光团队)、旷视科技(孙建等)、香港科技大学和阿伯里斯特威斯大学于2021年提出的一种简单但功能强大的卷积神经网络架构。有一个类似于 
VGG 的推理时间代理。主体由3x3卷积和relu stack组成,而训练时间模型具有多分支拓扑。训练时间和推理时间的解耦是通过重新参数化技术实现的,因此该模型被称为repvgg。 - model = hub.Module(name='repvgg_b1g4_imagenet',) - result = model.predict([PATH/TO/IMAGE]) -``` -## Fine-tune代码步骤 +## 二、安装 -使用PaddleHub Fine-tune API进行Fine-tune可以分为4个步骤。 +- ### 1、环境依赖 -### Step1: 定义数据预处理方式 -```python -import paddlehub.vision.transforms as T + - paddlepaddle >= 2.0.0 -transforms = T.Compose([T.Resize((256, 256)), - T.CenterCrop(224), - T.Normalize(mean=[0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225])], - to_rgb=True) -``` + - paddlehub >= 2.0.0 -'transforms' 数据增强模块定义了丰富的数据预处理方式,用户可按照需求替换自己需要的数据预处理方式。 +- ### 2、安装 + - ```shell + $ hub install repvgg_b1g4_imagenet + ``` -### Step2: 下载数据集并使用 -```python -from paddlehub.datasets import Flowers + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) -flowers = Flowers(transforms) -flowers_validate = Flowers(transforms, mode='val') -``` -* transforms(Callable): 数据预处理方式。 -* mode(str): 选择数据模式,可选项有 'train', 'test', 'val', 默认为'train'。 +## 三、模型API预测 -'hub.datasets.Flowers()' 会自动从网络下载数据集并解压到用户目录下'$HOME/.paddlehub/dataset'目录。 +- ### 1.命令行预测 + ```shell + $ hub run repvgg_b1g4_imagenet --input_path "/PATH/TO/IMAGE" --top_k 5 + ``` +- ### 2.预测代码示例 -### Step3: 加载预训练模型 + ```python + import paddle + import paddlehub as hub + if __name__ == '__main__': + model = hub.Module(name='repvgg_b1g4_imagenet') + result = model.predict(['flower.jpg']) + ``` +- ### 3.如何开始Fine-tune -```python -import paddlehub as hub + - 在完成安装PaddlePaddle与PaddleHub后,通过执行`python train.py`即可开始使用repvgg_b1g4_imagenet对[Flowers](../../docs/reference/datasets.md#class-hubdatasetsflowers)等数据集进行Fine-tune。 -model = hub.Module(name='repvgg_b1g4_imagenet', - label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"], - load_checkpoint=None) -``` -* name(str): 选择预训练模型的名字。 -* label_list(list): 设置标签对应分类类别, 默认为Imagenet2012类别。 -* load _checkpoint(str): 模型参数地址。 + - 代码步骤 -PaddleHub提供许多图像分类预训练模型,如xception、mobilenet、efficientnet等,详细信息参见[图像分类模型](https://www.paddlepaddle.org.cn/hub?filter=en_category&value=ImageClassification)。 + - Step1: 定义数据预处理方式 + - ```python + import paddlehub.vision.transforms as T + transforms = T.Compose([T.Resize((256, 256)), + T.CenterCrop(224), + T.Normalize(mean=[0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225])], + to_rgb=True) + ``` -如果想尝试efficientnet模型,只需要更换Module中的'name'参数即可. 
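+
+    - 提示:如果想尝试其它图像分类预训练模型(如 efficientnet),只需将 `hub.Module` 中的 `name` 参数由 `repvgg_b1g4_imagenet` 换成目标模型名即可,其余 Fine-tune 步骤保持不变。以下为一个最小示例(假设所选模型已适配当前 PaddleHub 版本,可用模型请以官网图像分类模型列表为准):
+      - ```python
+        import paddlehub as hub
+
+        # 更换 name 参数即可无缝切换到其它预训练模型
+        module = hub.Module(name="efficientnetb7_imagenet")
+        ```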
-```python -import paddlehub as hub + - `transforms` 数据增强模块定义了丰富的数据预处理方式,用户可按照需求替换自己需要的数据预处理方式。 -# 更换name参数即可无缝切换efficientnet模型, 代码示例如下 -module = hub.Module(name="efficientnetb7_imagenet") -``` -**NOTE:**目前部分模型还没有完全升级到2.0版本,敬请期待。 + - Step2: 下载数据集并使用 + - ```python + from paddlehub.datasets import Flowers + flowers = Flowers(transforms) + flowers_validate = Flowers(transforms, mode='val') + ``` -### Step4: 选择优化策略和运行配置 + * `transforms`: 数据预处理方式。 + * `mode`: 选择数据模式,可选项有 `train`, `test`, `val`, 默认为`train`。 -```python -import paddle -from paddlehub.finetune.trainer import Trainer + * 数据集的准备代码可以参考 [flowers.py](../../paddlehub/datasets/flowers.py)。`hub.datasets.Flowers()` 会自动从网络下载数据集并解压到用户目录下`$HOME/.paddlehub/dataset`目录。 -optimizer = paddle.optimizer.Adam(learning_rate=0.001, parameters=model.parameters()) -trainer = Trainer(model, optimizer, checkpoint_dir='img_classification_ckpt') -trainer.train(flowers, epochs=100, batch_size=32, eval_dataset=flowers_validate, save_interval=1) -``` + - Step3: 加载预训练模型 -#### 优化策略 + - ```python + model = hub.Module(name="repvgg_b1g4_imagenet", label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"]) + ``` + * `name`: 选择预训练模型的名字。 + * `label_list`: 设置输出分类类别,默认为Imagenet2012类别。 -Paddle2.0rc提供了多种优化器选择,如'SGD', 'Adam', 'Adamax'等,详细参见[策略](https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0-rc/api/paddle/optimizer/optimizer/Optimizer_cn.html)。 + - Step4: 选择优化策略和运行配置 -其中'Adam': + ```python + optimizer = paddle.optimizer.Adam(learning_rate=0.001, parameters=model.parameters()) + trainer = Trainer(model, optimizer, checkpoint_dir='img_classification_ckpt') + trainer.train(flowers, epochs=100, batch_size=32, eval_dataset=flowers_validate, save_interval=1) + ``` -* learning_rate: 全局学习率。默认为1e-3; -* parameters: 待优化模型参数。 -#### 运行配置 -'Trainer' 主要控制Fine-tune的训练,包含以下可控制的参数: + - 运行配置 -* model: 被优化模型; -* optimizer: 优化器选择; -* use_vdl: 是否使用vdl可视化训练过程; -* checkpoint_dir: 保存模型参数的地址; -* compare_metrics: 保存最优模型的衡量指标; + - `Trainer` 主要控制Fine-tune的训练,包含以下可控制的参数: -'trainer.train' 主要控制具体的训练过程,包含以下可控制的参数: + * `model`: 被优化模型; + * `optimizer`: 优化器选择; + * `use_vdl`: 是否使用vdl可视化训练过程; + * `checkpoint_dir`: 保存模型参数的地址; + * `compare_metrics`: 保存最优模型的衡量指标; -* train_dataset: 训练时所用的数据集; -* epochs: 训练轮数; -* batch_size: 训练的批大小,如果使用GPU,请根据实际情况调整batch_size; -* num_workers: works的数量,默认为0; -* eval_dataset: 验证集; -* log_interval: 打印日志的间隔, 单位为执行批训练的次数。 -* save_interval: 保存模型的间隔频次,单位为执行训练的轮数。 + - `trainer.train` 主要控制具体的训练过程,包含以下可控制的参数: -## 模型预测 + * `train_dataset`: 训练时所用的数据集; + * `epochs`: 训练轮数; + * `batch_size`: 训练的批大小,如果使用GPU,请根据实际情况调整batch_size; + * `num_workers`: works的数量,默认为0; + * `eval_dataset`: 验证集; + * `log_interval`: 打印日志的间隔, 单位为执行批训练的次数。 + * `save_interval`: 保存模型的间隔频次,单位为执行训练的轮数。 -当完成Fine-tune后,Fine-tune过程在验证集上表现最优的模型会被保存在'${CHECKPOINT_DIR}/best_model'目录下,其中'${CHECKPOINT_DIR}'目录为Fine-tune时所选择的保存checkpoint的目录。 -我们使用该模型来进行预测。predict.py脚本如下: + - 模型预测 -```python -import paddle -import paddlehub as hub + - 当完成Fine-tune后,Fine-tune过程在验证集上表现最优的模型会被保存在`${CHECKPOINT_DIR}/best_model`目录下,其中`${CHECKPOINT_DIR}`目录为Fine-tune时所选择的保存checkpoint的目录。 我们使用该模型来进行预测。predict.py脚本如下: -if __name__ == '__main__': + - ```python + import paddle + import paddlehub as hub + if __name__ == '__main__': + model = hub.Module(name='repvgg_b1g4_imagenet', label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"], load_checkpoint='/PATH/TO/CHECKPOINT') + result = model.predict(['flower.jpg']) + ``` - model = hub.Module(name='repvgg_b1g4_imagenet', label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"], 
load_checkpoint='/PATH/TO/CHECKPOINT') - result = model.predict(['flower.jpg']) -``` -参数配置正确后,请执行脚本'python predict.py', 加载模型具体可参见[加载](https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0-rc/api/paddle/framework/io/load_cn.html#load)。 + - **NOTE:** 进行预测时,所选择的module,checkpoint_dir,dataset必须和Fine-tune所用的一样。 -**NOTE:** 进行预测时,所选择的module,checkpoint_dir,dataset必须和Fine-tune所用的一样。 +## 四、服务部署 -## 服务部署 +- PaddleHub Serving可以部署一个在线分类任务服务。 -PaddleHub Serving可以部署一个在线分类任务服务 +- ### 第一步:启动PaddleHub Serving -## Step1: 启动PaddleHub Serving + - 运行启动命令: -运行启动命令: + - ```shell + $ hub serving start -m repvgg_b1g4_imagenet + ``` -```shell -$ hub serving start -m repvgg_b1g4_imagenet -``` + - 这样就完成了一个分类任务服务化API的部署,默认端口号为8866。 -这样就完成了一个分类任务服务化API的部署,默认端口号为8866。 + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 +- ### 第二步:发送预测请求 -## Step2: 发送预测请求 + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + ```python + import requests + import json + import cv2 + import base64 + import numpy as np + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + # 发送HTTP请求 + org_im = cv2.imread('/PATH/TO/IMAGE') + data = {'images':[cv2_to_base64(org_im)], 'top_k':2} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/repvgg_b1g4_imagenet" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + data =r.json()["results"]['data'] + ``` +## 五、更新历史 -```python -import requests -import json -import cv2 -import base64 +* 1.0.0 -import numpy as np - - -def cv2_to_base64(image): - data = cv2.imencode('.jpg', image)[1] - return base64.b64encode(data.tostring()).decode('utf8') - -def base64_to_cv2(b64str): - data = base64.b64decode(b64str.encode('utf8')) - data = np.fromstring(data, np.uint8) - data = cv2.imdecode(data, cv2.IMREAD_COLOR) - return data - -# 发送HTTP请求 -org_im = cv2.imread('/PATH/TO/IMAGE') - -data = {'images':[cv2_to_base64(org_im)], 'top_k':2} -headers = {"Content-type": "application/json"} -url = "http://127.0.0.1:8866/predict/repvgg_b1g4_imagenet" -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -data =r.json()["results"]['data'] -``` - -### 查看代码 - -[PaddleClas](https://github.com/PaddlePaddle/PaddleClas) - -### 依赖 - -paddlepaddle >= 2.0.0 - -paddlehub >= 2.0.0 + 初始发布 diff --git a/modules/image/classification/repvgg_b2_imagenet/README.md b/modules/image/classification/repvgg_b2_imagenet/README.md index db2e51baf09648523a2b24b3c6d8722454af907e..4252cc7ad56f959419e595bba8397662b3505524 100644 --- a/modules/image/classification/repvgg_b2_imagenet/README.md +++ b/modules/image/classification/repvgg_b2_imagenet/README.md @@ -1,192 +1,181 @@ -```shell -$ hub install repvgg_b2_imagenet==1.0.0 -``` +# repvgg_b2_imagenet -## 命令行预测 +|模型名称|repvgg_b2_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|RepVGG| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|是| +|模型大小|514MB| +|指标|-| +|最新更新日期|2021-09-14| -```shell -$ hub run repvgg_b2_imagenet --input_path "/PATH/TO/IMAGE" --top_k 5 -``` -## 脚本预测 +## 一、模型基本信息 -```python -import paddle -import paddlehub as hub +- ### 模型介绍 -if __name__ == '__main__': + - RepVGG(Making VGG-style ConvNets Great Again)系列模型是清华大学(丁桂光团队)、旷视科技(孙建等)、香港科技大学和阿伯里斯特威斯大学于2021年提出的一种简单但功能强大的卷积神经网络架构。有一个类似于 VGG 
的推理时间代理。主体由3x3卷积和relu stack组成,而训练时间模型具有多分支拓扑。训练时间和推理时间的解耦是通过重新参数化技术实现的,因此该模型被称为repvgg。 - model = hub.Module(name='repvgg_b2_imagenet',) - result = model.predict([PATH/TO/IMAGE]) -``` -## Fine-tune代码步骤 +## 二、安装 -使用PaddleHub Fine-tune API进行Fine-tune可以分为4个步骤。 +- ### 1、环境依赖 -### Step1: 定义数据预处理方式 -```python -import paddlehub.vision.transforms as T + - paddlepaddle >= 2.0.0 -transforms = T.Compose([T.Resize((256, 256)), - T.CenterCrop(224), - T.Normalize(mean=[0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225])], - to_rgb=True) -``` + - paddlehub >= 2.0.0 -'transforms' 数据增强模块定义了丰富的数据预处理方式,用户可按照需求替换自己需要的数据预处理方式。 +- ### 2、安装 + - ```shell + $ hub install repvgg_b2_imagenet + ``` -### Step2: 下载数据集并使用 -```python -from paddlehub.datasets import Flowers + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) -flowers = Flowers(transforms) -flowers_validate = Flowers(transforms, mode='val') -``` -* transforms(Callable): 数据预处理方式。 -* mode(str): 选择数据模式,可选项有 'train', 'test', 'val', 默认为'train'。 +## 三、模型API预测 -'hub.datasets.Flowers()' 会自动从网络下载数据集并解压到用户目录下'$HOME/.paddlehub/dataset'目录。 +- ### 1.命令行预测 + ```shell + $ hub run repvgg_b2_imagenet --input_path "/PATH/TO/IMAGE" --top_k 5 + ``` +- ### 2.预测代码示例 -### Step3: 加载预训练模型 + ```python + import paddle + import paddlehub as hub + if __name__ == '__main__': + model = hub.Module(name='repvgg_b2_imagenet') + result = model.predict(['flower.jpg']) + ``` +- ### 3.如何开始Fine-tune -```python -import paddlehub as hub + - 在完成安装PaddlePaddle与PaddleHub后,通过执行`python train.py`即可开始使用repvgg_b2_imagenet对[Flowers](../../docs/reference/datasets.md#class-hubdatasetsflowers)等数据集进行Fine-tune。 -model = hub.Module(name='repvgg_b2_imagenet', - label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"], - load_checkpoint=None) -``` -* name(str): 选择预训练模型的名字。 -* label_list(list): 设置标签对应分类类别, 默认为Imagenet2012类别。 -* load _checkpoint(str): 模型参数地址。 + - 代码步骤 -PaddleHub提供许多图像分类预训练模型,如xception、mobilenet、efficientnet等,详细信息参见[图像分类模型](https://www.paddlepaddle.org.cn/hub?filter=en_category&value=ImageClassification)。 + - Step1: 定义数据预处理方式 + - ```python + import paddlehub.vision.transforms as T + transforms = T.Compose([T.Resize((256, 256)), + T.CenterCrop(224), + T.Normalize(mean=[0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225])], + to_rgb=True) + ``` -如果想尝试efficientnet模型,只需要更换Module中的'name'参数即可. 
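+
+    - 提示:如果想尝试其它图像分类预训练模型(如 efficientnet),只需将 `hub.Module` 中的 `name` 参数由 `repvgg_b2_imagenet` 换成目标模型名即可,其余 Fine-tune 步骤保持不变。以下为一个最小示例(假设所选模型已适配当前 PaddleHub 版本,可用模型请以官网图像分类模型列表为准):
+      - ```python
+        import paddlehub as hub
+
+        # 更换 name 参数即可无缝切换到其它预训练模型
+        module = hub.Module(name="efficientnetb7_imagenet")
+        ```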
-```python -import paddlehub as hub + - `transforms` 数据增强模块定义了丰富的数据预处理方式,用户可按照需求替换自己需要的数据预处理方式。 -# 更换name参数即可无缝切换efficientnet模型, 代码示例如下 -module = hub.Module(name="efficientnetb7_imagenet") -``` -**NOTE:**目前部分模型还没有完全升级到2.0版本,敬请期待。 + - Step2: 下载数据集并使用 + - ```python + from paddlehub.datasets import Flowers + flowers = Flowers(transforms) + flowers_validate = Flowers(transforms, mode='val') + ``` -### Step4: 选择优化策略和运行配置 + * `transforms`: 数据预处理方式。 + * `mode`: 选择数据模式,可选项有 `train`, `test`, `val`, 默认为`train`。 -```python -import paddle -from paddlehub.finetune.trainer import Trainer + * 数据集的准备代码可以参考 [flowers.py](../../paddlehub/datasets/flowers.py)。`hub.datasets.Flowers()` 会自动从网络下载数据集并解压到用户目录下`$HOME/.paddlehub/dataset`目录。 -optimizer = paddle.optimizer.Adam(learning_rate=0.001, parameters=model.parameters()) -trainer = Trainer(model, optimizer, checkpoint_dir='img_classification_ckpt') -trainer.train(flowers, epochs=100, batch_size=32, eval_dataset=flowers_validate, save_interval=1) -``` + - Step3: 加载预训练模型 -#### 优化策略 + - ```python + model = hub.Module(name="repvgg_b2_imagenet", label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"]) + ``` + * `name`: 选择预训练模型的名字。 + * `label_list`: 设置输出分类类别,默认为Imagenet2012类别。 -Paddle2.0rc提供了多种优化器选择,如'SGD', 'Adam', 'Adamax'等,详细参见[策略](https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0-rc/api/paddle/optimizer/optimizer/Optimizer_cn.html)。 + - Step4: 选择优化策略和运行配置 -其中'Adam': + ```python + optimizer = paddle.optimizer.Adam(learning_rate=0.001, parameters=model.parameters()) + trainer = Trainer(model, optimizer, checkpoint_dir='img_classification_ckpt') + trainer.train(flowers, epochs=100, batch_size=32, eval_dataset=flowers_validate, save_interval=1) + ``` -* learning_rate: 全局学习率。默认为1e-3; -* parameters: 待优化模型参数。 -#### 运行配置 -'Trainer' 主要控制Fine-tune的训练,包含以下可控制的参数: + - 运行配置 -* model: 被优化模型; -* optimizer: 优化器选择; -* use_vdl: 是否使用vdl可视化训练过程; -* checkpoint_dir: 保存模型参数的地址; -* compare_metrics: 保存最优模型的衡量指标; + - `Trainer` 主要控制Fine-tune的训练,包含以下可控制的参数: -'trainer.train' 主要控制具体的训练过程,包含以下可控制的参数: + * `model`: 被优化模型; + * `optimizer`: 优化器选择; + * `use_vdl`: 是否使用vdl可视化训练过程; + * `checkpoint_dir`: 保存模型参数的地址; + * `compare_metrics`: 保存最优模型的衡量指标; -* train_dataset: 训练时所用的数据集; -* epochs: 训练轮数; -* batch_size: 训练的批大小,如果使用GPU,请根据实际情况调整batch_size; -* num_workers: works的数量,默认为0; -* eval_dataset: 验证集; -* log_interval: 打印日志的间隔, 单位为执行批训练的次数。 -* save_interval: 保存模型的间隔频次,单位为执行训练的轮数。 + - `trainer.train` 主要控制具体的训练过程,包含以下可控制的参数: -## 模型预测 + * `train_dataset`: 训练时所用的数据集; + * `epochs`: 训练轮数; + * `batch_size`: 训练的批大小,如果使用GPU,请根据实际情况调整batch_size; + * `num_workers`: works的数量,默认为0; + * `eval_dataset`: 验证集; + * `log_interval`: 打印日志的间隔, 单位为执行批训练的次数。 + * `save_interval`: 保存模型的间隔频次,单位为执行训练的轮数。 -当完成Fine-tune后,Fine-tune过程在验证集上表现最优的模型会被保存在'${CHECKPOINT_DIR}/best_model'目录下,其中'${CHECKPOINT_DIR}'目录为Fine-tune时所选择的保存checkpoint的目录。 -我们使用该模型来进行预测。predict.py脚本如下: + - 模型预测 -```python -import paddle -import paddlehub as hub + - 当完成Fine-tune后,Fine-tune过程在验证集上表现最优的模型会被保存在`${CHECKPOINT_DIR}/best_model`目录下,其中`${CHECKPOINT_DIR}`目录为Fine-tune时所选择的保存checkpoint的目录。 我们使用该模型来进行预测。predict.py脚本如下: -if __name__ == '__main__': + - ```python + import paddle + import paddlehub as hub + if __name__ == '__main__': + model = hub.Module(name='repvgg_b2_imagenet', label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"], load_checkpoint='/PATH/TO/CHECKPOINT') + result = model.predict(['flower.jpg']) + ``` - model = hub.Module(name='repvgg_b2_imagenet', label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"], 
load_checkpoint='/PATH/TO/CHECKPOINT') - result = model.predict(['flower.jpg']) -``` -参数配置正确后,请执行脚本'python predict.py', 加载模型具体可参见[加载](https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0-rc/api/paddle/framework/io/load_cn.html#load)。 + - **NOTE:** 进行预测时,所选择的module,checkpoint_dir,dataset必须和Fine-tune所用的一样。 -**NOTE:** 进行预测时,所选择的module,checkpoint_dir,dataset必须和Fine-tune所用的一样。 +## 四、服务部署 -## 服务部署 +- PaddleHub Serving可以部署一个在线分类任务服务。 -PaddleHub Serving可以部署一个在线分类任务服务 +- ### 第一步:启动PaddleHub Serving -## Step1: 启动PaddleHub Serving + - 运行启动命令: -运行启动命令: + - ```shell + $ hub serving start -m repvgg_b2_imagenet + ``` -```shell -$ hub serving start -m repvgg_b2_imagenet -``` + - 这样就完成了一个分类任务服务化API的部署,默认端口号为8866。 -这样就完成了一个分类任务服务化API的部署,默认端口号为8866。 + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 +- ### 第二步:发送预测请求 -## Step2: 发送预测请求 + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + ```python + import requests + import json + import cv2 + import base64 + import numpy as np + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + # 发送HTTP请求 + org_im = cv2.imread('/PATH/TO/IMAGE') + data = {'images':[cv2_to_base64(org_im)], 'top_k':2} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/repvgg_b2_imagenet" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + data =r.json()["results"]['data'] + ``` +## 五、更新历史 -```python -import requests -import json -import cv2 -import base64 +* 1.0.0 -import numpy as np - - -def cv2_to_base64(image): - data = cv2.imencode('.jpg', image)[1] - return base64.b64encode(data.tostring()).decode('utf8') - -def base64_to_cv2(b64str): - data = base64.b64decode(b64str.encode('utf8')) - data = np.fromstring(data, np.uint8) - data = cv2.imdecode(data, cv2.IMREAD_COLOR) - return data - -# 发送HTTP请求 -org_im = cv2.imread('/PATH/TO/IMAGE') - -data = {'images':[cv2_to_base64(org_im)], 'top_k':2} -headers = {"Content-type": "application/json"} -url = "http://127.0.0.1:8866/predict/repvgg_b2_imagenet" -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -data =r.json()["results"]['data'] -``` - -### 查看代码 - -[PaddleClas](https://github.com/PaddlePaddle/PaddleClas) - -### 依赖 - -paddlepaddle >= 2.0.0 - -paddlehub >= 2.0.0 + 初始发布 diff --git a/modules/image/classification/repvgg_b2g4_imagenet/README.md b/modules/image/classification/repvgg_b2g4_imagenet/README.md index 6e16aec1eed0cd2dab24a65052015727f08173da..7f54550241bb753cff0abe7a3e84b0eaf2763922 100644 --- a/modules/image/classification/repvgg_b2g4_imagenet/README.md +++ b/modules/image/classification/repvgg_b2g4_imagenet/README.md @@ -1,192 +1,181 @@ -```shell -$ hub install repvgg_b2g4_imagenet==1.0.0 -``` +# repvgg_b2g4_imagenet -## 命令行预测 +|模型名称|repvgg_b2g4_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|RepVGG| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|是| +|模型大小|357MB| +|指标|-| +|最新更新日期|2021-09-14| -```shell -$ hub run repvgg_b2g4_imagenet --input_path "/PATH/TO/IMAGE" --top_k 5 -``` -## 脚本预测 +## 一、模型基本信息 -```python -import paddle -import paddlehub as hub +- ### 模型介绍 -if __name__ == '__main__': + - RepVGG(Making VGG-style ConvNets Great Again)系列模型是清华大学(丁桂光团队)、旷视科技(孙建等)、香港科技大学和阿伯里斯特威斯大学于2021年提出的一种简单但功能强大的卷积神经网络架构。有一个类似于 VGG 
的推理时间代理。主体由3x3卷积和relu stack组成,而训练时间模型具有多分支拓扑。训练时间和推理时间的解耦是通过重新参数化技术实现的,因此该模型被称为repvgg。 - model = hub.Module(name='repvgg_b2g4_imagenet',) - result = model.predict([PATH/TO/IMAGE]) -``` -## Fine-tune代码步骤 +## 二、安装 -使用PaddleHub Fine-tune API进行Fine-tune可以分为4个步骤。 +- ### 1、环境依赖 -### Step1: 定义数据预处理方式 -```python -import paddlehub.vision.transforms as T + - paddlepaddle >= 2.0.0 -transforms = T.Compose([T.Resize((256, 256)), - T.CenterCrop(224), - T.Normalize(mean=[0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225])], - to_rgb=True) -``` + - paddlehub >= 2.0.0 -'transforms' 数据增强模块定义了丰富的数据预处理方式,用户可按照需求替换自己需要的数据预处理方式。 +- ### 2、安装 + - ```shell + $ hub install repvgg_b2g4_imagenet + ``` -### Step2: 下载数据集并使用 -```python -from paddlehub.datasets import Flowers + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) -flowers = Flowers(transforms) -flowers_validate = Flowers(transforms, mode='val') -``` -* transforms(Callable): 数据预处理方式。 -* mode(str): 选择数据模式,可选项有 'train', 'test', 'val', 默认为'train'。 +## 三、模型API预测 -'hub.datasets.Flowers()' 会自动从网络下载数据集并解压到用户目录下'$HOME/.paddlehub/dataset'目录。 +- ### 1.命令行预测 + ```shell + $ hub run repvgg_b2g4_imagenet --input_path "/PATH/TO/IMAGE" --top_k 5 + ``` +- ### 2.预测代码示例 -### Step3: 加载预训练模型 + ```python + import paddle + import paddlehub as hub + if __name__ == '__main__': + model = hub.Module(name='repvgg_b2g4_imagenet') + result = model.predict(['flower.jpg']) + ``` +- ### 3.如何开始Fine-tune -```python -import paddlehub as hub + - 在完成安装PaddlePaddle与PaddleHub后,通过执行`python train.py`即可开始使用repvgg_b2g4_imagenet对[Flowers](../../docs/reference/datasets.md#class-hubdatasetsflowers)等数据集进行Fine-tune。 -model = hub.Module(name='repvgg_b2g4_imagenet', - label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"], - load_checkpoint=None) -``` -* name(str): 选择预训练模型的名字。 -* label_list(list): 设置标签对应分类类别, 默认为Imagenet2012类别。 -* load _checkpoint(str): 模型参数地址。 + - 代码步骤 -PaddleHub提供许多图像分类预训练模型,如xception、mobilenet、efficientnet等,详细信息参见[图像分类模型](https://www.paddlepaddle.org.cn/hub?filter=en_category&value=ImageClassification)。 + - Step1: 定义数据预处理方式 + - ```python + import paddlehub.vision.transforms as T + transforms = T.Compose([T.Resize((256, 256)), + T.CenterCrop(224), + T.Normalize(mean=[0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225])], + to_rgb=True) + ``` -如果想尝试efficientnet模型,只需要更换Module中的'name'参数即可. 
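+
+    - 提示:如果想尝试其它图像分类预训练模型(如 efficientnet),只需将 `hub.Module` 中的 `name` 参数由 `repvgg_b2g4_imagenet` 换成目标模型名即可,其余 Fine-tune 步骤保持不变。以下为一个最小示例(假设所选模型已适配当前 PaddleHub 版本,可用模型请以官网图像分类模型列表为准):
+      - ```python
+        import paddlehub as hub
+
+        # 更换 name 参数即可无缝切换到其它预训练模型
+        module = hub.Module(name="efficientnetb7_imagenet")
+        ```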
-```python -import paddlehub as hub + - `transforms` 数据增强模块定义了丰富的数据预处理方式,用户可按照需求替换自己需要的数据预处理方式。 -# 更换name参数即可无缝切换efficientnet模型, 代码示例如下 -module = hub.Module(name="efficientnetb7_imagenet") -``` -**NOTE:**目前部分模型还没有完全升级到2.0版本,敬请期待。 + - Step2: 下载数据集并使用 + - ```python + from paddlehub.datasets import Flowers + flowers = Flowers(transforms) + flowers_validate = Flowers(transforms, mode='val') + ``` -### Step4: 选择优化策略和运行配置 + * `transforms`: 数据预处理方式。 + * `mode`: 选择数据模式,可选项有 `train`, `test`, `val`, 默认为`train`。 -```python -import paddle -from paddlehub.finetune.trainer import Trainer + * 数据集的准备代码可以参考 [flowers.py](../../paddlehub/datasets/flowers.py)。`hub.datasets.Flowers()` 会自动从网络下载数据集并解压到用户目录下`$HOME/.paddlehub/dataset`目录。 -optimizer = paddle.optimizer.Adam(learning_rate=0.001, parameters=model.parameters()) -trainer = Trainer(model, optimizer, checkpoint_dir='img_classification_ckpt') -trainer.train(flowers, epochs=100, batch_size=32, eval_dataset=flowers_validate, save_interval=1) -``` + - Step3: 加载预训练模型 -#### 优化策略 + - ```python + model = hub.Module(name="repvgg_b2g4_imagenet", label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"]) + ``` + * `name`: 选择预训练模型的名字。 + * `label_list`: 设置输出分类类别,默认为Imagenet2012类别。 -Paddle2.0rc提供了多种优化器选择,如'SGD', 'Adam', 'Adamax'等,详细参见[策略](https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0-rc/api/paddle/optimizer/optimizer/Optimizer_cn.html)。 + - Step4: 选择优化策略和运行配置 -其中'Adam': + ```python + optimizer = paddle.optimizer.Adam(learning_rate=0.001, parameters=model.parameters()) + trainer = Trainer(model, optimizer, checkpoint_dir='img_classification_ckpt') + trainer.train(flowers, epochs=100, batch_size=32, eval_dataset=flowers_validate, save_interval=1) + ``` -* learning_rate: 全局学习率。默认为1e-3; -* parameters: 待优化模型参数。 -#### 运行配置 -'Trainer' 主要控制Fine-tune的训练,包含以下可控制的参数: + - 运行配置 -* model: 被优化模型; -* optimizer: 优化器选择; -* use_vdl: 是否使用vdl可视化训练过程; -* checkpoint_dir: 保存模型参数的地址; -* compare_metrics: 保存最优模型的衡量指标; + - `Trainer` 主要控制Fine-tune的训练,包含以下可控制的参数: -'trainer.train' 主要控制具体的训练过程,包含以下可控制的参数: + * `model`: 被优化模型; + * `optimizer`: 优化器选择; + * `use_vdl`: 是否使用vdl可视化训练过程; + * `checkpoint_dir`: 保存模型参数的地址; + * `compare_metrics`: 保存最优模型的衡量指标; -* train_dataset: 训练时所用的数据集; -* epochs: 训练轮数; -* batch_size: 训练的批大小,如果使用GPU,请根据实际情况调整batch_size; -* num_workers: works的数量,默认为0; -* eval_dataset: 验证集; -* log_interval: 打印日志的间隔, 单位为执行批训练的次数。 -* save_interval: 保存模型的间隔频次,单位为执行训练的轮数。 + - `trainer.train` 主要控制具体的训练过程,包含以下可控制的参数: -## 模型预测 + * `train_dataset`: 训练时所用的数据集; + * `epochs`: 训练轮数; + * `batch_size`: 训练的批大小,如果使用GPU,请根据实际情况调整batch_size; + * `num_workers`: works的数量,默认为0; + * `eval_dataset`: 验证集; + * `log_interval`: 打印日志的间隔, 单位为执行批训练的次数。 + * `save_interval`: 保存模型的间隔频次,单位为执行训练的轮数。 -当完成Fine-tune后,Fine-tune过程在验证集上表现最优的模型会被保存在'${CHECKPOINT_DIR}/best_model'目录下,其中'${CHECKPOINT_DIR}'目录为Fine-tune时所选择的保存checkpoint的目录。 -我们使用该模型来进行预测。predict.py脚本如下: + - 模型预测 -```python -import paddle -import paddlehub as hub + - 当完成Fine-tune后,Fine-tune过程在验证集上表现最优的模型会被保存在`${CHECKPOINT_DIR}/best_model`目录下,其中`${CHECKPOINT_DIR}`目录为Fine-tune时所选择的保存checkpoint的目录。 我们使用该模型来进行预测。predict.py脚本如下: -if __name__ == '__main__': + - ```python + import paddle + import paddlehub as hub + if __name__ == '__main__': + model = hub.Module(name='repvgg_b2g4_imagenet', label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"], load_checkpoint='/PATH/TO/CHECKPOINT') + result = model.predict(['flower.jpg']) + ``` - model = hub.Module(name='repvgg_b2g4_imagenet', label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"], 
load_checkpoint='/PATH/TO/CHECKPOINT') - result = model.predict(['flower.jpg']) -``` -参数配置正确后,请执行脚本'python predict.py', 加载模型具体可参见[加载](https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0-rc/api/paddle/framework/io/load_cn.html#load)。 + - **NOTE:** 进行预测时,所选择的module,checkpoint_dir,dataset必须和Fine-tune所用的一样。 -**NOTE:** 进行预测时,所选择的module,checkpoint_dir,dataset必须和Fine-tune所用的一样。 +## 四、服务部署 -## 服务部署 +- PaddleHub Serving可以部署一个在线分类任务服务。 -PaddleHub Serving可以部署一个在线分类任务服务 +- ### 第一步:启动PaddleHub Serving -## Step1: 启动PaddleHub Serving + - 运行启动命令: -运行启动命令: + - ```shell + $ hub serving start -m repvgg_b2g4_imagenet + ``` -```shell -$ hub serving start -m repvgg_b2g4_imagenet -``` + - 这样就完成了一个分类任务服务化API的部署,默认端口号为8866。 -这样就完成了一个分类任务服务化API的部署,默认端口号为8866。 + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 +- ### 第二步:发送预测请求 -## Step2: 发送预测请求 + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + ```python + import requests + import json + import cv2 + import base64 + import numpy as np + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + # 发送HTTP请求 + org_im = cv2.imread('/PATH/TO/IMAGE') + data = {'images':[cv2_to_base64(org_im)], 'top_k':2} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/repvgg_b2g4_imagenet" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + data =r.json()["results"]['data'] + ``` +## 五、更新历史 -```python -import requests -import json -import cv2 -import base64 +* 1.0.0 -import numpy as np - - -def cv2_to_base64(image): - data = cv2.imencode('.jpg', image)[1] - return base64.b64encode(data.tostring()).decode('utf8') - -def base64_to_cv2(b64str): - data = base64.b64decode(b64str.encode('utf8')) - data = np.fromstring(data, np.uint8) - data = cv2.imdecode(data, cv2.IMREAD_COLOR) - return data - -# 发送HTTP请求 -org_im = cv2.imread('/PATH/TO/IMAGE') - -data = {'images':[cv2_to_base64(org_im)], 'top_k':2} -headers = {"Content-type": "application/json"} -url = "http://127.0.0.1:8866/predict/repvgg_b2g4_imagenet" -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -data =r.json()["results"]['data'] -``` - -### 查看代码 - -[PaddleClas](https://github.com/PaddlePaddle/PaddleClas) - -### 依赖 - -paddlepaddle >= 2.0.0 - -paddlehub >= 2.0.0 + 初始发布 diff --git a/modules/image/classification/repvgg_b3g4_imagenet/README.md b/modules/image/classification/repvgg_b3g4_imagenet/README.md index fd76f6b4110558ea9f6b222689fbf0abafc1fa07..fbcf634133d34e26579b7bc2e665e7e7943b5f98 100644 --- a/modules/image/classification/repvgg_b3g4_imagenet/README.md +++ b/modules/image/classification/repvgg_b3g4_imagenet/README.md @@ -1,192 +1,181 @@ -```shell -$ hub install repvgg_b3g4_imagenet==1.0.0 -``` +# repvgg_b3g4_imagenet -## 命令行预测 +|模型名称|repvgg_b3g4_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|RepVGG| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|是| +|模型大小|485MB| +|指标|-| +|最新更新日期|2021-09-14| -```shell -$ hub run repvgg_b3g4_imagenet --input_path "/PATH/TO/IMAGE" --top_k 5 -``` -## 脚本预测 +## 一、模型基本信息 -```python -import paddle -import paddlehub as hub +- ### 模型介绍 -if __name__ == '__main__': + - RepVGG(Making VGG-style ConvNets Great Again)系列模型是清华大学(丁桂光团队)、旷视科技(孙建等)、香港科技大学和阿伯里斯特威斯大学于2021年提出的一种简单但功能强大的卷积神经网络架构。有一个类似于 
VGG 的推理时间代理。主体由3x3卷积和relu stack组成,而训练时间模型具有多分支拓扑。训练时间和推理时间的解耦是通过重新参数化技术实现的,因此该模型被称为repvgg。 - model = hub.Module(name='repvgg_b3g4_imagenet',) - result = model.predict([PATH/TO/IMAGE]) -``` -## Fine-tune代码步骤 +## 二、安装 -使用PaddleHub Fine-tune API进行Fine-tune可以分为4个步骤。 +- ### 1、环境依赖 -### Step1: 定义数据预处理方式 -```python -import paddlehub.vision.transforms as T + - paddlepaddle >= 2.0.0 -transforms = T.Compose([T.Resize((256, 256)), - T.CenterCrop(224), - T.Normalize(mean=[0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225])], - to_rgb=True) -``` + - paddlehub >= 2.0.0 -'transforms' 数据增强模块定义了丰富的数据预处理方式,用户可按照需求替换自己需要的数据预处理方式。 +- ### 2、安装 + - ```shell + $ hub install repvgg_b3g4_imagenet + ``` -### Step2: 下载数据集并使用 -```python -from paddlehub.datasets import Flowers + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) -flowers = Flowers(transforms) -flowers_validate = Flowers(transforms, mode='val') -``` -* transforms(Callable): 数据预处理方式。 -* mode(str): 选择数据模式,可选项有 'train', 'test', 'val', 默认为'train'。 +## 三、模型API预测 -'hub.datasets.Flowers()' 会自动从网络下载数据集并解压到用户目录下'$HOME/.paddlehub/dataset'目录。 +- ### 1.命令行预测 + ```shell + $ hub run repvgg_b3g4_imagenet --input_path "/PATH/TO/IMAGE" --top_k 5 + ``` +- ### 2.预测代码示例 -### Step3: 加载预训练模型 + ```python + import paddle + import paddlehub as hub + if __name__ == '__main__': + model = hub.Module(name='repvgg_b3g4_imagenet') + result = model.predict(['flower.jpg']) + ``` +- ### 3.如何开始Fine-tune -```python -import paddlehub as hub + - 在完成安装PaddlePaddle与PaddleHub后,通过执行`python train.py`即可开始使用repvgg_b3g4_imagenet对[Flowers](../../docs/reference/datasets.md#class-hubdatasetsflowers)等数据集进行Fine-tune。 -model = hub.Module(name='repvgg_b3g4_imagenet', - label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"], - load_checkpoint=None) -``` -* name(str): 选择预训练模型的名字。 -* label_list(list): 设置标签对应分类类别, 默认为Imagenet2012类别。 -* load _checkpoint(str): 模型参数地址。 + - 代码步骤 -PaddleHub提供许多图像分类预训练模型,如xception、mobilenet、efficientnet等,详细信息参见[图像分类模型](https://www.paddlepaddle.org.cn/hub?filter=en_category&value=ImageClassification)。 + - Step1: 定义数据预处理方式 + - ```python + import paddlehub.vision.transforms as T + transforms = T.Compose([T.Resize((256, 256)), + T.CenterCrop(224), + T.Normalize(mean=[0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225])], + to_rgb=True) + ``` -如果想尝试efficientnet模型,只需要更换Module中的'name'参数即可. 
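+
+    - 提示:如果想尝试其它图像分类预训练模型(如 efficientnet),只需将 `hub.Module` 中的 `name` 参数由 `repvgg_b3g4_imagenet` 换成目标模型名即可,其余 Fine-tune 步骤保持不变。以下为一个最小示例(假设所选模型已适配当前 PaddleHub 版本,可用模型请以官网图像分类模型列表为准):
+      - ```python
+        import paddlehub as hub
+
+        # 更换 name 参数即可无缝切换到其它预训练模型
+        module = hub.Module(name="efficientnetb7_imagenet")
+        ```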
-```python -import paddlehub as hub + - `transforms` 数据增强模块定义了丰富的数据预处理方式,用户可按照需求替换自己需要的数据预处理方式。 -# 更换name参数即可无缝切换efficientnet模型, 代码示例如下 -module = hub.Module(name="efficientnetb7_imagenet") -``` -**NOTE:**目前部分模型还没有完全升级到2.0版本,敬请期待。 + - Step2: 下载数据集并使用 + - ```python + from paddlehub.datasets import Flowers + flowers = Flowers(transforms) + flowers_validate = Flowers(transforms, mode='val') + ``` -### Step4: 选择优化策略和运行配置 + * `transforms`: 数据预处理方式。 + * `mode`: 选择数据模式,可选项有 `train`, `test`, `val`, 默认为`train`。 -```python -import paddle -from paddlehub.finetune.trainer import Trainer + * 数据集的准备代码可以参考 [flowers.py](../../paddlehub/datasets/flowers.py)。`hub.datasets.Flowers()` 会自动从网络下载数据集并解压到用户目录下`$HOME/.paddlehub/dataset`目录。 -optimizer = paddle.optimizer.Adam(learning_rate=0.001, parameters=model.parameters()) -trainer = Trainer(model, optimizer, checkpoint_dir='img_classification_ckpt') -trainer.train(flowers, epochs=100, batch_size=32, eval_dataset=flowers_validate, save_interval=1) -``` + - Step3: 加载预训练模型 -#### 优化策略 + - ```python + model = hub.Module(name="repvgg_b3g4_imagenet", label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"]) + ``` + * `name`: 选择预训练模型的名字。 + * `label_list`: 设置输出分类类别,默认为Imagenet2012类别。 -Paddle2.0rc提供了多种优化器选择,如'SGD', 'Adam', 'Adamax'等,详细参见[策略](https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0-rc/api/paddle/optimizer/optimizer/Optimizer_cn.html)。 + - Step4: 选择优化策略和运行配置 -其中'Adam': + ```python + optimizer = paddle.optimizer.Adam(learning_rate=0.001, parameters=model.parameters()) + trainer = Trainer(model, optimizer, checkpoint_dir='img_classification_ckpt') + trainer.train(flowers, epochs=100, batch_size=32, eval_dataset=flowers_validate, save_interval=1) + ``` -* learning_rate: 全局学习率。默认为1e-3; -* parameters: 待优化模型参数。 -#### 运行配置 -'Trainer' 主要控制Fine-tune的训练,包含以下可控制的参数: + - 运行配置 -* model: 被优化模型; -* optimizer: 优化器选择; -* use_vdl: 是否使用vdl可视化训练过程; -* checkpoint_dir: 保存模型参数的地址; -* compare_metrics: 保存最优模型的衡量指标; + - `Trainer` 主要控制Fine-tune的训练,包含以下可控制的参数: -'trainer.train' 主要控制具体的训练过程,包含以下可控制的参数: + * `model`: 被优化模型; + * `optimizer`: 优化器选择; + * `use_vdl`: 是否使用vdl可视化训练过程; + * `checkpoint_dir`: 保存模型参数的地址; + * `compare_metrics`: 保存最优模型的衡量指标; -* train_dataset: 训练时所用的数据集; -* epochs: 训练轮数; -* batch_size: 训练的批大小,如果使用GPU,请根据实际情况调整batch_size; -* num_workers: works的数量,默认为0; -* eval_dataset: 验证集; -* log_interval: 打印日志的间隔, 单位为执行批训练的次数。 -* save_interval: 保存模型的间隔频次,单位为执行训练的轮数。 + - `trainer.train` 主要控制具体的训练过程,包含以下可控制的参数: -## 模型预测 + * `train_dataset`: 训练时所用的数据集; + * `epochs`: 训练轮数; + * `batch_size`: 训练的批大小,如果使用GPU,请根据实际情况调整batch_size; + * `num_workers`: works的数量,默认为0; + * `eval_dataset`: 验证集; + * `log_interval`: 打印日志的间隔, 单位为执行批训练的次数。 + * `save_interval`: 保存模型的间隔频次,单位为执行训练的轮数。 -当完成Fine-tune后,Fine-tune过程在验证集上表现最优的模型会被保存在'${CHECKPOINT_DIR}/best_model'目录下,其中'${CHECKPOINT_DIR}'目录为Fine-tune时所选择的保存checkpoint的目录。 -我们使用该模型来进行预测。predict.py脚本如下: + - 模型预测 -```python -import paddle -import paddlehub as hub + - 当完成Fine-tune后,Fine-tune过程在验证集上表现最优的模型会被保存在`${CHECKPOINT_DIR}/best_model`目录下,其中`${CHECKPOINT_DIR}`目录为Fine-tune时所选择的保存checkpoint的目录。 我们使用该模型来进行预测。predict.py脚本如下: -if __name__ == '__main__': + - ```python + import paddle + import paddlehub as hub + if __name__ == '__main__': + model = hub.Module(name='repvgg_b3g4_imagenet', label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"], load_checkpoint='/PATH/TO/CHECKPOINT') + result = model.predict(['flower.jpg']) + ``` - model = hub.Module(name='repvgg_b3g4_imagenet', label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"], 
load_checkpoint='/PATH/TO/CHECKPOINT') - result = model.predict(['flower.jpg']) -``` -参数配置正确后,请执行脚本'python predict.py', 加载模型具体可参见[加载](https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0-rc/api/paddle/framework/io/load_cn.html#load)。 + - **NOTE:** 进行预测时,所选择的module,checkpoint_dir,dataset必须和Fine-tune所用的一样。 -**NOTE:** 进行预测时,所选择的module,checkpoint_dir,dataset必须和Fine-tune所用的一样。 +## 四、服务部署 -## 服务部署 +- PaddleHub Serving可以部署一个在线分类任务服务。 -PaddleHub Serving可以部署一个在线分类任务服务 +- ### 第一步:启动PaddleHub Serving -## Step1: 启动PaddleHub Serving + - 运行启动命令: -运行启动命令: + - ```shell + $ hub serving start -m repvgg_b3g4_imagenet + ``` -```shell -$ hub serving start -m repvgg_b3g4_imagenet -``` + - 这样就完成了一个分类任务服务化API的部署,默认端口号为8866。 -这样就完成了一个分类任务服务化API的部署,默认端口号为8866。 + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 +- ### 第二步:发送预测请求 -## Step2: 发送预测请求 + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + ```python + import requests + import json + import cv2 + import base64 + import numpy as np + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + # 发送HTTP请求 + org_im = cv2.imread('/PATH/TO/IMAGE') + data = {'images':[cv2_to_base64(org_im)], 'top_k':2} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/repvgg_b3g4_imagenet" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + data =r.json()["results"]['data'] + ``` +## 五、更新历史 -```python -import requests -import json -import cv2 -import base64 +* 1.0.0 -import numpy as np - - -def cv2_to_base64(image): - data = cv2.imencode('.jpg', image)[1] - return base64.b64encode(data.tostring()).decode('utf8') - -def base64_to_cv2(b64str): - data = base64.b64decode(b64str.encode('utf8')) - data = np.fromstring(data, np.uint8) - data = cv2.imdecode(data, cv2.IMREAD_COLOR) - return data - -# 发送HTTP请求 -org_im = cv2.imread('/PATH/TO/IMAGE') - -data = {'images':[cv2_to_base64(org_im)], 'top_k':2} -headers = {"Content-type": "application/json"} -url = "http://127.0.0.1:8866/predict/repvgg_b3g4_imagenet" -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -data =r.json()["results"]['data'] -``` - -### 查看代码 - -[PaddleClas](https://github.com/PaddlePaddle/PaddleClas) - -### 依赖 - -paddlepaddle >= 2.0.0 - -paddlehub >= 2.0.0 + 初始发布 diff --git a/modules/image/classification/res2net101_vd_26w_4s_imagenet/README.md b/modules/image/classification/res2net101_vd_26w_4s_imagenet/README.md index 25c1fcb335c5c46987572de1cc0f3934a3c0138f..75f10a97f1e175b3df1dd60fb957f7133073108f 100644 --- a/modules/image/classification/res2net101_vd_26w_4s_imagenet/README.md +++ b/modules/image/classification/res2net101_vd_26w_4s_imagenet/README.md @@ -1,149 +1,134 @@ -## 命令行预测 +# res2net101_vd_26w_4s_imagenet -``` -hub run res2net101_vd_26w_4s_imagenet --input_path "/PATH/TO/IMAGE" -``` +|模型名称|res2net101_vd_26w_4s_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|Res2Net| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|否| +|模型大小|179MB| +|最新更新日期|-| +|数据指标|-| -## API -```python -def get_expected_image_width() -``` +## 一、模型基本信息 -返回预处理的图片宽度,也就是224。 -```python -def get_expected_image_height() -``` -返回预处理的图片高度,也就是224。 +- ### 模型介绍 -```python -def get_pretrained_images_mean() -``` + - 
Res2Net是2019年提出的一种全新的对ResNet的改进方案,该方案可以和现有其他优秀模块轻松整合,在不增加计算负载量的情况下,在ImageNet、CIFAR-100等数据集上的测试性能超过了ResNet。Res2Net结构简单,性能优越,进一步探索了CNN在更细粒度级别的多尺度表示能力。 该 PaddleHub Module 使用 ImageNet-2012数据集训练,接受输入图片大小为 224 x 224 x 3,支持直接通过命令行或者 Python 接口进行预测。 -返回预处理的图片均值,也就是 \[0.485, 0.456, 0.406\]。 -```python -def get_pretrained_images_std() -``` +## 二、安装 -返回预处理的图片标准差,也就是 \[0.229, 0.224, 0.225\]。 +- ### 1、环境依赖 + - paddlepaddle >= 1.6.2 -```python -def context(trainable=True, pretrained=True) -``` + - paddlehub >= 1.6.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) -**参数** -* trainable (bool): 计算图的参数是否为可训练的; -* pretrained (bool): 是否加载默认的预训练模型。 +- ### 2、安装 -**返回** + - ```shell + $ hub install res2net101_vd_26w_4s_imagenet + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) -* inputs (dict): 计算图的输入,key 为 'image', value 为图片的张量; -* outputs (dict): 计算图的输出,key 为 'classification' 和 'feature_map',其相应的值为: - * classification (paddle.fluid.framework.Variable): 分类结果,也就是全连接层的输出; - * feature\_map (paddle.fluid.framework.Variable): 特征匹配,全连接层前面的那个张量。 -* context\_prog(fluid.Program): 计算图,用于迁移学习。 +## 三、模型API预测 -```python -def classification(images=None, - paths=None, - batch_size=1, - use_gpu=False, - top_k=1): -``` +- ### 1、命令行预测 -**参数** + - ```shell + $ hub run res2net101_vd_26w_4s_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现分类模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) -* images (list\[numpy.ndarray\]): 图片数据,每一个图片数据的shape 均为 \[H, W, C\],颜色空间为 BGR; -* paths (list\[str\]): 图片的路径; -* batch\_size (int): batch 的大小; -* use\_gpu (bool): 是否使用 GPU 来预测; -* top\_k (int): 返回预测结果的前 k 个。 +- ### 2、预测代码示例 -**返回** + - ```python + import paddlehub as hub + import cv2 -res (list\[dict\]): 分类结果,列表的每一个元素均为字典,其中 key 为识别动物的类别,value为置信度。 + classifier = hub.Module(name="res2net101_vd_26w_4s_imagenet") + result = classifier.classification(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = classifier.classification(paths=['/PATH/TO/IMAGE']) + ``` -```python -def save_inference_model(dirname, - model_filename=None, - params_filename=None, - combined=True) -``` +- ### 3、API -将模型保存到指定路径。 -**参数** + - ```python + def classification(images=None, + paths=None, + batch_size=1, + use_gpu=False, + top_k=1): + ``` + - 分类接口API。 + - **参数** -* dirname: 存在模型的目录名称 -* model\_filename: 模型文件名称,默认为\_\_model\_\_ -* params\_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效) -* combined: 是否将参数保存到统一的一个文件中 + - images (list\[numpy.ndarray\]): 图片数据,每一个图片数据的shape 均为 \[H, W, C\],颜色空间为 BGR;
+ - paths (list\[str\]): 图片的路径;
+ - batch\_size (int): batch 的大小;
+ - use\_gpu (bool): 是否使用 GPU;**若使用GPU,请先设置CUDA_VISIBLE_DEVICES环境变量**
+ - top\_k (int): 返回预测结果的前 k 个。 -## 代码示例 + - **返回** -```python -import paddlehub as hub -import cv2 + - res (list\[dict\]): 分类结果,列表的每一个元素均为字典,其中 key 为识别的菜品类别,value为置信度。 -classifier = hub.Module(name="res2net101_vd_26w_4s_imagenet") -result = classifier.classification(images=[cv2.imread('/PATH/TO/IMAGE')]) -# or -# result = classifier.classification(paths=['/PATH/TO/IMAGE']) -``` -## 服务部署 +## 四、服务部署 -PaddleHub Serving可以部署一个在线图像识别服务。 +- PaddleHub Serving可以部署一个图像识别的在线服务。 -## 第一步:启动PaddleHub Serving +- ### 第一步:启动PaddleHub Serving -运行启动命令: -```shell -$ hub serving start -m res2net101_vd_26w_4s_imagenet -``` + - 运行启动命令: + - ```shell + $ hub serving start -m res2net101_vd_26w_4s_imagenet + ``` -这样就完成了一个在线图像识别服务化API的部署,默认端口号为8866。 + - 这样就完成了一个图像识别的在线服务的部署,默认端口号为8866。 -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 -## 第二步:发送预测请求 +- ### 第二步:发送预测请求 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 -```python -import requests -import json -import cv2 -import base64 + - ```python + import requests + import json + import cv2 + import base64 + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') -def cv2_to_base64(image): - data = cv2.imencode('.jpg', image)[1] - return base64.b64encode(data.tostring()).decode('utf8') + # 发送HTTP请求 + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/res2net101_vd_26w_4s_imagenet" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + # 打印预测结果 + print(r.json()["results"]) + ``` -# 发送HTTP请求 -data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} -headers = {"Content-type": "application/json"} -url = "http://127.0.0.1:8866/predict/res2net101_vd_26w_4s_imagenet" -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -# 打印预测结果 -print(r.json()["results"]) -``` +## 五、更新历史 -### 查看代码 +* 1.0.0 -https://github.com/PaddlePaddle/PaddleClas + 初始发布 -### 依赖 - -paddlepaddle >= 1.6.2 - -paddlehub >= 1.6.0 + - ```shell + $ hub install res2net101_vd_26w_4s_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/resnet18_vd_imagenet/README.md b/modules/image/classification/resnet18_vd_imagenet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..a84af151bb181b7056235dc5c9c1e45450423e47 --- /dev/null +++ b/modules/image/classification/resnet18_vd_imagenet/README.md @@ -0,0 +1,136 @@ +# resnet18_vd_imagenet + +|模型名称|resnet18_vd_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|ResNet_vd| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|否| +|模型大小|46MB| +|最新更新日期|-| +|数据指标|-| + + +## 一、模型基本信息 + + + +- ### 模型介绍 + + - ResNet系列模型是图像分类领域的重要模型之一,模型中提出的残差单元有效地解决了深度网络训练困难的问题,通过增加模型的深度提升了模型的准确率,ResNet-vd 其实就是 ResNet-D,是ResNet 原始结构的变种。该PaddleHub Module结构为ResNet_vd,基于ImageNet-2012数据集训练得到,接受输入图片大小为224 x 224 x 3,支持finetune,也可以直接通过命令行或者Python接口进行预测。 + + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 1.6.2 + + - paddlehub >= 1.6.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + + +- ### 2、安装 + + - ```shell + $ hub install resnet18_vd_imagenet + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run 
resnet18_vd_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现分类模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="resnet18_vd_imagenet") + result = classifier.classification(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = classifier.classification(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 3、API + + + - ```python + def classification(images=None, + paths=None, + batch_size=1, + use_gpu=False, + top_k=1): + ``` + - 分类接口API。 + - **参数** + + - images (list\[numpy.ndarray\]): 图片数据,每一个图片数据的shape 均为 \[H, W, C\],颜色空间为 BGR;
+ - paths (list\[str\]): 图片的路径;
+ - batch\_size (int): batch 的大小;
+ - use\_gpu (bool): 是否使用 GPU;**若使用GPU,请先设置CUDA_VISIBLE_DEVICES环境变量**
+ - top\_k (int): 返回预测结果的前 k 个。 + + - **返回** + + - res (list\[dict\]): 分类结果,列表的每一个元素均为字典,其中 key 为识别的菜品类别,value为置信度。 + + + + + +## 四、服务部署 + +- PaddleHub Serving可以部署一个图像识别的在线服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + - ```shell + $ hub serving start -m resnet18_vd_imagenet + ``` + + - 这样就完成了一个图像识别的在线服务的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + import cv2 + import base64 + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # 发送HTTP请求 + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/resnet18_vd_imagenet" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # 打印预测结果 + print(r.json()["results"]) + ``` + + +## 五、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install resnet18_vd_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/resnet50_vd_10w/README.md b/modules/image/classification/resnet50_vd_10w/README.md new file mode 100644 index 0000000000000000000000000000000000000000..35b736abf8bc9eabdc7cfa74e53c8dfab5a7aeb9 --- /dev/null +++ b/modules/image/classification/resnet50_vd_10w/README.md @@ -0,0 +1,95 @@ +# resnet50_vd_10w + +|模型名称|resnet50_vd_10w| +| :--- | :---: | +|类别|图像-图像分类| +|网络|ResNet_vd| +|数据集|百度自建数据集| +|是否支持Fine-tuning|否| +|模型大小|92MB| +|最新更新日期|-| +|数据指标|-| + + +## 一、模型基本信息 + + + +- ### 模型介绍 + + - ResNet系列模型是图像分类领域的重要模型之一,模型中提出的残差单元有效地解决了深度网络训练困难的问题,通过增加模型的深度提升了模型的准确率,ResNet-vd 其实就是 ResNet-D,是ResNet 原始结构的变种。该PaddleHub Module结构为ResNet_vd,使用百度自研的基于10万种类别、4千多万的有标签数据进行训练,接受输入图片大小为224 x 224 x 3,支持finetune。 + + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 1.6.2 + + - paddlehub >= 1.6.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + + +- ### 2、安装 + + - ```shell + $ hub install resnet50_vd_10w + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="resnet50_vd_10w") + input_dict, output_dict, program = classifier.context(trainable=True) + ``` + +- ### 2、API + + - ```python + def context(trainable=True, pretrained=True) + ``` + - **参数** + - trainable (bool): 计算图的参数是否为可训练的;
+ - pretrained (bool): 是否加载默认的预训练模型。 + + - **返回** + - inputs (dict): 计算图的输入,key 为 'image', value 为图片的张量;
+ - outputs (dict): 计算图的输出,key 为 'classification' 和 'feature_map',其相应的值为: + - classification (paddle.fluid.framework.Variable): 分类结果,也就是全连接层的输出; + - feature\_map (paddle.fluid.framework.Variable): 特征匹配,全连接层前面的那个张量。 + - context\_prog(fluid.Program): 计算图,用于迁移学习。 + + + + - ```python + def save_inference_model(dirname, + model_filename=None, + params_filename=None, + combined=True) + ``` + - **参数** + - dirname: 存在模型的目录名称;
+ - model_filename: 模型文件名称,默认为\_\_model\_\_;
+ - params_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效);
+ - combined: 是否将参数保存到统一的一个文件中。 + + + + + + +## 五、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install resnet50_vd_10w==1.0.0 + ``` diff --git a/modules/image/classification/resnet50_vd_animals/README.md b/modules/image/classification/resnet50_vd_animals/README.md index 8b56c5f61fffa193817415d21105624601840e9f..a42168e27330a2e66d93a463ca8ce87553c2a2c8 100644 --- a/modules/image/classification/resnet50_vd_animals/README.md +++ b/modules/image/classification/resnet50_vd_animals/README.md @@ -1,159 +1,169 @@ -```shell -$ hub install resnet50_vd_animals==1.0.0 -``` +# resnet50_vd_animals -

-
ResNet 系列的网络结构 -

+|模型名称|resnet50_vd_animals| +| :--- | :---: | +|类别|图像-图像分类| +|网络|ResNet50_vd| +|数据集|百度自建动物数据集| +|是否支持Fine-tuning|否| +|模型大小|154MB| +|指标|-| +|最新更新日期|2021-02-26| -模型的详情可参考[论文](https://arxiv.org/pdf/1812.01187.pdf) -## 命令行预测 +## 一、模型基本信息 -``` -hub run resnet50_vd_animals --input_path "/PATH/TO/IMAGE" -``` -## API +- ### 模型介绍 -```python -def get_expected_image_width() -``` + - ResNet-vd 其实就是 ResNet-D,是ResNet 原始结构的变种,可用于图像分类和特征提取。该 PaddleHub Module 采用百度自建动物数据集训练得到,支持7978种动物的分类识别。 -返回预处理的图片宽度,也就是224。 + - 模型的详情可参考[论文](https://arxiv.org/pdf/1812.01187.pdf) -```python -def get_expected_image_height() -``` +## 二、安装 -返回预处理的图片高度,也就是224。 +- ### 1、环境依赖 -```python -def get_pretrained_images_mean() -``` + - paddlepaddle >= 2.0.0 -返回预处理的图片均值,也就是 \[0.485, 0.456, 0.406\]。 + - paddlehub >= 2.0.0 -```python -def get_pretrained_images_std() -``` +- ### 2、安装 -返回预处理的图片标准差,也就是 \[0.229, 0.224, 0.225\]。 + - ```shell + $ hub install resnet50_vd_animals==1.0.0 + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) +## 三、模型API预测 + +- ### 1、命令行预测 -```python -def context(trainable=True, pretrained=True) -``` + - ``` + hub run resnet50_vd_animals --input_path "/PATH/TO/IMAGE" + ``` -**参数** +- ### 2、代码示例 -* trainable (bool): 计算图的参数是否为可训练的; -* pretrained (bool): 是否加载默认的预训练模型。 + - ```python + import paddlehub as hub + import cv2 -**返回** + classifier = hub.Module(name="resnet50_vd_animals") -* inputs (dict): 计算图的输入,key 为 'image', value 为图片的张量; -* outputs (dict): 计算图的输出,key 为 'classification' 和 'feature_map',其相应的值为: - * classification (paddle.fluid.framework.Variable): 分类结果,也就是全连接层的输出; - * feature\_map (paddle.fluid.framework.Variable): 特征匹配,全连接层前面的那个张量。 -* context\_prog(fluid.Program): 计算图,用于迁移学习。 + result = classifier.classification(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = classifier.classification(paths=['/PATH/TO/IMAGE']) + ``` +- ### 3、API -```python -def classification(images=None, - paths=None, - batch_size=1, - use_gpu=False, - top_k=1): -``` + - ```python + def get_expected_image_width() + ``` -**参数** + - 返回预处理的图片宽度,也就是224。 -* images (list\[numpy.ndarray\]): 图片数据,每一个图片数据的shape 均为 \[H, W, C\],颜色空间为 BGR; -* paths (list\[str\]): 图片的路径; -* batch\_size (int): batch 的大小; -* use\_gpu (bool): 是否使用 GPU 来预测; -* top\_k (int): 返回预测结果的前 k 个。 + - ```python + def get_expected_image_height() + ``` -**返回** + - 返回预处理的图片高度,也就是224。 -res (list\[dict\]): 分类结果,列表的每一个元素均为字典,其中 key 为识别动物的类别,value为置信度。 + - ```python + def get_pretrained_images_mean() + ``` -```python -def save_inference_model(dirname, - model_filename=None, - params_filename=None, - combined=True) -``` + - 返回预处理的图片均值,也就是 \[0.485, 0.456, 0.406\]。 -将模型保存到指定路径。 + - ```python + def get_pretrained_images_std() + ``` -**参数** + - 返回预处理的图片标准差,也就是 \[0.229, 0.224, 0.225\]。 -* dirname: 存在模型的目录名称 -* model_filename: 模型文件名称,默认为\_\_model\_\_ -* params_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效) -* combined: 是否将参数保存到统一的一个文件中 -## 代码示例 + - ```python + def classification(images=None, + paths=None, + batch_size=1, + use_gpu=False, + top_k=1): + ``` -```python -import paddlehub as hub -import cv2 + - **参数** -classifier = hub.Module(name="resnet50_vd_animals") + * images (list\[numpy.ndarray\]): 图片数据,每一个图片数据的shape 均为 \[H, W, C\],颜色空间为 BGR; + * paths (list\[str\]): 图片的路径; + * batch\_size (int): batch 的大小; + * use\_gpu (bool): 是否使用 GPU 来预测; + * top\_k (int): 返回预测结果的前 k 个。 -result = 
classifier.classification(images=[cv2.imread('/PATH/TO/IMAGE')]) -# or -# result = classifier.classification(paths=['/PATH/TO/IMAGE']) -``` + - **返回** -## 服务部署 + - res (list\[dict\]): 分类结果,列表的每一个元素均为字典,其中 key 为识别动物的类别,value为置信度。 -PaddleHub Serving可以部署一个在线动物识别服务。 + - ```python + def save_inference_model(dirname, + model_filename=None, + params_filename=None, + combined=True) + ``` -## 第一步:启动PaddleHub Serving + - 将模型保存到指定路径。 -运行启动命令: -```shell -$ hub serving start -m resnet50_vd_animals -``` + - **参数** -这样就完成了一个在线动物识别服务化API的部署,默认端口号为8866。 + * dirname: 存在模型的目录名称 + * model_filename: 模型文件名称,默认为\_\_model\_\_ + * params_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效) + * combined: 是否将参数保存到统一的一个文件中 -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 -## 第二步:发送预测请求 +## 四、服务部署 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 +- PaddleHub Serving可以部署一个在线动物识别服务。 -```python -import requests -import json -import cv2 -import base64 +- ### 第一步:启动PaddleHub Serving + - 运行启动命令: -def cv2_to_base64(image): - data = cv2.imencode('.jpg', image)[1] - return base64.b64encode(data.tostring()).decode('utf8') + - ```shell + $ hub serving start -m resnet50_vd_animals + ``` + - 这样就完成了一个在线动物识别服务化API的部署,默认端口号为8866。 -# 发送HTTP请求 -data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} -headers = {"Content-type": "application/json"} -url = "http://127.0.0.1:8866/predict/resnet50_vd_animals" -r = requests.post(url=url, headers=headers, data=json.dumps(data)) + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 -# 打印预测结果 -print(r.json()["results"]) -``` +- ### 第二步:发送预测请求 -### 查看代码 +- 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 -[PaddlePaddle/models 图像分类](https://github.com/PaddlePaddle/models/tree/develop/PaddleCV/image_classification) + ```python + import requests + import json + import cv2 + import base64 -### 依赖 -paddlepaddle >= 1.6.2 + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') -paddlehub >= 1.6.0 + + # 发送HTTP请求 + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/resnet50_vd_animals" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # 打印预测结果 + print(r.json()["results"]) + ``` + +## 五、更新历史 + +* 1.0.0 + + 初始发布 diff --git a/modules/image/classification/resnet50_vd_animals/module.py b/modules/image/classification/resnet50_vd_animals/module.py index 18eb3db106b1dcb5db7828271f5d18e243986739..8c907e12daca7860fcbdc66b58e5e319fd5eeab1 100644 --- a/modules/image/classification/resnet50_vd_animals/module.py +++ b/modules/image/classification/resnet50_vd_animals/module.py @@ -9,7 +9,9 @@ import os import numpy as np import paddle.fluid as fluid import paddlehub as hub -from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor +from paddle.inference import Config +from paddle.inference import create_predictor + from paddlehub.module.module import moduleinfo, runnable, serving from paddlehub.common.paddle_helper import add_vars_prefix @@ -47,26 +49,53 @@ class ResNet50vdAnimals(hub.Module): im_std = np.array([0.229, 0.224, 0.225]).reshape(1, 3) return im_std + def _get_device_id(self, places): + try: + places = os.environ[places] + id = int(places) + except: + id = -1 + return id + def _set_config(self): """ predictor config setting """ - cpu_config = AnalysisConfig(self.default_pretrained_model_path) + + # create default cpu predictor + cpu_config = 
Config(self.default_pretrained_model_path) cpu_config.disable_glog_info() cpu_config.disable_gpu() - self.cpu_predictor = create_paddle_predictor(cpu_config) + self.cpu_predictor = create_predictor(cpu_config) - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - use_gpu = True - except: - use_gpu = False - if use_gpu: - gpu_config = AnalysisConfig(self.default_pretrained_model_path) + # create predictors using various types of devices + + # npu + npu_id = self._get_device_id("FLAGS_selected_npus") + if npu_id != -1: + # use npu + npu_config = Config(self.default_pretrained_model_path) + npu_config.disable_glog_info() + npu_config.enable_npu(device_id=npu_id) + self.npu_predictor = create_predictor(npu_config) + + # gpu + gpu_id = self._get_device_id("CUDA_VISIBLE_DEVICES") + if gpu_id != -1: + # use gpu + gpu_config = Config(self.default_pretrained_model_path) gpu_config.disable_glog_info() - gpu_config.enable_use_gpu(memory_pool_init_size_mb=1000, device_id=0) - self.gpu_predictor = create_paddle_predictor(gpu_config) + gpu_config.enable_use_gpu(memory_pool_init_size_mb=1000, device_id=gpu_id) + self.gpu_predictor = create_predictor(gpu_config) + + # xpu + xpu_id = self._get_device_id("XPU_VISIBLE_DEVICES") + if xpu_id != -1: + # use xpu + xpu_config = Config(self.default_pretrained_model_path) + xpu_config.disable_glog_info() + xpu_config.enable_xpu(100) + self.xpu_predictor = create_predictor(xpu_config) def context(self, trainable=True, pretrained=True): """context for transfer learning. @@ -116,7 +145,7 @@ class ResNet50vdAnimals(hub.Module): param.trainable = trainable return inputs, outputs, context_prog - def classification(self, images=None, paths=None, batch_size=1, use_gpu=False, top_k=1): + def classification(self, images=None, paths=None, batch_size=1, use_gpu=False, top_k=1, use_device=None): """ API for image classification. @@ -126,18 +155,30 @@ class ResNet50vdAnimals(hub.Module): batch_size (int): batch size. use_gpu (bool): Whether to use gpu. top_k (int): Return top k results. + use_device (str): use cpu, gpu, xpu or npu, overwrites use_gpu flag. Returns: res (list[dict]): The classfication results. """ - if use_gpu: - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - except: - raise RuntimeError( - "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." 
- ) + + # real predictor to use + if use_device is not None: + if use_device == "cpu": + predictor = self.cpu_predictor + elif use_device == "xpu": + predictor = self.xpu_predictor + elif use_device == "npu": + predictor = self.npu_predictor + elif use_device == "gpu": + predictor = self.gpu_predictor + else: + raise Exception("Unsupported device: " + use_device) + else: + # use_device is not set, therefore follow use_gpu + if use_gpu: + predictor = self.gpu_predictor + else: + predictor = self.cpu_predictor all_data = list() for yield_data in reader(images, paths): @@ -157,10 +198,16 @@ class ResNet50vdAnimals(hub.Module): pass # feed batch image batch_image = np.array([data['image'] for data in batch_data]) - batch_image = PaddleTensor(batch_image.copy()) - predictor_output = self.gpu_predictor.run([batch_image]) if use_gpu else self.cpu_predictor.run( - [batch_image]) - out = postprocess(data_out=predictor_output[0].as_ndarray(), label_list=self.label_list, top_k=top_k) + + input_names = predictor.get_input_names() + input_tensor = predictor.get_input_handle(input_names[0]) + input_tensor.reshape(batch_image.shape) + input_tensor.copy_from_cpu(batch_image.copy()) + predictor.run() + output_names = predictor.get_output_names() + output_handle = predictor.get_output_handle(output_names[0]) + predictor_output = output_handle.copy_to_cpu() + out = postprocess(data_out=predictor_output, label_list=self.label_list, top_k=top_k) res += out return res @@ -174,14 +221,13 @@ class ResNet50vdAnimals(hub.Module): program, feeded_var_names, target_vars = fluid.io.load_inference_model( dirname=self.default_pretrained_model_path, executor=exe) - fluid.io.save_inference_model( - dirname=dirname, - main_program=program, - executor=exe, - feeded_var_names=feeded_var_names, - target_vars=target_vars, - model_filename=model_filename, - params_filename=params_filename) + fluid.io.save_inference_model(dirname=dirname, + main_program=program, + executor=exe, + feeded_var_names=feeded_var_names, + target_vars=target_vars, + model_filename=model_filename, + params_filename=params_filename) @serving def serving_method(self, images, **kwargs): @@ -197,28 +243,36 @@ class ResNet50vdAnimals(hub.Module): """ Run as a command. """ - self.parser = argparse.ArgumentParser( - description="Run the {} module.".format(self.name), - prog='hub run {}'.format(self.name), - usage='%(prog)s', - add_help=True) + self.parser = argparse.ArgumentParser(description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") self.arg_config_group = self.parser.add_argument_group( title="Config options", description="Run configuration for controlling module behavior, not required.") self.add_module_config_arg() self.add_module_input_arg() args = self.parser.parse_args(argvs) - results = self.classification(paths=[args.input_path], batch_size=args.batch_size, use_gpu=args.use_gpu) + results = self.classification(paths=[args.input_path], + batch_size=args.batch_size, + use_gpu=args.use_gpu, + top_k=args.top_k, + use_device=args.use_device) return results def add_module_config_arg(self): """ Add the command config options. 
""" - self.arg_config_group.add_argument( - '--use_gpu', type=ast.literal_eval, default=False, help="whether use GPU or not.") + self.arg_config_group.add_argument('--use_gpu', + type=ast.literal_eval, + default=False, + help="whether use GPU or not.") self.arg_config_group.add_argument('--batch_size', type=ast.literal_eval, default=1, help="batch size.") self.arg_config_group.add_argument('--top_k', type=ast.literal_eval, default=1, help="Return top k results.") + self.arg_config_group.add_argument('--use_device', + choices=["cpu", "gpu", "xpu", "npu"], + help="use cpu, gpu, xpu or npu. overwrites use_gpu flag.") def add_module_input_arg(self): """ diff --git a/modules/image/classification/resnet50_vd_dishes/README.md b/modules/image/classification/resnet50_vd_dishes/README.md index abd32a6ac2257b079a58abb9c02b44db1091211a..c5108d3d2ec69ba4af4c7c9085f948460417cfb7 100644 --- a/modules/image/classification/resnet50_vd_dishes/README.md +++ b/modules/image/classification/resnet50_vd_dishes/README.md @@ -1,159 +1,140 @@ -```shell -$ hub install resnet50_vd_dishes==1.0.0 -``` +# resnet50_vd_dishes -

-
ResNet 系列的网络结构 -

- -模型的详情可参考[论文](https://arxiv.org/pdf/1812.01187.pdf) - -## 命令行预测 +|模型名称|resnet50_vd_dishes| +| :--- | :---: | +|类别|图像-图像分类| +|网络|ResNet50_vd| +|数据集|百度自建菜品数据集| +|是否支持Fine-tuning|否| +|模型大小|158MB| +|最新更新日期|-| +|数据指标|-| -``` -hub run resnet50_vd_dishes --input_path "/PATH/TO/IMAGE" -``` -## API +## 一、模型基本信息 -```python -def get_expected_image_width() -``` -返回预处理的图片宽度,也就是224。 -```python -def get_expected_image_height() -``` +- ### 模型介绍 -返回预处理的图片高度,也就是224。 + - ResNet-vd是ResNet原始结构的变种,可用于图像分类和特征提取。该 PaddleHub Module 采用百度自建菜品数据集训练得到,支持8416种菜品的分类识别。 -```python -def get_pretrained_images_mean() -``` - -返回预处理的图片均值,也就是 \[0.485, 0.456, 0.406\]。 +

+
+

-```python -def get_pretrained_images_std() -``` + - 更多详情参考:[Bag of Tricks for Image Classification with Convolutional Neural Networks](https://arxiv.org/pdf/1812.01187.pdf) -返回预处理的图片标准差,也就是 \[0.229, 0.224, 0.225\]。 +## 二、安装 +- ### 1、环境依赖 -```python -def context(trainable=True, pretrained=True) -``` + - paddlepaddle >= 1.6.2 -**参数** + - paddlehub >= 1.6.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) -* trainable (bool): 计算图的参数是否为可训练的; -* pretrained (bool): 是否加载默认的预训练模型。 -**返回** +- ### 2、安装 -* inputs (dict): 计算图的输入,key 为 'image', value 为图片的张量; -* outputs (dict): 计算图的输出,key 为 'classification' 和 'feature_map',其相应的值为: - * classification (paddle.fluid.framework.Variable): 分类结果,也就是全连接层的输出; - * feature\_map (paddle.fluid.framework.Variable): 特征匹配,全连接层前面的那个张量。 -* context\_prog(fluid.Program): 计算图,用于迁移学习。 + - ```shell + $ hub install resnet50_vd_dishes + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) -```python -def classification(images=None, - paths=None, - batch_size=1, - use_gpu=False, - top_k=1): -``` +## 三、模型API预测 -**参数** +- ### 1、命令行预测 -* images (list\[numpy.ndarray\]): 图片数据,每一个图片数据的shape 均为 \[H, W, C\],颜色空间为 BGR; -* paths (list\[str\]): 图片的路径; -* batch\_size (int): batch 的大小; -* use\_gpu (bool): 是否使用 GPU 来预测; -* top\_k (int): 返回预测结果的前 k 个。 + - ```shell + $ hub run resnet50_vd_dishes --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现菜品分类模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) -**返回** +- ### 2、预测代码示例 -res (list\[dict\]): 分类结果,列表的每一个元素均为字典,其中 key 为识别的菜品类别,value为置信度。 + - ```python + import paddlehub as hub + import cv2 -```python -def save_inference_model(dirname, - model_filename=None, - params_filename=None, - combined=True) -``` + classifier = hub.Module(name="resnet50_vd_dishes") + result = classifier.classification(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = classifier.classification(paths=['/PATH/TO/IMAGE']) + ``` -将模型保存到指定路径。 +- ### 3、API -**参数** -* dirname: 存在模型的目录名称 -* model_filename: 模型文件名称,默认为\_\_model\_\_ -* params_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效) -* combined: 是否将参数保存到统一的一个文件中 + - ```python + def classification(images=None, + paths=None, + batch_size=1, + use_gpu=False, + top_k=1): + ``` + - 分类接口API。 + - **参数** -## 代码示例 + - images (list\[numpy.ndarray\]): 图片数据,每一个图片数据的shape 均为 \[H, W, C\],颜色空间为 BGR;
+ - paths (list\[str\]): 图片的路径;
+ - batch\_size (int): batch 的大小;
+      - use\_gpu (bool): 是否使用 GPU;**若使用GPU,请先设置CUDA\_VISIBLE\_DEVICES环境变量**;
+ - top\_k (int): 返回预测结果的前 k 个。 -```python -import paddlehub as hub -import cv2 + - **返回** -classifier = hub.Module(name="resnet50_vd_dishes") + - res (list\[dict\]): 分类结果,列表的每一个元素均为字典,其中 key 为识别的菜品类别,value为置信度。 -result = classifier.classification(images=[cv2.imread('/PATH/TO/IMAGE')]) -# or -# result = classifier.classification(paths=['/PATH/TO/IMAGE']) -``` -## 服务部署 -PaddleHub Serving可以部署一个菜品分类的在线服务。 -## 第一步:启动PaddleHub Serving +## 四、服务部署 -运行启动命令: -```shell -$ hub serving start -m resnet50_vd_dishes -``` +- PaddleHub Serving可以部署一个菜品分类的在线服务。 -这样就完成了一个菜品分类的在线服务的部署,默认端口号为8866。 +- ### 第一步:启动PaddleHub Serving -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + - 运行启动命令: + - ```shell + $ hub serving start -m resnet50_vd_dishes + ``` -## 第二步:发送预测请求 + - 这样就完成了一个菜品分类的在线服务的部署,默认端口号为8866。 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果。 + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 -```python -import requests -import json -import cv2 -import base64 +- ### 第二步:发送预测请求 + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 -def cv2_to_base64(image): - data = cv2.imencode('.jpg', image)[1] - return base64.b64encode(data.tostring()).decode('utf8') + - ```python + import requests + import json + import cv2 + import base64 + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') -# 发送HTTP请求 -data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} -headers = {"Content-type": "application/json"} -url = "http://127.0.0.1:8866/predict/resnet50_vd_dishes" -r = requests.post(url=url, headers=headers, data=json.dumps(data)) + # 发送HTTP请求 + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/resnet50_vd_dishes" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) -# 打印预测结果 -print(r.json()["results"]) -``` + # 打印预测结果 + print(r.json()["results"]) + ``` -### 查看代码 -[PaddlePaddle/models 图像分类](https://github.com/PaddlePaddle/models/tree/develop/PaddleCV/image_classification) +## 五、更新历史 -### 依赖 +* 1.0.0 -paddlepaddle >= 1.6.2 + 初始发布 -paddlehub >= 1.6.0 + - ```shell + $ hub install resnet50_vd_dishes==1.0.0 + ``` diff --git a/modules/image/classification/resnet50_vd_imagenet_ssld/README.md b/modules/image/classification/resnet50_vd_imagenet_ssld/README.md new file mode 100644 index 0000000000000000000000000000000000000000..229e5d0c8400152d73f354f13dab546a3f8b749c --- /dev/null +++ b/modules/image/classification/resnet50_vd_imagenet_ssld/README.md @@ -0,0 +1,200 @@ +# resnet50_vd_imagenet_ssld + +|模型名称|resnet50_vd_imagenet_ssld| +| :--- | :---: | +|类别|图像-图像分类| +|网络|ResNet_vd| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|是| +|模型大小|148MB| +|指标|-| +|最新更新日期|2021-02-26| + + +## 一、模型基本信息 + +- ### 模型介绍 + + - ResNet-vd 其实就是 ResNet-D,是ResNet 原始结构的变种,可用于图像分类和特征提取。 + + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 + +- ### 2、安装 + - ```shell + $ hub install resnet50_vd_imagenet_ssld + ``` + + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1.命令行预测 + + ```shell + $ hub run resnet50_vd_imagenet_ssld --input_path "/PATH/TO/IMAGE" --top_k 5 + ``` +- ### 2.预测代码示例 + + ```python + import paddle + import paddlehub as hub + + if __name__ == '__main__': + + model = hub.Module(name='resnet50_vd_imagenet_ssld') + 
result = model.predict(['flower.jpg']) + ``` +- ### 3.如何开始Fine-tune + + - 在完成安装PaddlePaddle与PaddleHub后,通过执行`python train.py`即可开始使用resnet50_vd_imagenet_ssld对[Flowers](../../docs/reference/datasets.md#class-hubdatasetsflowers)等数据集进行Fine-tune。 + + - 代码步骤 + + - Step1: 定义数据预处理方式 + - ```python + import paddlehub.vision.transforms as T + + transforms = T.Compose([T.Resize((256, 256)), + T.CenterCrop(224), + T.Normalize(mean=[0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225])], + to_rgb=True) + ``` + + - `transforms` 数据增强模块定义了丰富的数据预处理方式,用户可按照需求替换自己需要的数据预处理方式。 + + - Step2: 下载数据集并使用 + - ```python + from paddlehub.datasets import Flowers + + flowers = Flowers(transforms) + + flowers_validate = Flowers(transforms, mode='val') + ``` + + * `transforms`: 数据预处理方式。 + * `mode`: 选择数据模式,可选项有 `train`, `test`, `val`, 默认为`train`。 + + * 数据集的准备代码可以参考 [flowers.py](../../paddlehub/datasets/flowers.py)。`hub.datasets.Flowers()` 会自动从网络下载数据集并解压到用户目录下`$HOME/.paddlehub/dataset`目录。 + + + - Step3: 加载预训练模型 + + - ```python + model = hub.Module(name="resnet50_vd_imagenet_ssld", label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"]) + ``` + * `name`: 选择预训练模型的名字。 + * `label_list`: 设置输出分类类别,默认为Imagenet2012类别。 + + - Step4: 选择优化策略和运行配置 + + ```python + optimizer = paddle.optimizer.Adam(learning_rate=0.001, parameters=model.parameters()) + trainer = Trainer(model, optimizer, checkpoint_dir='img_classification_ckpt') + + trainer.train(flowers, epochs=100, batch_size=32, eval_dataset=flowers_validate, save_interval=1) + ``` + + + - 运行配置 + + - `Trainer` 主要控制Fine-tune的训练,包含以下可控制的参数: + + * `model`: 被优化模型; + * `optimizer`: 优化器选择; + * `use_vdl`: 是否使用vdl可视化训练过程; + * `checkpoint_dir`: 保存模型参数的地址; + * `compare_metrics`: 保存最优模型的衡量指标; + + - `trainer.train` 主要控制具体的训练过程,包含以下可控制的参数: + + * `train_dataset`: 训练时所用的数据集; + * `epochs`: 训练轮数; + * `batch_size`: 训练的批大小,如果使用GPU,请根据实际情况调整batch_size; + * `num_workers`: works的数量,默认为0; + * `eval_dataset`: 验证集; + * `log_interval`: 打印日志的间隔, 单位为执行批训练的次数。 + * `save_interval`: 保存模型的间隔频次,单位为执行训练的轮数。 + + - 模型预测 + + - 当完成Fine-tune后,Fine-tune过程在验证集上表现最优的模型会被保存在`${CHECKPOINT_DIR}/best_model`目录下,其中`${CHECKPOINT_DIR}`目录为Fine-tune时所选择的保存checkpoint的目录。 我们使用该模型来进行预测。predict.py脚本如下: + + - ```python + import paddle + import paddlehub as hub + + if __name__ == '__main__': + + model = hub.Module(name='resnet50_vd_imagenet_ssld', label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"], load_checkpoint='/PATH/TO/CHECKPOINT') + result = model.predict(['flower.jpg']) + ``` + + + - **NOTE:** 进行预测时,所选择的module,checkpoint_dir,dataset必须和Fine-tune所用的一样。 + +## 四、服务部署 + +- PaddleHub Serving可以部署一个在线分类任务服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + + - ```shell + $ hub serving start -m resnet50_vd_imagenet_ssld + ``` + + - 这样就完成了一个分类任务服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + ```python + import requests + import json + import cv2 + import base64 + + import numpy as np + + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + # 发送HTTP请求 + org_im = cv2.imread('/PATH/TO/IMAGE') + + data = {'images':[cv2_to_base64(org_im)], 'top_k':2} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/resnet50_vd_imagenet_ssld" + r = 
requests.post(url=url, headers=headers, data=json.dumps(data)) + data =r.json()["results"]['data'] + ``` +## 五、更新历史 + +* 1.0.0 + + 初始发布 + +* 1.1.0 + + 升级为动态图版本 + diff --git a/modules/image/classification/resnet50_vd_wildanimals/README.md b/modules/image/classification/resnet50_vd_wildanimals/README.md index f4415cccc069b6686be3d3a978b7db8de3ec72b4..d857c89b70156dda3891da0994336ca7d5f801fc 100644 --- a/modules/image/classification/resnet50_vd_wildanimals/README.md +++ b/modules/image/classification/resnet50_vd_wildanimals/README.md @@ -1,159 +1,134 @@ -```shell -$ hub install resnet50_vd_wildanimals==1.0.0 -``` +# resnet50_vd_wildanimals -

-
ResNet 系列的网络结构 -

+|模型名称|resnet50_vd_wildanimals| +| :--- | :---: | +|类别|图像-图像分类| +|网络|ResNet_vd| +|数据集|IFAW 自建野生动物数据集| +|是否支持Fine-tuning|否| +|模型大小|92MB| +|最新更新日期|-| +|数据指标|-| -模型的详情可参考[论文](https://arxiv.org/pdf/1812.01187.pdf) -## 命令行预测 +## 一、模型基本信息 -``` -hub run resnet50_vd_wildanimals --input_path "/PATH/TO/IMAGE" -``` -## API -```python -def get_expected_image_width() -``` +- ### 模型介绍 -返回预处理的图片宽度,也就是224。 + - ResNet-vd 其实就是 ResNet-D,是ResNet 原始结构的变种,可用于图像分类和特征提取。该 PaddleHub Module 采用百度自建野生动物数据集训练得到,支持'象牙制品','象牙', '大象', '虎皮', '老虎', '虎牙/虎爪/虎骨', '穿山甲甲片', '穿山甲', '穿山甲爪子', '其他' 这十个标签的识别。模型的详情可参考[论文](https://arxiv.org/pdf/1812.01187.pdf)。 -```python -def get_expected_image_height() -``` -返回预处理的图片高度,也就是224。 -```python -def get_pretrained_images_mean() -``` +## 二、安装 -返回预处理的图片均值,也就是 \[0.485, 0.456, 0.406\]。 +- ### 1、环境依赖 -```python -def get_pretrained_images_std() -``` + - paddlepaddle >= 1.6.2 -返回预处理的图片标准差,也就是 \[0.229, 0.224, 0.225\]。 + - paddlehub >= 1.6.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) -```python -def context(trainable=True, pretrained=True) -``` +- ### 2、安装 -**参数** + - ```shell + $ hub install resnet50_vd_wildanimals + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) -* trainable (bool): 计算图的参数是否为可训练的; -* pretrained (bool): 是否加载默认的预训练模型。 +## 三、模型API预测 -**返回** +- ### 1、命令行预测 -* inputs (dict): 计算图的输入,key 为 'image', value 为图片的张量; -* outputs (dict): 计算图的输出,key 为 'classification' 和 'feature\_map',其相应的值为: - * classification (paddle.fluid.framework.Variable): 分类结果,也就是全连接层的输出; - * feature\_map (paddle.fluid.framework.Variable): 特征匹配,全连接层前面的那个张量。 -* context\_prog(fluid.Program): 计算图,用于迁移学习。 + - ```shell + $ hub run resnet50_vd_wildanimals --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现分类模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) -```python -def classification(images=None, - paths=None, - batch_size=1, - use_gpu=False, - top_k=1): -``` +- ### 2、预测代码示例 -**参数** + - ```python + import paddlehub as hub + import cv2 -* images (list\[numpy.ndarray\]): 图片数据,每一个图片数据的shape 均为 \[H, W, C\],颜色空间为 BGR; -* paths (list\[str\]): 图片的路径; -* batch\_size (int): batch 的大小; -* use\_gpu (bool): 是否使用 GPU 来预测; -* top\_k (int): 返回预测结果的前 k 个。 + classifier = hub.Module(name="resnet50_vd_wildanimals") + result = classifier.classification(images=[cv2.imread('/PATH/TO/IMAGE')]) + # or + # result = classifier.classification(paths=['/PATH/TO/IMAGE']) + ``` -**返回** +- ### 3、API -res (list\[dict\]): 分类结果,列表的每一个元素均为字典,其中 key 为识别动物的类别,value为置信度。 -```python -def save_inference_model(dirname, - model_filename=None, - params_filename=None, - combined=True) -``` + - ```python + def classification(images=None, + paths=None, + batch_size=1, + use_gpu=False, + top_k=1): + ``` + - 分类接口API。 + - **参数** -将模型保存到指定路径。 + - images (list\[numpy.ndarray\]): 图片数据,每一个图片数据的shape 均为 \[H, W, C\],颜色空间为 BGR;
+ - paths (list\[str\]): 图片的路径;
+ - batch\_size (int): batch 的大小;
+      - use\_gpu (bool): 是否使用 GPU;**若使用GPU,请先设置CUDA\_VISIBLE\_DEVICES环境变量**;
+ - top\_k (int): 返回预测结果的前 k 个。 -**参数** + - **返回** -* dirname: 存在模型的目录名称 -* model\_filename: 模型文件名称,默认为\_\_model\_\_ -* params\_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效) -* combined: 是否将参数保存到统一的一个文件中 + - res (list\[dict\]): 分类结果,列表的每一个元素均为字典,其中 key 为识别的菜品类别,value为置信度。 -## 代码示例 -```python -import paddlehub as hub -import cv2 -classifier = hub.Module(name="resnet50_vd_wildanimals") +## 四、服务部署 -result = classifier.classification(images=[cv2.imread('/PATH/TO/IMAGE')]) -# or -# result = classifier.classification(paths=['/PATH/TO/IMAGE']) -``` +- PaddleHub Serving可以部署一个野生动物及其制品识别的在线服务。 -## 服务部署 +- ### 第一步:启动PaddleHub Serving -PaddleHub Serving可以部署一个野生动物及其制品的在线识别服务。 + - 运行启动命令: + - ```shell + $ hub serving start -m resnet50_vd_wildanimals + ``` -## 第一步:启动PaddleHub Serving + - 这样就完成了一个野生动物及其制品识别的在线服务的部署,默认端口号为8866。 -运行启动命令: -```shell -$ hub serving start -m resnet50_vd_wildanimals -``` + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 -这样就完成了一个野生动物及其制品的在线服务的部署,默认端口号为8866。 +- ### 第二步:发送预测请求 -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 -## 第二步:发送预测请求 + - ```python + import requests + import json + import cv2 + import base64 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') -```python -import requests -import json -import cv2 -import base64 + # 发送HTTP请求 + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/resnet50_vd_wildanimals" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + # 打印预测结果 + print(r.json()["results"]) + ``` -def cv2_to_base64(image): - data = cv2.imencode('.jpg', image)[1] - return base64.b64encode(data.tostring()).decode('utf8') +## 五、更新历史 -# 发送HTTP请求 -data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} -headers = {"Content-type": "application/json"} -url = "http://127.0.0.1:8866/predict/resnet50_vd_wildanimals" -r = requests.post(url=url, headers=headers, data=json.dumps(data)) +* 1.0.0 -# 打印预测结果 -print(r.json()["results"]) -``` - -### 查看代码 - -[PaddlePaddle/models 图像分类](https://github.com/PaddlePaddle/models/tree/develop/PaddleCV/image_classification) - -### 依赖 - -paddlepaddle >= 1.6.2 - -paddlehub >= 1.6.0 + 初始发布 + - ```shell + $ hub install resnet50_vd_wildanimals==1.0.0 + ``` diff --git a/modules/image/classification/resnet_v2_101_imagenet/README.md b/modules/image/classification/resnet_v2_101_imagenet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..8533fb4b21496e17eef80ccd2b486f5ff2076a99 --- /dev/null +++ b/modules/image/classification/resnet_v2_101_imagenet/README.md @@ -0,0 +1,86 @@ +# resnet_v2_101_imagenet + +|模型名称|resnet_v2_101_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|ResNet V2 101| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|否| +|模型大小|173MB| +|最新更新日期|-| +|数据指标|-| + + +## 一、模型基本信息 + + + +- ### 模型介绍 + + - ResNet系列模型是图像分类领域的重要模型之一,模型中提出的残差单元有效地解决了深度网络训练困难的问题,通过增加模型的深度提升了模型的准确率。该PaddleHub Module结构为ResNet101,基于ImageNet-2012数据集训练,接受输入图片大小为224 x 224 x 3,支持直接通过命令行或者Python接口进行预测。 + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + + +- ### 2、安装 + + - ```shell + $ hub install resnet_v2_101_imagenet + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | 
[零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run resnet_v2_101_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现图像分类模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="resnet_v2_101_imagenet") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - 分类接口API。 + - **参数** + - data:dict类型,key为image,str类型,value为待检测的图片路径,list类型。 + + - **返回** + - result:list类型,每个元素为对应输入图片的预测结果。预测结果为dict类型,key为该图片分类结果label,value为该label对应的概率 + + + + + +## 四、更新历史 + +* 1.0.0 + + 初始发布 + +* 1.0.1 + 修复python2中编码问题 + - ```shell + $ hub install resnet_v2_101_imagenet==1.0.1 + ``` diff --git a/modules/image/classification/resnet_v2_152_imagenet/README.md b/modules/image/classification/resnet_v2_152_imagenet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..f849e95ed6e3cc5910f70321ceaba467702e3447 --- /dev/null +++ b/modules/image/classification/resnet_v2_152_imagenet/README.md @@ -0,0 +1,86 @@ +# resnet_v2_152_imagenet + +|模型名称|resnet_v2_152_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|ResNet V2| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|否| +|模型大小|234MB| +|最新更新日期|-| +|数据指标|-| + + +## 一、模型基本信息 + + + +- ### 模型介绍 + + - ResNet系列模型是图像分类领域的重要模型之一,模型中提出的残差单元有效地解决了深度网络训练困难的问题,通过增加模型的深度提升了模型的准确率。该PaddleHub Module结构为ResNet152,基于ImageNet-2012数据集训练,接受输入图片大小为224 x 224 x 3,支持直接通过命令行或者Python接口进行预测。 + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + + +- ### 2、安装 + + - ```shell + $ hub install resnet_v2_152_imagenet + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run resnet_v2_152_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现图像分类模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="resnet_v2_152_imagenet") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - 分类接口API。 + - **参数** + - data:dict类型,key为image,str类型,value为待检测的图片路径,list类型。 + + - **返回** + - result:list类型,每个元素为对应输入图片的预测结果。预测结果为dict类型,key为该图片分类结果label,value为该label对应的概率 + + + + + +## 四、更新历史 + +* 1.0.0 + + 初始发布 + +* 1.0.1 + 修复python2中编码问题 + - ```shell + $ hub install resnet_v2_152_imagenet==1.0.1 + ``` diff --git a/modules/image/classification/resnet_v2_18_imagenet/README.md b/modules/image/classification/resnet_v2_18_imagenet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..23a83f47686c1b76a5b49fd487b954d5102b8b44 --- /dev/null +++ b/modules/image/classification/resnet_v2_18_imagenet/README.md @@ -0,0 +1,84 @@ +# resnet_v2_18_imagenet + +|模型名称|resnet_v2_18_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|ResNet V2| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|否| 
+|模型大小|46MB| +|最新更新日期|-| +|数据指标|-| + + +## 一、模型基本信息 + + + +- ### 模型介绍 + + - ResNet系列模型是图像分类领域的重要模型之一,模型中提出的残差单元有效地解决了深度网络训练困难的问题,通过增加模型的深度提升了模型的准确率。该PaddleHub Module结构为ResNet18,基于ImageNet-2012数据集训练,接受输入图片大小为224 x 224 x 3,支持直接通过命令行或者Python接口进行预测。 + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + + +- ### 2、安装 + + - ```shell + $ hub install resnet_v2_18_imagenet + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run resnet_v2_18_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现图像分类模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="resnet_v2_18_imagenet") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - 分类接口API。 + - **参数** + - data:dict类型,key为image,str类型,value为待检测的图片路径,list类型。 + + - **返回** + - result:list类型,每个元素为对应输入图片的预测结果。预测结果为dict类型,key为该图片分类结果label,value为该label对应的概率 + + + + + +## 四、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install resnet_v2_18_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/resnet_v2_34_imagenet/README.md b/modules/image/classification/resnet_v2_34_imagenet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d8752ec5d46ad8e0f1931072ebde00beea8b6843 --- /dev/null +++ b/modules/image/classification/resnet_v2_34_imagenet/README.md @@ -0,0 +1,84 @@ +# resnet_v2_34_imagenet + +|模型名称|resnet_v2_34_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|ResNet V2| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|否| +|模型大小|85MB| +|最新更新日期|-| +|数据指标|-| + + +## 一、模型基本信息 + + + +- ### 模型介绍 + + - ResNet系列模型是图像分类领域的重要模型之一,模型中提出的残差单元有效地解决了深度网络训练困难的问题,通过增加模型的深度提升了模型的准确率。该PaddleHub Module结构为ResNet34,基于ImageNet-2012数据集训练,接受输入图片大小为224 x 224 x 3,支持直接通过命令行或者Python接口进行预测。 + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + + +- ### 2、安装 + + - ```shell + $ hub install resnet_v2_34_imagenet + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run resnet_v2_34_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现图像分类模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="resnet_v2_34_imagenet") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - 分类接口API。 + - **参数** + - data:dict类型,key为image,str类型,value为待检测的图片路径,list类型。 + + - **返回** + - result:list类型,每个元素为对应输入图片的预测结果。预测结果为dict类型,key为该图片分类结果label,value为该label对应的概率 + + + + + +## 四、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install resnet_v2_34_imagenet==1.0.0 + ``` 
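Unlike the newer-style `classification(images=..., paths=..., top_k=...)` interface shown earlier in this patch, the `classification(data)` interface used by the resnet_v2_* modules above takes no `top_k` argument and, per these READMEs, returns a list with one label-to-probability dict per input image. The sketch below shows one way a caller could recover a top-k view from that return value; it is illustrative only — the module name, image paths, and `TOP_K` value are arbitrary choices, and the loop assumes exactly the return format documented above.

```python
import paddlehub as hub

# Minimal post-processing sketch (not part of the modules themselves).
# Assumption: classification(data) returns a list with one dict per input image,
# mapping each predicted label to its probability, as documented in the READMEs above.
classifier = hub.Module(name="resnet_v2_34_imagenet")  # any resnet_v2_* module above works the same way
input_dict = {"image": ["/PATH/TO/IMAGE_1", "/PATH/TO/IMAGE_2"]}
results = classifier.classification(data=input_dict)

TOP_K = 5  # illustrative choice: keep the 5 most probable labels per image
for path, label_probs in zip(input_dict["image"], results):
    # sort the label->probability dict by probability, descending, and keep the top entries
    top_labels = sorted(label_probs.items(), key=lambda kv: kv[1], reverse=True)[:TOP_K]
    print(path)
    for label, prob in top_labels:
        print("  {}: {:.4f}".format(label, prob))
```

The newer-style modules in this patch (for example resnet50_vd_dishes) already expose a `top_k` parameter directly, so this client-side post-processing is only relevant to the older `classification(data)` interface.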
diff --git a/modules/image/classification/resnet_v2_50_imagenet/README.md b/modules/image/classification/resnet_v2_50_imagenet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..3963bd759ccfe6d651582401276ab580d552cbfc --- /dev/null +++ b/modules/image/classification/resnet_v2_50_imagenet/README.md @@ -0,0 +1,86 @@ +# resnet_v2_50_imagenet + +|模型名称|resnet_v2_50_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|ResNet V2| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|否| +|模型大小|99MB| +|最新更新日期|-| +|数据指标|-| + + +## 一、模型基本信息 + + + +- ### 模型介绍 + + - ResNet系列模型是图像分类领域的重要模型之一,模型中提出的残差单元有效地解决了深度网络训练困难的问题,通过增加模型的深度提升了模型的准确率。该PaddleHub Module结构为ResNet50,基于ImageNet-2012数据集训练,接受输入图片大小为224 x 224 x 3,支持直接通过命令行或者Python接口进行预测。 + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + + +- ### 2、安装 + + - ```shell + $ hub install resnet_v2_50_imagenet + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run resnet_v2_50_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现图像分类模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="resnet_v2_50_imagenet") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - 分类接口API。 + - **参数** + - data:dict类型,key为image,str类型,value为待检测的图片路径,list类型。 + + - **返回** + - result:list类型,每个元素为对应输入图片的预测结果。预测结果为dict类型,key为该图片分类结果label,value为该label对应的概率 + + + + + +## 四、更新历史 + +* 1.0.0 + + 初始发布 + +* 1.0.1 + 修复python2中编码问题 + - ```shell + $ hub install resnet_v2_50_imagenet==1.0.1 + ``` diff --git a/modules/image/classification/resnext101_32x16d_wsl/README.md b/modules/image/classification/resnext101_32x16d_wsl/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7b6501be1e191f25ca7c85dc418ab61b05e70095 --- /dev/null +++ b/modules/image/classification/resnext101_32x16d_wsl/README.md @@ -0,0 +1,84 @@ +# resnext101_32x16d_wsl + +|模型名称|resnext101_32x16d_wsl| +| :--- | :---: | +|类别|图像-图像分类| +|网络|ResNeXt_wsl| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|否| +|模型大小|744MB| +|最新更新日期|-| +|数据指标|-| + + +## 一、模型基本信息 + + + +- ### 模型介绍 + + - 由于人工标注的数据集在规模上已经接近其函数极限,Facebook 的研发人员采用了一种独特的迁移学习研究,通过使用 hashtag 作为标注,在包含数十亿张社交媒体图片的数据集上进行训练,这为大规模训练转向弱监督学习(Weakly Supervised Learning) 取得了重大突破。在 ImageNet 图像识别基准上,ResNeXt101_32x16d_wsl 的 Top-1 达到了 84.24% 的准确率。该 PaddleHub Module结构为 ResNeXt101_32x16d_wsl,接受输入图片大小为 224 x 224 x 3,支持直接通过命令行或者 Python 接口进行预测。 + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 1.6.0 + + - paddlehub >= 1.0.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + + +- ### 2、安装 + + - ```shell + $ hub install resnext101_32x16d_wsl + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run resnext101_32x16d_wsl --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现图像分类模型的调用,更多请见 
[PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="resnext101_32x16d_wsl") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - 分类接口API。 + - **参数** + - data:dict类型,key为image,str类型,value为待检测的图片路径,list类型。 + + - **返回** + - result:list类型,每个元素为对应输入图片的预测结果。预测结果为dict类型,key为该图片分类结果label,value为该label对应的概率 + + + + + +## 四、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install resnext101_32x16d_wsl==1.0.0 + ``` diff --git a/modules/image/classification/resnext101_32x32d_wsl/README.md b/modules/image/classification/resnext101_32x32d_wsl/README.md new file mode 100644 index 0000000000000000000000000000000000000000..f3f37f3d4be0260831cfc96c7052a00199b698af --- /dev/null +++ b/modules/image/classification/resnext101_32x32d_wsl/README.md @@ -0,0 +1,84 @@ +# resnext101_32x32d_wsl + +|模型名称|resnext101_32x32d_wsl| +| :--- | :---: | +|类别|图像-图像分类| +|网络|ResNeXt_wsl| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|否| +|模型大小|1.8GB| +|最新更新日期|-| +|数据指标|-| + + +## 一、模型基本信息 + + + +- ### 模型介绍 + + - 由于人工标注的数据集在规模上已经接近其函数极限,Facebook 的研发人员采用了一种独特的迁移学习研究,通过使用 hashtag 作为标注,在包含数十亿张社交媒体图片的数据集上进行训练,这为大规模训练转向弱监督学习(Weakly Supervised Learning) 取得了重大突破。在 ImageNet 图像识别基准上,ResNeXt101_32x32d_wsl 的 Top-1 达到了 84.97% 的准确率。该 PaddleHub Module结构为 ResNeXt101_32x32d_wsl,接受输入图片大小为 224 x 224 x 3,支持直接通过命令行或者 Python 接口进行预测。 + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 1.6.0 + + - paddlehub >= 1.0.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + + +- ### 2、安装 + + - ```shell + $ hub install resnext101_32x32d_wsl + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run resnext101_32x32d_wsl --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现图像分类模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="resnext101_32x32d_wsl") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - 分类接口API。 + - **参数** + - data:dict类型,key为image,str类型,value为待检测的图片路径,list类型。 + + - **返回** + - result:list类型,每个元素为对应输入图片的预测结果。预测结果为dict类型,key为该图片分类结果label,value为该label对应的概率 + + + + + +## 四、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install resnext101_32x32d_wsl==1.0.0 + ``` diff --git a/modules/image/classification/resnext101_32x48d_wsl/README.md b/modules/image/classification/resnext101_32x48d_wsl/README.md new file mode 100644 index 0000000000000000000000000000000000000000..24603e39ac1ac93c065ea35a25a6ad6c69959750 --- /dev/null +++ b/modules/image/classification/resnext101_32x48d_wsl/README.md @@ -0,0 +1,84 @@ +# resnext101_32x48d_wsl + +|模型名称|resnext101_32x48d_wsl| +| :--- | :---: | +|类别|图像-图像分类| +|网络|ResNeXt_wsl| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|否| +|模型大小|342MB| +|最新更新日期|-| +|数据指标|-| + + +## 一、模型基本信息 + + + +- ### 模型介绍 + + - 由于人工标注的数据集在规模上已经接近其函数极限,Facebook 的研发人员采用了一种独特的迁移学习研究,通过使用 hashtag 作为标注,在包含数十亿张社交媒体图片的数据集上进行训练,这为大规模训练转向弱监督学习(Weakly Supervised Learning) 
取得了重大突破。在 ImageNet 图像识别基准上,ResNeXt101_32x48d_wsl 的 Top-1 达到了 85.4% 的准确率。该 PaddleHub Module结构为 ResNeXt101_32x48d_wsl,接受输入图片大小为 224 x 224 x 3,支持直接通过命令行或者 Python 接口进行预测。 + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 1.6.0 + + - paddlehub >= 1.0.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + + +- ### 2、安装 + + - ```shell + $ hub install resnext101_32x48d_wsl + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run resnext101_32x48d_wsl --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现图像分类模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="resnext101_32x48d_wsl") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - 分类接口API。 + - **参数** + - data:dict类型,key为image,str类型,value为待检测的图片路径,list类型。 + + - **返回** + - result:list类型,每个元素为对应输入图片的预测结果。预测结果为dict类型,key为该图片分类结果label,value为该label对应的概率 + + + + + +## 四、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install resnext101_32x48d_wsl==1.0.0 + ``` diff --git a/modules/image/classification/resnext101_32x4d_imagenet/README.md b/modules/image/classification/resnext101_32x4d_imagenet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..60a0e27f7431ed79619a65f7fb91075b54eee2b3 --- /dev/null +++ b/modules/image/classification/resnext101_32x4d_imagenet/README.md @@ -0,0 +1,85 @@ +# resnext101_32x4d_imagenet + +|模型名称|resnext101_32x4d_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|ResNeXt| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|否| +|模型大小|172MB| +|最新更新日期|-| +|数据指标|-| + + +## 一、模型基本信息 + + + +- ### 模型介绍 + + - ResNeXt 是由 UC San Diego 和 Facebook AI 研究所于2017年提出的图像分类模型,模型沿袭了 VGG/ResNets 的堆叠思想,并采用 split-transform-merge 策略来增加网络的分支数。resnext101_32x4d,表示 layers 为 101, 分支数为 32,每个分支的输入输出 channels 为4。该 PaddleHub Module 在包含数十亿张社交媒体图片的数据集上进行弱监督训练,并使用ImageNet-2012数据集finetune,接受输入图片大小为 224 x 224 x 3,支持直接通过命令行或者 Python 接口进行预测。 + + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + + +- ### 2、安装 + + - ```shell + $ hub install resnext101_32x4d_imagenet + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run resnext101_32x4d_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现图像分类模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="resnext101_32x4d_imagenet") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - 分类接口API。 + - **参数** + - data:dict类型,key为image,str类型,value为待检测的图片路径,list类型。 + + - **返回** + - result:list类型,每个元素为对应输入图片的预测结果。预测结果为dict类型,key为该图片分类结果label,value为该label对应的概率 + + + + + +## 
四、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install resnext101_32x4d_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/resnext101_32x8d_wsl/README.md b/modules/image/classification/resnext101_32x8d_wsl/README.md new file mode 100644 index 0000000000000000000000000000000000000000..94f8491dc53bd9905bcb163b84839ba3fc309527 --- /dev/null +++ b/modules/image/classification/resnext101_32x8d_wsl/README.md @@ -0,0 +1,84 @@ +# resnext101_32x8d_wsl + +|模型名称|resnext101_32x8d_wsl| +| :--- | :---: | +|类别|图像-图像分类| +|网络|ResNeXt_wsl| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|否| +|模型大小|317MB| +|最新更新日期|-| +|数据指标|-| + + +## 一、模型基本信息 + + + +- ### 模型介绍 + + - 由于人工标注的数据集在规模上已经接近其函数极限,Facebook 的研发人员采用了一种独特的迁移学习研究,通过使用 hashtag 作为标注,在包含数十亿张社交媒体图片的数据集上进行训练,这为大规模训练转向弱监督学习(Weakly Supervised Learning) 取得了重大突破。在 ImageNet 图像识别基准上,ResNeXt101_32x8d_wsl 的 Top-1 达到了 82.55% 的准确率。该 PaddleHub Module结构为 ResNeXt101_32x8d_wsl,接受输入图片大小为 224 x 224 x 3,支持直接通过命令行或者 Python 接口进行预测。 + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 1.6.0 + + - paddlehub >= 1.0.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + + +- ### 2、安装 + + - ```shell + $ hub install resnext101_32x8d_wsl + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run resnext101_32x8d_wsl --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现图像分类模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="resnext101_32x8d_wsl") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - 分类接口API。 + - **参数** + - data:dict类型,key为image,str类型,value为待检测的图片路径,list类型。 + + - **返回** + - result:list类型,每个元素为对应输入图片的预测结果。预测结果为dict类型,key为该图片分类结果label,value为该label对应的概率 + + + + + +## 四、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install resnext101_32x8d_wsl==1.0.0 + ``` diff --git a/modules/image/classification/resnext101_64x4d_imagenet/README.md b/modules/image/classification/resnext101_64x4d_imagenet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..588f2dbabc6bba44e290ec1fc75ed1a75dcf22ec --- /dev/null +++ b/modules/image/classification/resnext101_64x4d_imagenet/README.md @@ -0,0 +1,84 @@ +# resnext101_64x4d_imagenet + +|模型名称|resnext101_64x4d_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|ResNeXt| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|否| +|模型大小|322MB| +|最新更新日期|-| +|数据指标|-| + + +## 一、模型基本信息 + + + +- ### 模型介绍 + + - ResNeXt 是由 UC San Diego 和 Facebook AI 研究所于2017年提出的图像分类模型,模型沿袭了 VGG/ResNets 的堆叠思想,并采用 split-transform-merge 策略来增加网络的分支数。resnext101_64x4d,表示 layers 为 101, 分支数为 64,每个分支的输入输出 channels 为4。该 PaddleHub Module 在包含数十亿张社交媒体图片的数据集上进行弱监督训练,并使用ImageNet-2012数据集finetune,接受输入图片大小为 224 x 224 x 3,支持直接通过命令行或者 Python 接口进行预测。 + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + + +- ### 2、安装 + + - ```shell + $ hub install resnext101_64x4d_imagenet + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | 
[零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run resnext101_64x4d_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现图像分类模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="resnext101_64x4d_imagenet") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - 分类接口API。 + - **参数** + - data:dict类型,key为image,str类型,value为待检测的图片路径,list类型。 + + - **返回** + - result:list类型,每个元素为对应输入图片的预测结果。预测结果为dict类型,key为该图片分类结果label,value为该label对应的概率 + + + + + +## 四、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install resnext101_64x4d_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/resnext101_vd_32x4d_imagenet/README.md b/modules/image/classification/resnext101_vd_32x4d_imagenet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7c21889b7429f2b1cbd8dd42d0abea640a6519bd --- /dev/null +++ b/modules/image/classification/resnext101_vd_32x4d_imagenet/README.md @@ -0,0 +1,83 @@ +# resnext101_vd_32x4d_imagenet + +|模型名称|resnext101_vd_32x4d_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|ResNeXt| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|否| +|模型大小|172MB| +|最新更新日期|-| +|数据指标|-| + + +## 一、模型基本信息 + + + +- ### 模型介绍 + + - ResNeXt 是由 UC San Diego 和 Facebook AI 研究所于2017年提出的图像分类模型,模型沿袭了 VGG/ResNets 的堆叠思想,并采用 split-transform-merge 策略来增加网络的分支数。resnext101_vd_32x4d,表示 layers 为 101, 分支数为 32,每个分支的输入输出 channels 为4。该 PaddleHub Module 在包含数十亿张社交媒体图片的数据集上进行弱监督训练,并使用ImageNet-2012数据集finetune,接受输入图片大小为 224 x 224 x 3,支持直接通过命令行或者 Python 接口进行预测。 + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + + +- ### 2、安装 + + - ```shell + $ hub install resnext101_vd_32x4d_imagenet + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run resnext101_vd_32x4d_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现图像分类模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="resnext101_vd_32x4d_imagenet") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - 分类接口API。 + - **参数** + - data:dict类型,key为image,str类型,value为待检测的图片路径,list类型。 + + - **返回** + - result:list类型,每个元素为对应输入图片的预测结果。预测结果为dict类型,key为该图片分类结果label,value为该label对应的概率 + + + + + +## 四、更新历史 + +* 1.0.0 + + 初始发布 + - ```shell + $ hub install resnext101_vd_32x4d_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/resnext101_vd_64x4d_imagenet/README.md b/modules/image/classification/resnext101_vd_64x4d_imagenet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b6d6c5c025cb98164f3797b9c4e4e7fa4e192b2c --- /dev/null +++ b/modules/image/classification/resnext101_vd_64x4d_imagenet/README.md @@ -0,0 +1,83 @@ +# resnext101_vd_64x4d_imagenet + 
+|模型名称|resnext101_vd_64x4d_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|ResNeXt_vd| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|否| +|模型大小|172MB| +|最新更新日期|-| +|数据指标|-| + + +## 一、模型基本信息 + + + +- ### 模型介绍 + + - ResNeXt 是由 UC San Diego 和 Facebook AI 研究所于2017年提出的图像分类模型,模型沿袭了 VGG/ResNets 的堆叠思想,并采用 split-transform-merge 策略来增加网络的分支数。resnext101_vd_64x4d,表示 layers 为 101, 分支数为 64,每个分支的输入输出 channels 为4。该 PaddleHub Module 在包含数十亿张社交媒体图片的数据集上进行弱监督训练,并使用ImageNet-2012数据集finetune,接受输入图片大小为 224 x 224 x 3,支持直接通过命令行或者 Python 接口进行预测。 + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + + +- ### 2、安装 + + - ```shell + $ hub install resnext101_vd_64x4d_imagenet + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run resnext101_vd_64x4d_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现图像分类模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="resnext101_vd_64x4d_imagenet") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - 分类接口API。 + - **参数** + - data:dict类型,key为image,str类型,value为待检测的图片路径,list类型。 + + - **返回** + - result:list类型,每个元素为对应输入图片的预测结果。预测结果为dict类型,key为该图片分类结果label,value为该label对应的概率 + + + + + +## 四、更新历史 + +* 1.0.0 + + 初始发布 + - ```shell + $ hub install resnext101_vd_64x4d_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/resnext152_32x4d_imagenet/README.md b/modules/image/classification/resnext152_32x4d_imagenet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d748c6d52cba08259034b6cb68125d4574d8179f --- /dev/null +++ b/modules/image/classification/resnext152_32x4d_imagenet/README.md @@ -0,0 +1,85 @@ +# resnext152_32x4d_imagenet + +|模型名称|resnext152_32x4d_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|ResNeXt| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|否| +|模型大小|233MB| +|最新更新日期|-| +|数据指标|-| + + +## 一、模型基本信息 + + + +- ### 模型介绍 + + - ResNeXt 是由 UC San Diego 和 Facebook AI 研究所于2017年提出的图像分类模型,模型沿袭了 VGG/ResNets 的堆叠思想,并采用 split-transform-merge 策略来增加网络的分支数。resnext152_32x4d,表示 layers 为 152, 分支数为32,每个分支的输入输出 channels 为4。该 PaddleHub Module 在包含数十亿张社交媒体图片的数据集上进行弱监督训练,并使用ImageNet-2012数据集finetune,接受输入图片大小为 224 x 224 x 3,支持直接通过命令行或者 Python 接口进行预测。 + + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + + +- ### 2、安装 + + - ```shell + $ hub install resnext152_32x4d_imagenet + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run resnext152_32x4d_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现图像分类模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="resnext152_32x4d_imagenet") + test_img_path = 
"/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - 分类接口API。 + - **参数** + - data:dict类型,key为image,str类型,value为待检测的图片路径,list类型。 + + - **返回** + - result:list类型,每个元素为对应输入图片的预测结果。预测结果为dict类型,key为该图片分类结果label,value为该label对应的概率 + + + + + +## 四、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install resnext152_32x4d_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/resnext152_64x4d_imagenet/README.md b/modules/image/classification/resnext152_64x4d_imagenet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..43508a2fedf83dc465fa1e30f526a9274237c2a1 --- /dev/null +++ b/modules/image/classification/resnext152_64x4d_imagenet/README.md @@ -0,0 +1,84 @@ +# resnext152_64x4d_imagenet + +|模型名称|resnext152_64x4d_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|ResNeXt| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|否| +|模型大小|444MB| +|最新更新日期|-| +|数据指标|-| + + +## 一、模型基本信息 + + + +- ### 模型介绍 + + - ResNeXt 是由 UC San Diego 和 Facebook AI 研究所于2017年提出的图像分类模型,模型沿袭了 VGG/ResNets 的堆叠思想,并采用 split-transform-merge 策略来增加网络的分支数。resnext152_64x4d,表示 layers 为 152, 分支数为64,每个分支的输入输出 channels 为4。该 PaddleHub Module 在包含数十亿张社交媒体图片的数据集上进行弱监督训练,并使用ImageNet-2012数据集finetune,接受输入图片大小为 224 x 224 x 3,支持直接通过命令行或者 Python 接口进行预测。 + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + + +- ### 2、安装 + + - ```shell + $ hub install resnext152_64x4d_imagenet + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run resnext152_64x4d_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现图像分类模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="resnext152_64x4d_imagenet") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - 分类接口API。 + - **参数** + - data:dict类型,key为image,str类型,value为待检测的图片路径,list类型。 + + - **返回** + - result:list类型,每个元素为对应输入图片的预测结果。预测结果为dict类型,key为该图片分类结果label,value为该label对应的概率 + + + + + +## 四、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install resnext152_64x4d_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/resnext152_vd_64x4d_imagenet/README.md b/modules/image/classification/resnext152_vd_64x4d_imagenet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..537dae7832d4786b2139b686e8cfc243eb9b5be0 --- /dev/null +++ b/modules/image/classification/resnext152_vd_64x4d_imagenet/README.md @@ -0,0 +1,84 @@ +# resnext152_vd_64x4d_imagenet + +|模型名称|resnext152_vd_64x4d_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|ResNeXt_vd| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|否| +|模型大小|444MB| +|最新更新日期|-| +|数据指标|-| + + +## 一、模型基本信息 + + + +- ### 模型介绍 + + - ResNeXt 是由 UC San Diego 和 Facebook AI 研究所于2017年提出的图像分类模型,模型沿袭了 VGG/ResNets 的堆叠思想,并采用 split-transform-merge 策略来增加网络的分支数。resnext152_vd_64x4d,表示 layers 为 152, 分支数为64,每个分支的输入输出 channels, 并采用了 3 个 3*3 的卷积核替代 ResNeXt152_64x4d 中第一个 7*7 的卷积核。该 PaddleHub Module 
在包含数十亿张社交媒体图片的数据集上进行弱监督训练,并使用ImageNet-2012数据集finetune,接受输入图片大小为 224 x 224 x 3,支持直接通过命令行或者 Python 接口进行预测。 + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + + +- ### 2、安装 + + - ```shell + $ hub install resnext152_vd_64x4d_imagenet + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run resnext152_vd_64x4d_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现图像分类模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="resnext152_vd_64x4d_imagenet") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - 分类接口API。 + - **参数** + - data:dict类型,key为image,str类型,value为待检测的图片路径,list类型。 + + - **返回** + - result:list类型,每个元素为对应输入图片的预测结果。预测结果为dict类型,key为该图片分类结果label,value为该label对应的概率 + + + + + +## 四、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install resnext152_vd_64x4d_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/resnext50_32x4d_imagenet/README.md b/modules/image/classification/resnext50_32x4d_imagenet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..474bd4cac3c96f9d13e9eacb9872c8b72e4eaf90 --- /dev/null +++ b/modules/image/classification/resnext50_32x4d_imagenet/README.md @@ -0,0 +1,84 @@ +# resnext50_32x4d_imagenet + +|模型名称|resnext50_32x4d_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|ResNeXt| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|否| +|模型大小|97MB| +|最新更新日期|-| +|数据指标|-| + + +## 一、模型基本信息 + + + +- ### 模型介绍 + + - ResNeXt 是由 UC San Diego 和 Facebook AI 研究所于2017年提出的图像分类模型,模型沿袭了 VGG/ResNets 的堆叠思想,并采用 split-transform-merge 策略来增加网络的分支数。resnext50_32x4d,表示 layers 为 50, 分支数为 32,每个分支的输入输出 channels 为4。该 PaddleHub Module 在包含数十亿张社交媒体图片的数据集上进行弱监督训练,并使用ImageNet-2012数据集finetune,接受输入图片大小为 224 x 224 x 3,支持直接通过命令行或者 Python 接口进行预测。 + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + + +- ### 2、安装 + + - ```shell + $ hub install resnext50_32x4d_imagenet + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run resnext50_32x4d_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现图像分类模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="resnext50_32x4d_imagenet") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - 分类接口API。 + - **参数** + - data:dict类型,key为image,str类型,value为待检测的图片路径,list类型。 + + - **返回** + - result:list类型,每个元素为对应输入图片的预测结果。预测结果为dict类型,key为该图片分类结果label,value为该label对应的概率 + + + + + +## 四、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub 
install resnext50_32x4d_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/resnext50_64x4d_imagenet/README.md b/modules/image/classification/resnext50_64x4d_imagenet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..740c56812597a911dfb4b21b9bacf1c6852e16ba --- /dev/null +++ b/modules/image/classification/resnext50_64x4d_imagenet/README.md @@ -0,0 +1,84 @@ +# resnext50_64x4d_imagenet + +|模型名称|resnext50_64x4d_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|ResNeXt| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|否| +|模型大小|174MB| +|最新更新日期|-| +|数据指标|-| + + +## 一、模型基本信息 + + + +- ### 模型介绍 + + - ResNeXt 是由 UC San Diego 和 Facebook AI 研究所于2017年提出的图像分类模型,模型沿袭了 VGG/ResNets 的堆叠思想,并采用 split-transform-merge 策略来增加网络的分支数。resnext50_64x4d,表示 layers 为 50, 分支数为 64,每个分支的输入输出 channels 为4。该 PaddleHub Module 在包含数十亿张社交媒体图片的数据集上进行弱监督训练,并使用ImageNet-2012数据集finetune,接受输入图片大小为 224 x 224 x 3,支持直接通过命令行或者 Python 接口进行预测。 + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + + +- ### 2、安装 + + - ```shell + $ hub install resnext50_64x4d_imagenet + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run resnext50_64x4d_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现图像分类模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="resnext50_64x4d_imagenet") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - 分类接口API。 + - **参数** + - data:dict类型,key为image,str类型,value为待检测的图片路径,list类型。 + + - **返回** + - result:list类型,每个元素为对应输入图片的预测结果。预测结果为dict类型,key为该图片分类结果label,value为该label对应的概率 + + + + + +## 四、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install resnext50_64x4d_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/resnext50_vd_32x4d_imagenet/README.md b/modules/image/classification/resnext50_vd_32x4d_imagenet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..02e3585777c743ebaeebf90228455a32ae20c827 --- /dev/null +++ b/modules/image/classification/resnext50_vd_32x4d_imagenet/README.md @@ -0,0 +1,84 @@ +# resnext50_vd_32x4d_imagenet + +|模型名称|resnext50_vd_32x4d_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|ResNeXt_vd| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|否| +|模型大小|98MB| +|最新更新日期|-| +|数据指标|-| + + +## 一、模型基本信息 + + + +- ### 模型介绍 + + - ResNeXt 是由 UC San Diego 和 Facebook AI 研究所于2017年提出的图像分类模型,模型沿袭了 VGG/ResNets 的堆叠思想,并采用 split-transform-merge 策略来增加网络的分支数。resnext50_vd_32x4d,表示 layers 为 50, 分支数为 32,每个分支的输入输出 channels 为4。该 PaddleHub Module 在包含数十亿张社交媒体图片的数据集上进行弱监督训练,并使用ImageNet-2012数据集finetune,接受输入图片大小为 224 x 224 x 3,支持直接通过命令行或者 Python 接口进行预测。 + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + + +- ### 2、安装 + + - ```shell + $ hub install resnext50_vd_32x4d_imagenet + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | 
[零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run resnext50_vd_32x4d_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现图像分类模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="resnext50_vd_32x4d_imagenet") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - 分类接口API。 + - **参数** + - data:dict类型,key为image,str类型,value为待检测的图片路径,list类型。 + + - **返回** + - result:list类型,每个元素为对应输入图片的预测结果。预测结果为dict类型,key为该图片分类结果label,value为该label对应的概率 + + + + + +## 四、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install resnext50_vd_32x4d_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/resnext50_vd_64x4d_imagenet/README.md b/modules/image/classification/resnext50_vd_64x4d_imagenet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..5b473b5c869ada3001d01b3cb8a8cf0a3e3adc11 --- /dev/null +++ b/modules/image/classification/resnext50_vd_64x4d_imagenet/README.md @@ -0,0 +1,83 @@ +# resnext50_vd_64x4d_imagenet + +|模型名称|resnext50_vd_64x4d_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|ResNeXt_vd| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|否| +|模型大小|175MB| +|最新更新日期|-| +|数据指标|-| + + +## 一、模型基本信息 + + + +- ### 模型介绍 + + - ResNeXt 是由 UC San Diego 和 Facebook AI 研究所于2017年提出的图像分类模型,模型沿袭了 VGG/ResNets 的堆叠思想,并采用 split-transform-merge 策略来增加网络的分支数。resnext50_vd_64x4d,表示 layers 为 50, 分支数为 64,每个分支的输入输出 channels 为4。该 PaddleHub Module 在包含数十亿张社交媒体图片的数据集上进行弱监督训练,并使用ImageNet-2012数据集finetune,接受输入图片大小为 224 x 224 x 3,支持直接通过命令行或者 Python 接口进行预测。 + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + + +- ### 2、安装 + + - ```shell + $ hub install resnext50_vd_64x4d_imagenet + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run resnext50_vd_64x4d_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现图像分类模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="resnext50_vd_64x4d_imagenet") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - 分类接口API。 + - **参数** + - data:dict类型,key为image,str类型,value为待检测的图片路径,list类型。 + + - **返回** + - result:list类型,每个元素为对应输入图片的预测结果。预测结果为dict类型,key为该图片分类结果label,value为该label对应的概率 + + + + + +## 四、更新历史 + +* 1.0.0 + + 初始发布 + - ```shell + $ hub install resnext50_vd_64x4d_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/rexnet_1_0_imagenet/README.md b/modules/image/classification/rexnet_1_0_imagenet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..c04f376bf099a69ccdd8c8c1948be9f265241ccf --- /dev/null +++ b/modules/image/classification/rexnet_1_0_imagenet/README.md @@ -0,0 +1,181 @@ +# rexnet_1_0_imagenet + +|模型名称|rexnet_1_0_imagenet| +| :--- | :---: | +|类别|图像-图像分类| 
+|网络|ReXNet| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|是| +|模型大小|28MB| +|指标|-| +|最新更新日期|2021-09-14| + + +## 一、模型基本信息 + +- ### 模型介绍 + + - ReXNet 由 NAVER AI Lab 提出的基于新的网络设计原则而设计的网络。作者针对现有网络中具有代表性的瓶颈问题,提出了一套设计原则,他们认为,常规设计会产生代表性瓶颈,这会影响模型性能。为了研究表征瓶颈,作者研究了由一万个随机网络生成的特征的矩阵秩。此外,还研究了整个层的通道配置以设计更准确的网络架构。最后,作者提出了一套简单有效的设计原则来缓解表征瓶颈。 + + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 + +- ### 2、安装 + - ```shell + $ hub install rexnet_1_0_imagenet + ``` + + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1.命令行预测 + + ```shell + $ hub run rexnet_1_0_imagenet --input_path "/PATH/TO/IMAGE" --top_k 5 + ``` +- ### 2.预测代码示例 + + ```python + import paddle + import paddlehub as hub + if __name__ == '__main__': + model = hub.Module(name='rexnet_1_0_imagenet') + result = model.predict(['flower.jpg']) + ``` +- ### 3.如何开始Fine-tune + + - 在完成安装PaddlePaddle与PaddleHub后,通过执行`python train.py`即可开始使用rexnet_1_0_imagenet对[Flowers](../../docs/reference/datasets.md#class-hubdatasetsflowers)等数据集进行Fine-tune。 + + - 代码步骤 + + - Step1: 定义数据预处理方式 + - ```python + import paddlehub.vision.transforms as T + transforms = T.Compose([T.Resize((256, 256)), + T.CenterCrop(224), + T.Normalize(mean=[0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225])], + to_rgb=True) + ``` + + - `transforms` 数据增强模块定义了丰富的数据预处理方式,用户可按照需求替换自己需要的数据预处理方式。 + + - Step2: 下载数据集并使用 + - ```python + from paddlehub.datasets import Flowers + flowers = Flowers(transforms) + flowers_validate = Flowers(transforms, mode='val') + ``` + + * `transforms`: 数据预处理方式。 + * `mode`: 选择数据模式,可选项有 `train`, `test`, `val`, 默认为`train`。 + + * 数据集的准备代码可以参考 [flowers.py](../../paddlehub/datasets/flowers.py)。`hub.datasets.Flowers()` 会自动从网络下载数据集并解压到用户目录下`$HOME/.paddlehub/dataset`目录。 + + + - Step3: 加载预训练模型 + + - ```python + model = hub.Module(name="rexnet_1_0_imagenet", label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"]) + ``` + * `name`: 选择预训练模型的名字。 + * `label_list`: 设置输出分类类别,默认为Imagenet2012类别。 + + - Step4: 选择优化策略和运行配置 + + ```python + optimizer = paddle.optimizer.Adam(learning_rate=0.001, parameters=model.parameters()) + trainer = Trainer(model, optimizer, checkpoint_dir='img_classification_ckpt') + trainer.train(flowers, epochs=100, batch_size=32, eval_dataset=flowers_validate, save_interval=1) + ``` + + + - 运行配置 + + - `Trainer` 主要控制Fine-tune的训练,包含以下可控制的参数: + + * `model`: 被优化模型; + * `optimizer`: 优化器选择; + * `use_vdl`: 是否使用vdl可视化训练过程; + * `checkpoint_dir`: 保存模型参数的地址; + * `compare_metrics`: 保存最优模型的衡量指标; + + - `trainer.train` 主要控制具体的训练过程,包含以下可控制的参数: + + * `train_dataset`: 训练时所用的数据集; + * `epochs`: 训练轮数; + * `batch_size`: 训练的批大小,如果使用GPU,请根据实际情况调整batch_size; + * `num_workers`: works的数量,默认为0; + * `eval_dataset`: 验证集; + * `log_interval`: 打印日志的间隔, 单位为执行批训练的次数。 + * `save_interval`: 保存模型的间隔频次,单位为执行训练的轮数。 + + - 模型预测 + + - 当完成Fine-tune后,Fine-tune过程在验证集上表现最优的模型会被保存在`${CHECKPOINT_DIR}/best_model`目录下,其中`${CHECKPOINT_DIR}`目录为Fine-tune时所选择的保存checkpoint的目录。 我们使用该模型来进行预测。predict.py脚本如下: + + - ```python + import paddle + import paddlehub as hub + if __name__ == '__main__': + model = hub.Module(name='rexnet_1_0_imagenet', label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"], load_checkpoint='/PATH/TO/CHECKPOINT') + result = model.predict(['flower.jpg']) + ``` + + + - **NOTE:** 进行预测时,所选择的module,checkpoint_dir,dataset必须和Fine-tune所用的一样。 + +## 四、服务部署 + +- PaddleHub 
Serving可以部署一个在线分类任务服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + + - ```shell + $ hub serving start -m rexnet_1_0_imagenet + ``` + + - 这样就完成了一个分类任务服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + ```python + import requests + import json + import cv2 + import base64 + import numpy as np + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + # 发送HTTP请求 + org_im = cv2.imread('/PATH/TO/IMAGE') + data = {'images':[cv2_to_base64(org_im)], 'top_k':2} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/rexnet_1_0_imagenet" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + data =r.json()["results"]['data'] + ``` +## 五、更新历史 + +* 1.0.0 + + 初始发布 diff --git a/modules/image/classification/rexnet_1_0_imagenet/label_list.txt b/modules/image/classification/rexnet_1_0_imagenet/label_list.txt new file mode 100644 index 0000000000000000000000000000000000000000..52baabc68e968dde482ca143728295355d83203a --- /dev/null +++ b/modules/image/classification/rexnet_1_0_imagenet/label_list.txt @@ -0,0 +1,1000 @@ +tench +goldfish +great white shark +tiger shark +hammerhead +electric ray +stingray +cock +hen +ostrich +brambling +goldfinch +house finch +junco +indigo bunting +robin +bulbul +jay +magpie +chickadee +water ouzel +kite +bald eagle +vulture +great grey owl +European fire salamander +common newt +eft +spotted salamander +axolotl +bullfrog +tree frog +tailed frog +loggerhead +leatherback turtle +mud turtle +terrapin +box turtle +banded gecko +common iguana +American chameleon +whiptail +agama +frilled lizard +alligator lizard +Gila monster +green lizard +African chameleon +Komodo dragon +African crocodile +American alligator +triceratops +thunder snake +ringneck snake +hognose snake +green snake +king snake +garter snake +water snake +vine snake +night snake +boa constrictor +rock python +Indian cobra +green mamba +sea snake +horned viper +diamondback +sidewinder +trilobite +harvestman +scorpion +black and gold garden spider +barn spider +garden spider +black widow +tarantula +wolf spider +tick +centipede +black grouse +ptarmigan +ruffed grouse +prairie chicken +peacock +quail +partridge +African grey +macaw +sulphur-crested cockatoo +lorikeet +coucal +bee eater +hornbill +hummingbird +jacamar +toucan +drake +red-breasted merganser +goose +black swan +tusker +echidna +platypus +wallaby +koala +wombat +jellyfish +sea anemone +brain coral +flatworm +nematode +conch +snail +slug +sea slug +chiton +chambered nautilus +Dungeness crab +rock crab +fiddler crab +king crab +American lobster +spiny lobster +crayfish +hermit crab +isopod +white stork +black stork +spoonbill +flamingo +little blue heron +American egret +bittern +crane +limpkin +European gallinule +American coot +bustard +ruddy turnstone +red-backed sandpiper +redshank +dowitcher +oystercatcher +pelican +king penguin +albatross +grey whale +killer whale +dugong +sea lion +Chihuahua +Japanese spaniel +Maltese dog +Pekinese +Shih-Tzu +Blenheim spaniel +papillon +toy terrier +Rhodesian ridgeback +Afghan hound +basset +beagle +bloodhound +bluetick +black-and-tan coonhound +Walker hound +English foxhound +redbone +borzoi +Irish wolfhound +Italian 
greyhound +whippet +Ibizan hound +Norwegian elkhound +otterhound +Saluki +Scottish deerhound +Weimaraner +Staffordshire bullterrier +American Staffordshire terrier +Bedlington terrier +Border terrier +Kerry blue terrier +Irish terrier +Norfolk terrier +Norwich terrier +Yorkshire terrier +wire-haired fox terrier +Lakeland terrier +Sealyham terrier +Airedale +cairn +Australian terrier +Dandie Dinmont +Boston bull +miniature schnauzer +giant schnauzer +standard schnauzer +Scotch terrier +Tibetan terrier +silky terrier +soft-coated wheaten terrier +West Highland white terrier +Lhasa +flat-coated retriever +curly-coated retriever +golden retriever +Labrador retriever +Chesapeake Bay retriever +German short-haired pointer +vizsla +English setter +Irish setter +Gordon setter +Brittany spaniel +clumber +English springer +Welsh springer spaniel +cocker spaniel +Sussex spaniel +Irish water spaniel +kuvasz +schipperke +groenendael +malinois +briard +kelpie +komondor +Old English sheepdog +Shetland sheepdog +collie +Border collie +Bouvier des Flandres +Rottweiler +German shepherd +Doberman +miniature pinscher +Greater Swiss Mountain dog +Bernese mountain dog +Appenzeller +EntleBucher +boxer +bull mastiff +Tibetan mastiff +French bulldog +Great Dane +Saint Bernard +Eskimo dog +malamute +Siberian husky +dalmatian +affenpinscher +basenji +pug +Leonberg +Newfoundland +Great Pyrenees +Samoyed +Pomeranian +chow +keeshond +Brabancon griffon +Pembroke +Cardigan +toy poodle +miniature poodle +standard poodle +Mexican hairless +timber wolf +white wolf +red wolf +coyote +dingo +dhole +African hunting dog +hyena +red fox +kit fox +Arctic fox +grey fox +tabby +tiger cat +Persian cat +Siamese cat +Egyptian cat +cougar +lynx +leopard +snow leopard +jaguar +lion +tiger +cheetah +brown bear +American black bear +ice bear +sloth bear +mongoose +meerkat +tiger beetle +ladybug +ground beetle +long-horned beetle +leaf beetle +dung beetle +rhinoceros beetle +weevil +fly +bee +ant +grasshopper +cricket +walking stick +cockroach +mantis +cicada +leafhopper +lacewing +dragonfly +damselfly +admiral +ringlet +monarch +cabbage butterfly +sulphur butterfly +lycaenid +starfish +sea urchin +sea cucumber +wood rabbit +hare +Angora +hamster +porcupine +fox squirrel +marmot +beaver +guinea pig +sorrel +zebra +hog +wild boar +warthog +hippopotamus +ox +water buffalo +bison +ram +bighorn +ibex +hartebeest +impala +gazelle +Arabian camel +llama +weasel +mink +polecat +black-footed ferret +otter +skunk +badger +armadillo +three-toed sloth +orangutan +gorilla +chimpanzee +gibbon +siamang +guenon +patas +baboon +macaque +langur +colobus +proboscis monkey +marmoset +capuchin +howler monkey +titi +spider monkey +squirrel monkey +Madagascar cat +indri +Indian elephant +African elephant +lesser panda +giant panda +barracouta +eel +coho +rock beauty +anemone fish +sturgeon +gar +lionfish +puffer +abacus +abaya +academic gown +accordion +acoustic guitar +aircraft carrier +airliner +airship +altar +ambulance +amphibian +analog clock +apiary +apron +ashcan +assault rifle +backpack +bakery +balance beam +balloon +ballpoint +Band Aid +banjo +bannister +barbell +barber chair +barbershop +barn +barometer +barrel +barrow +baseball +basketball +bassinet +bassoon +bathing cap +bath towel +bathtub +beach wagon +beacon +beaker +bearskin +beer bottle +beer glass +bell cote +bib +bicycle-built-for-two +bikini +binder +binoculars +birdhouse +boathouse +bobsled +bolo tie +bonnet +bookcase +bookshop +bottlecap +bow +bow tie +brass +brassiere +breakwater 
+breastplate +broom +bucket +buckle +bulletproof vest +bullet train +butcher shop +cab +caldron +candle +cannon +canoe +can opener +cardigan +car mirror +carousel +carpenters kit +carton +car wheel +cash machine +cassette +cassette player +castle +catamaran +CD player +cello +cellular telephone +chain +chainlink fence +chain mail +chain saw +chest +chiffonier +chime +china cabinet +Christmas stocking +church +cinema +cleaver +cliff dwelling +cloak +clog +cocktail shaker +coffee mug +coffeepot +coil +combination lock +computer keyboard +confectionery +container ship +convertible +corkscrew +cornet +cowboy boot +cowboy hat +cradle +crane +crash helmet +crate +crib +Crock Pot +croquet ball +crutch +cuirass +dam +desk +desktop computer +dial telephone +diaper +digital clock +digital watch +dining table +dishrag +dishwasher +disk brake +dock +dogsled +dome +doormat +drilling platform +drum +drumstick +dumbbell +Dutch oven +electric fan +electric guitar +electric locomotive +entertainment center +envelope +espresso maker +face powder +feather boa +file +fireboat +fire engine +fire screen +flagpole +flute +folding chair +football helmet +forklift +fountain +fountain pen +four-poster +freight car +French horn +frying pan +fur coat +garbage truck +gasmask +gas pump +goblet +go-kart +golf ball +golfcart +gondola +gong +gown +grand piano +greenhouse +grille +grocery store +guillotine +hair slide +hair spray +half track +hammer +hamper +hand blower +hand-held computer +handkerchief +hard disc +harmonica +harp +harvester +hatchet +holster +home theater +honeycomb +hook +hoopskirt +horizontal bar +horse cart +hourglass +iPod +iron +jack-o-lantern +jean +jeep +jersey +jigsaw puzzle +jinrikisha +joystick +kimono +knee pad +knot +lab coat +ladle +lampshade +laptop +lawn mower +lens cap +letter opener +library +lifeboat +lighter +limousine +liner +lipstick +Loafer +lotion +loudspeaker +loupe +lumbermill +magnetic compass +mailbag +mailbox +maillot +maillot +manhole cover +maraca +marimba +mask +matchstick +maypole +maze +measuring cup +medicine chest +megalith +microphone +microwave +military uniform +milk can +minibus +miniskirt +minivan +missile +mitten +mixing bowl +mobile home +Model T +modem +monastery +monitor +moped +mortar +mortarboard +mosque +mosquito net +motor scooter +mountain bike +mountain tent +mouse +mousetrap +moving van +muzzle +nail +neck brace +necklace +nipple +notebook +obelisk +oboe +ocarina +odometer +oil filter +organ +oscilloscope +overskirt +oxcart +oxygen mask +packet +paddle +paddlewheel +padlock +paintbrush +pajama +palace +panpipe +paper towel +parachute +parallel bars +park bench +parking meter +passenger car +patio +pay-phone +pedestal +pencil box +pencil sharpener +perfume +Petri dish +photocopier +pick +pickelhaube +picket fence +pickup +pier +piggy bank +pill bottle +pillow +ping-pong ball +pinwheel +pirate +pitcher +plane +planetarium +plastic bag +plate rack +plow +plunger +Polaroid camera +pole +police van +poncho +pool table +pop bottle +pot +potters wheel +power drill +prayer rug +printer +prison +projectile +projector +puck +punching bag +purse +quill +quilt +racer +racket +radiator +radio +radio telescope +rain barrel +recreational vehicle +reel +reflex camera +refrigerator +remote control +restaurant +revolver +rifle +rocking chair +rotisserie +rubber eraser +rugby ball +rule +running shoe +safe +safety pin +saltshaker +sandal +sarong +sax +scabbard +scale +school bus +schooner +scoreboard +screen +screw +screwdriver +seat belt +sewing machine +shield +shoe shop 
+shoji +shopping basket +shopping cart +shovel +shower cap +shower curtain +ski +ski mask +sleeping bag +slide rule +sliding door +slot +snorkel +snowmobile +snowplow +soap dispenser +soccer ball +sock +solar dish +sombrero +soup bowl +space bar +space heater +space shuttle +spatula +speedboat +spider web +spindle +sports car +spotlight +stage +steam locomotive +steel arch bridge +steel drum +stethoscope +stole +stone wall +stopwatch +stove +strainer +streetcar +stretcher +studio couch +stupa +submarine +suit +sundial +sunglass +sunglasses +sunscreen +suspension bridge +swab +sweatshirt +swimming trunks +swing +switch +syringe +table lamp +tank +tape player +teapot +teddy +television +tennis ball +thatch +theater curtain +thimble +thresher +throne +tile roof +toaster +tobacco shop +toilet seat +torch +totem pole +tow truck +toyshop +tractor +trailer truck +tray +trench coat +tricycle +trimaran +tripod +triumphal arch +trolleybus +trombone +tub +turnstile +typewriter keyboard +umbrella +unicycle +upright +vacuum +vase +vault +velvet +vending machine +vestment +viaduct +violin +volleyball +waffle iron +wall clock +wallet +wardrobe +warplane +washbasin +washer +water bottle +water jug +water tower +whiskey jug +whistle +wig +window screen +window shade +Windsor tie +wine bottle +wing +wok +wooden spoon +wool +worm fence +wreck +yawl +yurt +web site +comic book +crossword puzzle +street sign +traffic light +book jacket +menu +plate +guacamole +consomme +hot pot +trifle +ice cream +ice lolly +French loaf +bagel +pretzel +cheeseburger +hotdog +mashed potato +head cabbage +broccoli +cauliflower +zucchini +spaghetti squash +acorn squash +butternut squash +cucumber +artichoke +bell pepper +cardoon +mushroom +Granny Smith +strawberry +orange +lemon +fig +pineapple +banana +jackfruit +custard apple +pomegranate +hay +carbonara +chocolate sauce +dough +meat loaf +pizza +potpie +burrito +red wine +espresso +cup +eggnog +alp +bubble +cliff +coral reef +geyser +lakeside +promontory +sandbar +seashore +valley +volcano +ballplayer +groom +scuba diver +rapeseed +daisy +yellow ladys slipper +corn +acorn +hip +buckeye +coral fungus +agaric +gyromitra +stinkhorn +earthstar +hen-of-the-woods +bolete +ear +toilet tissue diff --git a/modules/image/classification/rexnet_1_0_imagenet/module.py b/modules/image/classification/rexnet_1_0_imagenet/module.py new file mode 100644 index 0000000000000000000000000000000000000000..e74681e5c10472649f048d37f143e2abd29df5d5 --- /dev/null +++ b/modules/image/classification/rexnet_1_0_imagenet/module.py @@ -0,0 +1,196 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
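+# Overview of the code below:
+#   * conv_bn_act / conv_bn_swish append a Conv2D + BatchNorm2D block,
+#     followed by ReLU / ReLU6 or Swish, to a layer list in place.
+#   * SE is a squeeze-and-excitation block: adaptive average pooling, two
+#     1x1 convolutions and a sigmoid gate that rescales the input channels.
+#   * LinearBottleneck is the ReXNet inverted-bottleneck block: it expands
+#     channels when t != 1, applies a depthwise 3x3 convolution, optional SE,
+#     and adds a shortcut over the first in_channels channels when stride == 1
+#     and in_channels <= channels.
+#   * ReXNetV1 assembles these blocks and is registered as the
+#     rexnet_1_0_imagenet PaddleHub image classification Module through the
+#     @moduleinfo decorator.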
+import os +from math import ceil +from typing import Union + +import paddle +import paddle.nn as nn +import numpy as np +import paddlehub.vision.transforms as T +from paddle import ParamAttr +from paddlehub.module.module import moduleinfo +from paddlehub.module.cv_module import ImageClassifierModule + + +def conv_bn_act(out, in_channels, channels, kernel=1, stride=1, pad=0, num_group=1, active=True, relu6=False): + out.append(nn.Conv2D(in_channels, channels, kernel, stride, pad, groups=num_group, bias_attr=False)) + out.append(nn.BatchNorm2D(channels)) + if active: + out.append(nn.ReLU6() if relu6 else nn.ReLU()) + + +def conv_bn_swish(out, in_channels, channels, kernel=1, stride=1, pad=0, num_group=1): + out.append(nn.Conv2D(in_channels, channels, kernel, stride, pad, groups=num_group, bias_attr=False)) + out.append(nn.BatchNorm2D(channels)) + out.append(nn.Swish()) + + +class SE(nn.Layer): + def __init__(self, in_channels, channels, se_ratio=12): + super(SE, self).__init__() + self.avg_pool = nn.AdaptiveAvgPool2D(1) + self.fc = nn.Sequential( + nn.Conv2D(in_channels, channels // se_ratio, kernel_size=1, padding=0), + nn.BatchNorm2D(channels // se_ratio), nn.ReLU(), + nn.Conv2D(channels // se_ratio, channels, kernel_size=1, padding=0), nn.Sigmoid()) + + def forward(self, x): + y = self.avg_pool(x) + y = self.fc(y) + return x * y + + +class LinearBottleneck(nn.Layer): + def __init__(self, in_channels, channels, t, stride, use_se=True, se_ratio=12, **kwargs): + super(LinearBottleneck, self).__init__(**kwargs) + self.use_shortcut = stride == 1 and in_channels <= channels + self.in_channels = in_channels + self.out_channels = channels + + out = [] + if t != 1: + dw_channels = in_channels * t + conv_bn_swish(out, in_channels=in_channels, channels=dw_channels) + else: + dw_channels = in_channels + + conv_bn_act( + out, + in_channels=dw_channels, + channels=dw_channels, + kernel=3, + stride=stride, + pad=1, + num_group=dw_channels, + active=False) + + if use_se: + out.append(SE(dw_channels, dw_channels, se_ratio)) + + out.append(nn.ReLU6()) + conv_bn_act(out, in_channels=dw_channels, channels=channels, active=False, relu6=True) + self.out = nn.Sequential(*out) + + def forward(self, x): + out = self.out(x) + if self.use_shortcut: + out[:, 0:self.in_channels] += x + + return out + + +@moduleinfo( + name="rexnet_1_0_imagenet", + type="CV/classification", + author="paddlepaddle", + author_email="", + summary="rexnet_1_0_imagenet is a classification model, " + "this module is trained with Imagenet dataset.", + version="1.0.0", + meta=ImageClassifierModule) +class ReXNetV1(nn.Layer): + def __init__(self, + label_list: list = None, + load_checkpoint: str = None, + input_ch=16, + final_ch=180, + width_mult=1.0, + depth_mult=1.0, + class_dim=1000, + use_se=True, + se_ratio=12, + dropout_ratio=0.2, + bn_momentum=0.9): + + super(ReXNetV1, self).__init__() + + if label_list is not None: + self.labels = label_list + class_dim = len(self.labels) + else: + label_list = [] + label_file = os.path.join(self.directory, 'label_list.txt') + files = open(label_file) + for line in files.readlines(): + line = line.strip('\n') + label_list.append(line) + self.labels = label_list + class_dim = len(self.labels) + + layers = [1, 2, 2, 3, 3, 5] + strides = [1, 2, 2, 2, 1, 2] + use_ses = [False, False, True, True, True, True] + + layers = [ceil(element * depth_mult) for element in layers] + strides = sum([[element] + [1] * (layers[idx] - 1) for idx, element in enumerate(strides)], []) + if use_se: + use_ses = sum([[element] 
* layers[idx] for idx, element in enumerate(use_ses)], []) + else: + use_ses = [False] * sum(layers[:]) + ts = [1] * layers[0] + [6] * sum(layers[1:]) + + self.depth = sum(layers[:]) * 3 + stem_channel = 32 / width_mult if width_mult < 1.0 else 32 + inplanes = input_ch / width_mult if width_mult < 1.0 else input_ch + + features = [] + in_channels_group = [] + channels_group = [] + + # The following channel configuration is a simple instance to make each layer become an expand layer. + for i in range(self.depth // 3): + if i == 0: + in_channels_group.append(int(round(stem_channel * width_mult))) + channels_group.append(int(round(inplanes * width_mult))) + else: + in_channels_group.append(int(round(inplanes * width_mult))) + inplanes += final_ch / (self.depth // 3 * 1.0) + channels_group.append(int(round(inplanes * width_mult))) + + conv_bn_swish(features, 3, int(round(stem_channel * width_mult)), kernel=3, stride=2, pad=1) + + for block_idx, (in_c, c, t, s, se) in enumerate(zip(in_channels_group, channels_group, ts, strides, use_ses)): + features.append(LinearBottleneck(in_channels=in_c, channels=c, t=t, stride=s, use_se=se, se_ratio=se_ratio)) + + pen_channels = int(1280 * width_mult) + conv_bn_swish(features, c, pen_channels) + + features.append(nn.AdaptiveAvgPool2D(1)) + self.features = nn.Sequential(*features) + self.output = nn.Sequential(nn.Dropout(dropout_ratio), nn.Conv2D(pen_channels, class_dim, 1, bias_attr=True)) + + if load_checkpoint is not None: + self.model_dict = paddle.load(load_checkpoint) + self.set_dict(self.model_dict) + print("load custom checkpoint success") + else: + checkpoint = os.path.join(self.directory, 'model.pdparams') + self.model_dict = paddle.load(checkpoint) + self.set_dict(self.model_dict) + print("load pretrained checkpoint success") + + def transforms(self, images: Union[str, np.ndarray]): + transforms = T.Compose([ + T.Resize((256, 256)), + T.CenterCrop(224), + T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + ], + to_rgb=True) + return transforms(images).astype('float32') + + def forward(self, x): + feat = self.features(x) + x = self.output(feat).squeeze(axis=-1).squeeze(axis=-1) + return x, feat diff --git a/modules/image/classification/rexnet_1_3_imagenet/README.md b/modules/image/classification/rexnet_1_3_imagenet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7d19e797c3a15403d07c88f542bb6829e2cb80eb --- /dev/null +++ b/modules/image/classification/rexnet_1_3_imagenet/README.md @@ -0,0 +1,181 @@ +# rexnet_1_3_imagenet + +|模型名称|rexnet_1_3_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|ReXNet| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|是| +|模型大小|44MB| +|指标|-| +|最新更新日期|2021-09-14| + + +## 一、模型基本信息 + +- ### 模型介绍 + + - ReXNet 由 NAVER AI Lab 提出的基于新的网络设计原则而设计的网络。作者针对现有网络中具有代表性的瓶颈问题,提出了一套设计原则,他们认为,常规设计会产生代表性瓶颈,这会影响模型性能。为了研究表征瓶颈,作者研究了由一万个随机网络生成的特征的矩阵秩。此外,还研究了整个层的通道配置以设计更准确的网络架构。最后,作者提出了一套简单有效的设计原则来缓解表征瓶颈。 + + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 + +- ### 2、安装 + - ```shell + $ hub install rexnet_1_3_imagenet + ``` + + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1.命令行预测 + + ```shell + $ hub run rexnet_1_3_imagenet --input_path "/PATH/TO/IMAGE" --top_k 5 + ``` +- ### 2.预测代码示例 + + ```python + import paddle + import paddlehub as hub + if __name__ == '__main__': + 
model = hub.Module(name='rexnet_1_3_imagenet') + result = model.predict(['flower.jpg']) + ``` +- ### 3.如何开始Fine-tune + + - 在完成安装PaddlePaddle与PaddleHub后,通过执行`python train.py`即可开始使用rexnet_1_3_imagenet对[Flowers](../../docs/reference/datasets.md#class-hubdatasetsflowers)等数据集进行Fine-tune。 + + - 代码步骤 + + - Step1: 定义数据预处理方式 + - ```python + import paddlehub.vision.transforms as T + transforms = T.Compose([T.Resize((256, 256)), + T.CenterCrop(224), + T.Normalize(mean=[0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225])], + to_rgb=True) + ``` + + - `transforms` 数据增强模块定义了丰富的数据预处理方式,用户可按照需求替换自己需要的数据预处理方式。 + + - Step2: 下载数据集并使用 + - ```python + from paddlehub.datasets import Flowers + flowers = Flowers(transforms) + flowers_validate = Flowers(transforms, mode='val') + ``` + + * `transforms`: 数据预处理方式。 + * `mode`: 选择数据模式,可选项有 `train`, `test`, `val`, 默认为`train`。 + + * 数据集的准备代码可以参考 [flowers.py](../../paddlehub/datasets/flowers.py)。`hub.datasets.Flowers()` 会自动从网络下载数据集并解压到用户目录下`$HOME/.paddlehub/dataset`目录。 + + + - Step3: 加载预训练模型 + + - ```python + model = hub.Module(name="rexnet_1_3_imagenet", label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"]) + ``` + * `name`: 选择预训练模型的名字。 + * `label_list`: 设置输出分类类别,默认为Imagenet2012类别。 + + - Step4: 选择优化策略和运行配置 + + ```python + optimizer = paddle.optimizer.Adam(learning_rate=0.001, parameters=model.parameters()) + trainer = Trainer(model, optimizer, checkpoint_dir='img_classification_ckpt') + trainer.train(flowers, epochs=100, batch_size=32, eval_dataset=flowers_validate, save_interval=1) + ``` + + + - 运行配置 + + - `Trainer` 主要控制Fine-tune的训练,包含以下可控制的参数: + + * `model`: 被优化模型; + * `optimizer`: 优化器选择; + * `use_vdl`: 是否使用vdl可视化训练过程; + * `checkpoint_dir`: 保存模型参数的地址; + * `compare_metrics`: 保存最优模型的衡量指标; + + - `trainer.train` 主要控制具体的训练过程,包含以下可控制的参数: + + * `train_dataset`: 训练时所用的数据集; + * `epochs`: 训练轮数; + * `batch_size`: 训练的批大小,如果使用GPU,请根据实际情况调整batch_size; + * `num_workers`: works的数量,默认为0; + * `eval_dataset`: 验证集; + * `log_interval`: 打印日志的间隔, 单位为执行批训练的次数。 + * `save_interval`: 保存模型的间隔频次,单位为执行训练的轮数。 + + - 模型预测 + + - 当完成Fine-tune后,Fine-tune过程在验证集上表现最优的模型会被保存在`${CHECKPOINT_DIR}/best_model`目录下,其中`${CHECKPOINT_DIR}`目录为Fine-tune时所选择的保存checkpoint的目录。 我们使用该模型来进行预测。predict.py脚本如下: + + - ```python + import paddle + import paddlehub as hub + if __name__ == '__main__': + model = hub.Module(name='rexnet_1_3_imagenet', label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"], load_checkpoint='/PATH/TO/CHECKPOINT') + result = model.predict(['flower.jpg']) + ``` + + + - **NOTE:** 进行预测时,所选择的module,checkpoint_dir,dataset必须和Fine-tune所用的一样。 + +## 四、服务部署 + +- PaddleHub Serving可以部署一个在线分类任务服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + + - ```shell + $ hub serving start -m rexnet_1_3_imagenet + ``` + + - 这样就完成了一个分类任务服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + ```python + import requests + import json + import cv2 + import base64 + import numpy as np + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + # 发送HTTP请求 + org_im = cv2.imread('/PATH/TO/IMAGE') + data = {'images':[cv2_to_base64(org_im)], 'top_k':2} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/rexnet_1_3_imagenet" + r = 
requests.post(url=url, headers=headers, data=json.dumps(data)) + data =r.json()["results"]['data'] + ``` +## 五、更新历史 + +* 1.0.0 + + 初始发布 diff --git a/modules/image/classification/rexnet_1_3_imagenet/label_list.txt b/modules/image/classification/rexnet_1_3_imagenet/label_list.txt new file mode 100644 index 0000000000000000000000000000000000000000..52baabc68e968dde482ca143728295355d83203a --- /dev/null +++ b/modules/image/classification/rexnet_1_3_imagenet/label_list.txt @@ -0,0 +1,1000 @@ +tench +goldfish +great white shark +tiger shark +hammerhead +electric ray +stingray +cock +hen +ostrich +brambling +goldfinch +house finch +junco +indigo bunting +robin +bulbul +jay +magpie +chickadee +water ouzel +kite +bald eagle +vulture +great grey owl +European fire salamander +common newt +eft +spotted salamander +axolotl +bullfrog +tree frog +tailed frog +loggerhead +leatherback turtle +mud turtle +terrapin +box turtle +banded gecko +common iguana +American chameleon +whiptail +agama +frilled lizard +alligator lizard +Gila monster +green lizard +African chameleon +Komodo dragon +African crocodile +American alligator +triceratops +thunder snake +ringneck snake +hognose snake +green snake +king snake +garter snake +water snake +vine snake +night snake +boa constrictor +rock python +Indian cobra +green mamba +sea snake +horned viper +diamondback +sidewinder +trilobite +harvestman +scorpion +black and gold garden spider +barn spider +garden spider +black widow +tarantula +wolf spider +tick +centipede +black grouse +ptarmigan +ruffed grouse +prairie chicken +peacock +quail +partridge +African grey +macaw +sulphur-crested cockatoo +lorikeet +coucal +bee eater +hornbill +hummingbird +jacamar +toucan +drake +red-breasted merganser +goose +black swan +tusker +echidna +platypus +wallaby +koala +wombat +jellyfish +sea anemone +brain coral +flatworm +nematode +conch +snail +slug +sea slug +chiton +chambered nautilus +Dungeness crab +rock crab +fiddler crab +king crab +American lobster +spiny lobster +crayfish +hermit crab +isopod +white stork +black stork +spoonbill +flamingo +little blue heron +American egret +bittern +crane +limpkin +European gallinule +American coot +bustard +ruddy turnstone +red-backed sandpiper +redshank +dowitcher +oystercatcher +pelican +king penguin +albatross +grey whale +killer whale +dugong +sea lion +Chihuahua +Japanese spaniel +Maltese dog +Pekinese +Shih-Tzu +Blenheim spaniel +papillon +toy terrier +Rhodesian ridgeback +Afghan hound +basset +beagle +bloodhound +bluetick +black-and-tan coonhound +Walker hound +English foxhound +redbone +borzoi +Irish wolfhound +Italian greyhound +whippet +Ibizan hound +Norwegian elkhound +otterhound +Saluki +Scottish deerhound +Weimaraner +Staffordshire bullterrier +American Staffordshire terrier +Bedlington terrier +Border terrier +Kerry blue terrier +Irish terrier +Norfolk terrier +Norwich terrier +Yorkshire terrier +wire-haired fox terrier +Lakeland terrier +Sealyham terrier +Airedale +cairn +Australian terrier +Dandie Dinmont +Boston bull +miniature schnauzer +giant schnauzer +standard schnauzer +Scotch terrier +Tibetan terrier +silky terrier +soft-coated wheaten terrier +West Highland white terrier +Lhasa +flat-coated retriever +curly-coated retriever +golden retriever +Labrador retriever +Chesapeake Bay retriever +German short-haired pointer +vizsla +English setter +Irish setter +Gordon setter +Brittany spaniel +clumber +English springer +Welsh springer spaniel +cocker spaniel +Sussex spaniel +Irish water spaniel +kuvasz +schipperke 
+groenendael +malinois +briard +kelpie +komondor +Old English sheepdog +Shetland sheepdog +collie +Border collie +Bouvier des Flandres +Rottweiler +German shepherd +Doberman +miniature pinscher +Greater Swiss Mountain dog +Bernese mountain dog +Appenzeller +EntleBucher +boxer +bull mastiff +Tibetan mastiff +French bulldog +Great Dane +Saint Bernard +Eskimo dog +malamute +Siberian husky +dalmatian +affenpinscher +basenji +pug +Leonberg +Newfoundland +Great Pyrenees +Samoyed +Pomeranian +chow +keeshond +Brabancon griffon +Pembroke +Cardigan +toy poodle +miniature poodle +standard poodle +Mexican hairless +timber wolf +white wolf +red wolf +coyote +dingo +dhole +African hunting dog +hyena +red fox +kit fox +Arctic fox +grey fox +tabby +tiger cat +Persian cat +Siamese cat +Egyptian cat +cougar +lynx +leopard +snow leopard +jaguar +lion +tiger +cheetah +brown bear +American black bear +ice bear +sloth bear +mongoose +meerkat +tiger beetle +ladybug +ground beetle +long-horned beetle +leaf beetle +dung beetle +rhinoceros beetle +weevil +fly +bee +ant +grasshopper +cricket +walking stick +cockroach +mantis +cicada +leafhopper +lacewing +dragonfly +damselfly +admiral +ringlet +monarch +cabbage butterfly +sulphur butterfly +lycaenid +starfish +sea urchin +sea cucumber +wood rabbit +hare +Angora +hamster +porcupine +fox squirrel +marmot +beaver +guinea pig +sorrel +zebra +hog +wild boar +warthog +hippopotamus +ox +water buffalo +bison +ram +bighorn +ibex +hartebeest +impala +gazelle +Arabian camel +llama +weasel +mink +polecat +black-footed ferret +otter +skunk +badger +armadillo +three-toed sloth +orangutan +gorilla +chimpanzee +gibbon +siamang +guenon +patas +baboon +macaque +langur +colobus +proboscis monkey +marmoset +capuchin +howler monkey +titi +spider monkey +squirrel monkey +Madagascar cat +indri +Indian elephant +African elephant +lesser panda +giant panda +barracouta +eel +coho +rock beauty +anemone fish +sturgeon +gar +lionfish +puffer +abacus +abaya +academic gown +accordion +acoustic guitar +aircraft carrier +airliner +airship +altar +ambulance +amphibian +analog clock +apiary +apron +ashcan +assault rifle +backpack +bakery +balance beam +balloon +ballpoint +Band Aid +banjo +bannister +barbell +barber chair +barbershop +barn +barometer +barrel +barrow +baseball +basketball +bassinet +bassoon +bathing cap +bath towel +bathtub +beach wagon +beacon +beaker +bearskin +beer bottle +beer glass +bell cote +bib +bicycle-built-for-two +bikini +binder +binoculars +birdhouse +boathouse +bobsled +bolo tie +bonnet +bookcase +bookshop +bottlecap +bow +bow tie +brass +brassiere +breakwater +breastplate +broom +bucket +buckle +bulletproof vest +bullet train +butcher shop +cab +caldron +candle +cannon +canoe +can opener +cardigan +car mirror +carousel +carpenters kit +carton +car wheel +cash machine +cassette +cassette player +castle +catamaran +CD player +cello +cellular telephone +chain +chainlink fence +chain mail +chain saw +chest +chiffonier +chime +china cabinet +Christmas stocking +church +cinema +cleaver +cliff dwelling +cloak +clog +cocktail shaker +coffee mug +coffeepot +coil +combination lock +computer keyboard +confectionery +container ship +convertible +corkscrew +cornet +cowboy boot +cowboy hat +cradle +crane +crash helmet +crate +crib +Crock Pot +croquet ball +crutch +cuirass +dam +desk +desktop computer +dial telephone +diaper +digital clock +digital watch +dining table +dishrag +dishwasher +disk brake +dock +dogsled +dome +doormat +drilling platform +drum +drumstick +dumbbell +Dutch 
oven +electric fan +electric guitar +electric locomotive +entertainment center +envelope +espresso maker +face powder +feather boa +file +fireboat +fire engine +fire screen +flagpole +flute +folding chair +football helmet +forklift +fountain +fountain pen +four-poster +freight car +French horn +frying pan +fur coat +garbage truck +gasmask +gas pump +goblet +go-kart +golf ball +golfcart +gondola +gong +gown +grand piano +greenhouse +grille +grocery store +guillotine +hair slide +hair spray +half track +hammer +hamper +hand blower +hand-held computer +handkerchief +hard disc +harmonica +harp +harvester +hatchet +holster +home theater +honeycomb +hook +hoopskirt +horizontal bar +horse cart +hourglass +iPod +iron +jack-o-lantern +jean +jeep +jersey +jigsaw puzzle +jinrikisha +joystick +kimono +knee pad +knot +lab coat +ladle +lampshade +laptop +lawn mower +lens cap +letter opener +library +lifeboat +lighter +limousine +liner +lipstick +Loafer +lotion +loudspeaker +loupe +lumbermill +magnetic compass +mailbag +mailbox +maillot +maillot +manhole cover +maraca +marimba +mask +matchstick +maypole +maze +measuring cup +medicine chest +megalith +microphone +microwave +military uniform +milk can +minibus +miniskirt +minivan +missile +mitten +mixing bowl +mobile home +Model T +modem +monastery +monitor +moped +mortar +mortarboard +mosque +mosquito net +motor scooter +mountain bike +mountain tent +mouse +mousetrap +moving van +muzzle +nail +neck brace +necklace +nipple +notebook +obelisk +oboe +ocarina +odometer +oil filter +organ +oscilloscope +overskirt +oxcart +oxygen mask +packet +paddle +paddlewheel +padlock +paintbrush +pajama +palace +panpipe +paper towel +parachute +parallel bars +park bench +parking meter +passenger car +patio +pay-phone +pedestal +pencil box +pencil sharpener +perfume +Petri dish +photocopier +pick +pickelhaube +picket fence +pickup +pier +piggy bank +pill bottle +pillow +ping-pong ball +pinwheel +pirate +pitcher +plane +planetarium +plastic bag +plate rack +plow +plunger +Polaroid camera +pole +police van +poncho +pool table +pop bottle +pot +potters wheel +power drill +prayer rug +printer +prison +projectile +projector +puck +punching bag +purse +quill +quilt +racer +racket +radiator +radio +radio telescope +rain barrel +recreational vehicle +reel +reflex camera +refrigerator +remote control +restaurant +revolver +rifle +rocking chair +rotisserie +rubber eraser +rugby ball +rule +running shoe +safe +safety pin +saltshaker +sandal +sarong +sax +scabbard +scale +school bus +schooner +scoreboard +screen +screw +screwdriver +seat belt +sewing machine +shield +shoe shop +shoji +shopping basket +shopping cart +shovel +shower cap +shower curtain +ski +ski mask +sleeping bag +slide rule +sliding door +slot +snorkel +snowmobile +snowplow +soap dispenser +soccer ball +sock +solar dish +sombrero +soup bowl +space bar +space heater +space shuttle +spatula +speedboat +spider web +spindle +sports car +spotlight +stage +steam locomotive +steel arch bridge +steel drum +stethoscope +stole +stone wall +stopwatch +stove +strainer +streetcar +stretcher +studio couch +stupa +submarine +suit +sundial +sunglass +sunglasses +sunscreen +suspension bridge +swab +sweatshirt +swimming trunks +swing +switch +syringe +table lamp +tank +tape player +teapot +teddy +television +tennis ball +thatch +theater curtain +thimble +thresher +throne +tile roof +toaster +tobacco shop +toilet seat +torch +totem pole +tow truck +toyshop +tractor +trailer truck +tray +trench coat +tricycle +trimaran +tripod +triumphal 
arch +trolleybus +trombone +tub +turnstile +typewriter keyboard +umbrella +unicycle +upright +vacuum +vase +vault +velvet +vending machine +vestment +viaduct +violin +volleyball +waffle iron +wall clock +wallet +wardrobe +warplane +washbasin +washer +water bottle +water jug +water tower +whiskey jug +whistle +wig +window screen +window shade +Windsor tie +wine bottle +wing +wok +wooden spoon +wool +worm fence +wreck +yawl +yurt +web site +comic book +crossword puzzle +street sign +traffic light +book jacket +menu +plate +guacamole +consomme +hot pot +trifle +ice cream +ice lolly +French loaf +bagel +pretzel +cheeseburger +hotdog +mashed potato +head cabbage +broccoli +cauliflower +zucchini +spaghetti squash +acorn squash +butternut squash +cucumber +artichoke +bell pepper +cardoon +mushroom +Granny Smith +strawberry +orange +lemon +fig +pineapple +banana +jackfruit +custard apple +pomegranate +hay +carbonara +chocolate sauce +dough +meat loaf +pizza +potpie +burrito +red wine +espresso +cup +eggnog +alp +bubble +cliff +coral reef +geyser +lakeside +promontory +sandbar +seashore +valley +volcano +ballplayer +groom +scuba diver +rapeseed +daisy +yellow ladys slipper +corn +acorn +hip +buckeye +coral fungus +agaric +gyromitra +stinkhorn +earthstar +hen-of-the-woods +bolete +ear +toilet tissue diff --git a/modules/image/classification/rexnet_1_3_imagenet/module.py b/modules/image/classification/rexnet_1_3_imagenet/module.py new file mode 100644 index 0000000000000000000000000000000000000000..26c90180a1e124543938eef2f1bf282a32701587 --- /dev/null +++ b/modules/image/classification/rexnet_1_3_imagenet/module.py @@ -0,0 +1,196 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
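+# The code below mirrors rexnet_1_0_imagenet/module.py: conv_bn_act /
+# conv_bn_swish build Conv2D + BatchNorm2D (+ ReLU / ReLU6 or Swish) blocks,
+# SE implements squeeze-and-excitation gating, and LinearBottleneck is the
+# ReXNet inverted-bottleneck block. ReXNetV1 is registered as the
+# rexnet_1_3_imagenet PaddleHub Module and uses width_mult=1.3 by default.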
+import os +from math import ceil +from typing import Union + +import paddle +import paddle.nn as nn +import numpy as np +import paddlehub.vision.transforms as T +from paddle import ParamAttr +from paddlehub.module.module import moduleinfo +from paddlehub.module.cv_module import ImageClassifierModule + + +def conv_bn_act(out, in_channels, channels, kernel=1, stride=1, pad=0, num_group=1, active=True, relu6=False): + out.append(nn.Conv2D(in_channels, channels, kernel, stride, pad, groups=num_group, bias_attr=False)) + out.append(nn.BatchNorm2D(channels)) + if active: + out.append(nn.ReLU6() if relu6 else nn.ReLU()) + + +def conv_bn_swish(out, in_channels, channels, kernel=1, stride=1, pad=0, num_group=1): + out.append(nn.Conv2D(in_channels, channels, kernel, stride, pad, groups=num_group, bias_attr=False)) + out.append(nn.BatchNorm2D(channels)) + out.append(nn.Swish()) + + +class SE(nn.Layer): + def __init__(self, in_channels, channels, se_ratio=12): + super(SE, self).__init__() + self.avg_pool = nn.AdaptiveAvgPool2D(1) + self.fc = nn.Sequential( + nn.Conv2D(in_channels, channels // se_ratio, kernel_size=1, padding=0), + nn.BatchNorm2D(channels // se_ratio), nn.ReLU(), + nn.Conv2D(channels // se_ratio, channels, kernel_size=1, padding=0), nn.Sigmoid()) + + def forward(self, x): + y = self.avg_pool(x) + y = self.fc(y) + return x * y + + +class LinearBottleneck(nn.Layer): + def __init__(self, in_channels, channels, t, stride, use_se=True, se_ratio=12, **kwargs): + super(LinearBottleneck, self).__init__(**kwargs) + self.use_shortcut = stride == 1 and in_channels <= channels + self.in_channels = in_channels + self.out_channels = channels + + out = [] + if t != 1: + dw_channels = in_channels * t + conv_bn_swish(out, in_channels=in_channels, channels=dw_channels) + else: + dw_channels = in_channels + + conv_bn_act( + out, + in_channels=dw_channels, + channels=dw_channels, + kernel=3, + stride=stride, + pad=1, + num_group=dw_channels, + active=False) + + if use_se: + out.append(SE(dw_channels, dw_channels, se_ratio)) + + out.append(nn.ReLU6()) + conv_bn_act(out, in_channels=dw_channels, channels=channels, active=False, relu6=True) + self.out = nn.Sequential(*out) + + def forward(self, x): + out = self.out(x) + if self.use_shortcut: + out[:, 0:self.in_channels] += x + + return out + + +@moduleinfo( + name="rexnet_1_3_imagenet", + type="CV/classification", + author="paddlepaddle", + author_email="", + summary="rexnet_1_3_imagenet is a classification model, " + "this module is trained with Imagenet dataset.", + version="1.0.0", + meta=ImageClassifierModule) +class ReXNetV1(nn.Layer): + def __init__(self, + label_list: list = None, + load_checkpoint: str = None, + input_ch=16, + final_ch=180, + width_mult=1.3, + depth_mult=1.0, + class_dim=1000, + use_se=True, + se_ratio=12, + dropout_ratio=0.2, + bn_momentum=0.9): + + super(ReXNetV1, self).__init__() + + if label_list is not None: + self.labels = label_list + class_dim = len(self.labels) + else: + label_list = [] + label_file = os.path.join(self.directory, 'label_list.txt') + files = open(label_file) + for line in files.readlines(): + line = line.strip('\n') + label_list.append(line) + self.labels = label_list + class_dim = len(self.labels) + + layers = [1, 2, 2, 3, 3, 5] + strides = [1, 2, 2, 2, 1, 2] + use_ses = [False, False, True, True, True, True] + + layers = [ceil(element * depth_mult) for element in layers] + strides = sum([[element] + [1] * (layers[idx] - 1) for idx, element in enumerate(strides)], []) + if use_se: + use_ses = sum([[element] 
* layers[idx] for idx, element in enumerate(use_ses)], []) + else: + use_ses = [False] * sum(layers[:]) + ts = [1] * layers[0] + [6] * sum(layers[1:]) + + self.depth = sum(layers[:]) * 3 + stem_channel = 32 / width_mult if width_mult < 1.0 else 32 + inplanes = input_ch / width_mult if width_mult < 1.0 else input_ch + + features = [] + in_channels_group = [] + channels_group = [] + + # The following channel configuration is a simple instance to make each layer become an expand layer. + for i in range(self.depth // 3): + if i == 0: + in_channels_group.append(int(round(stem_channel * width_mult))) + channels_group.append(int(round(inplanes * width_mult))) + else: + in_channels_group.append(int(round(inplanes * width_mult))) + inplanes += final_ch / (self.depth // 3 * 1.0) + channels_group.append(int(round(inplanes * width_mult))) + + conv_bn_swish(features, 3, int(round(stem_channel * width_mult)), kernel=3, stride=2, pad=1) + + for block_idx, (in_c, c, t, s, se) in enumerate(zip(in_channels_group, channels_group, ts, strides, use_ses)): + features.append(LinearBottleneck(in_channels=in_c, channels=c, t=t, stride=s, use_se=se, se_ratio=se_ratio)) + + pen_channels = int(1280 * width_mult) + conv_bn_swish(features, c, pen_channels) + + features.append(nn.AdaptiveAvgPool2D(1)) + self.features = nn.Sequential(*features) + self.output = nn.Sequential(nn.Dropout(dropout_ratio), nn.Conv2D(pen_channels, class_dim, 1, bias_attr=True)) + + if load_checkpoint is not None: + self.model_dict = paddle.load(load_checkpoint) + self.set_dict(self.model_dict) + print("load custom checkpoint success") + else: + checkpoint = os.path.join(self.directory, 'model.pdparams') + self.model_dict = paddle.load(checkpoint) + self.set_dict(self.model_dict) + print("load pretrained checkpoint success") + + def transforms(self, images: Union[str, np.ndarray]): + transforms = T.Compose([ + T.Resize((256, 256)), + T.CenterCrop(224), + T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + ], + to_rgb=True) + return transforms(images).astype('float32') + + def forward(self, x): + feat = self.features(x) + x = self.output(feat).squeeze(axis=-1).squeeze(axis=-1) + return x, feat diff --git a/modules/image/classification/rexnet_1_5_imagenet/README.md b/modules/image/classification/rexnet_1_5_imagenet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d0708cecc5f3378bfa7643bfa3c9d523f1a5b2ae --- /dev/null +++ b/modules/image/classification/rexnet_1_5_imagenet/README.md @@ -0,0 +1,181 @@ +# rexnet_1_5_imagenet + +|模型名称|rexnet_1_5_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|ReXNet| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|是| +|模型大小|57MB| +|指标|-| +|最新更新日期|2021-09-14| + + +## 一、模型基本信息 + +- ### 模型介绍 + + - ReXNet 由 NAVER AI Lab 提出的基于新的网络设计原则而设计的网络。作者针对现有网络中具有代表性的瓶颈问题,提出了一套设计原则,他们认为,常规设计会产生代表性瓶颈,这会影响模型性能。为了研究表征瓶颈,作者研究了由一万个随机网络生成的特征的矩阵秩。此外,还研究了整个层的通道配置以设计更准确的网络架构。最后,作者提出了一套简单有效的设计原则来缓解表征瓶颈。 + + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 + +- ### 2、安装 + - ```shell + $ hub install rexnet_1_5_imagenet + ``` + + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1.命令行预测 + + ```shell + $ hub run rexnet_1_5_imagenet --input_path "/PATH/TO/IMAGE" --top_k 5 + ``` +- ### 2.预测代码示例 + + ```python + import paddle + import paddlehub as hub + if __name__ == '__main__': + 
model = hub.Module(name='rexnet_1_5_imagenet') + result = model.predict(['flower.jpg']) + ``` +- ### 3.如何开始Fine-tune + + - 在完成安装PaddlePaddle与PaddleHub后,通过执行`python train.py`即可开始使用rexnet_1_5_imagenet对[Flowers](../../docs/reference/datasets.md#class-hubdatasetsflowers)等数据集进行Fine-tune。 + + - 代码步骤 + + - Step1: 定义数据预处理方式 + - ```python + import paddlehub.vision.transforms as T + transforms = T.Compose([T.Resize((256, 256)), + T.CenterCrop(224), + T.Normalize(mean=[0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225])], + to_rgb=True) + ``` + + - `transforms` 数据增强模块定义了丰富的数据预处理方式,用户可按照需求替换自己需要的数据预处理方式。 + + - Step2: 下载数据集并使用 + - ```python + from paddlehub.datasets import Flowers + flowers = Flowers(transforms) + flowers_validate = Flowers(transforms, mode='val') + ``` + + * `transforms`: 数据预处理方式。 + * `mode`: 选择数据模式,可选项有 `train`, `test`, `val`, 默认为`train`。 + + * 数据集的准备代码可以参考 [flowers.py](../../paddlehub/datasets/flowers.py)。`hub.datasets.Flowers()` 会自动从网络下载数据集并解压到用户目录下`$HOME/.paddlehub/dataset`目录。 + + + - Step3: 加载预训练模型 + + - ```python + model = hub.Module(name="rexnet_1_5_imagenet", label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"]) + ``` + * `name`: 选择预训练模型的名字。 + * `label_list`: 设置输出分类类别,默认为Imagenet2012类别。 + + - Step4: 选择优化策略和运行配置 + + ```python + optimizer = paddle.optimizer.Adam(learning_rate=0.001, parameters=model.parameters()) + trainer = Trainer(model, optimizer, checkpoint_dir='img_classification_ckpt') + trainer.train(flowers, epochs=100, batch_size=32, eval_dataset=flowers_validate, save_interval=1) + ``` + + + - 运行配置 + + - `Trainer` 主要控制Fine-tune的训练,包含以下可控制的参数: + + * `model`: 被优化模型; + * `optimizer`: 优化器选择; + * `use_vdl`: 是否使用vdl可视化训练过程; + * `checkpoint_dir`: 保存模型参数的地址; + * `compare_metrics`: 保存最优模型的衡量指标; + + - `trainer.train` 主要控制具体的训练过程,包含以下可控制的参数: + + * `train_dataset`: 训练时所用的数据集; + * `epochs`: 训练轮数; + * `batch_size`: 训练的批大小,如果使用GPU,请根据实际情况调整batch_size; + * `num_workers`: works的数量,默认为0; + * `eval_dataset`: 验证集; + * `log_interval`: 打印日志的间隔, 单位为执行批训练的次数。 + * `save_interval`: 保存模型的间隔频次,单位为执行训练的轮数。 + + - 模型预测 + + - 当完成Fine-tune后,Fine-tune过程在验证集上表现最优的模型会被保存在`${CHECKPOINT_DIR}/best_model`目录下,其中`${CHECKPOINT_DIR}`目录为Fine-tune时所选择的保存checkpoint的目录。 我们使用该模型来进行预测。predict.py脚本如下: + + - ```python + import paddle + import paddlehub as hub + if __name__ == '__main__': + model = hub.Module(name='rexnet_1_5_imagenet', label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"], load_checkpoint='/PATH/TO/CHECKPOINT') + result = model.predict(['flower.jpg']) + ``` + + + - **NOTE:** 进行预测时,所选择的module,checkpoint_dir,dataset必须和Fine-tune所用的一样。 + +## 四、服务部署 + +- PaddleHub Serving可以部署一个在线分类任务服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + + - ```shell + $ hub serving start -m rexnet_1_5_imagenet + ``` + + - 这样就完成了一个分类任务服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + ```python + import requests + import json + import cv2 + import base64 + import numpy as np + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + # 发送HTTP请求 + org_im = cv2.imread('/PATH/TO/IMAGE') + data = {'images':[cv2_to_base64(org_im)], 'top_k':2} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/rexnet_1_5_imagenet" + r = 
requests.post(url=url, headers=headers, data=json.dumps(data)) + data =r.json()["results"]['data'] + ``` +## 五、更新历史 + +* 1.0.0 + + 初始发布 diff --git a/modules/image/classification/rexnet_1_5_imagenet/label_list.txt b/modules/image/classification/rexnet_1_5_imagenet/label_list.txt new file mode 100644 index 0000000000000000000000000000000000000000..52baabc68e968dde482ca143728295355d83203a --- /dev/null +++ b/modules/image/classification/rexnet_1_5_imagenet/label_list.txt @@ -0,0 +1,1000 @@ +tench +goldfish +great white shark +tiger shark +hammerhead +electric ray +stingray +cock +hen +ostrich +brambling +goldfinch +house finch +junco +indigo bunting +robin +bulbul +jay +magpie +chickadee +water ouzel +kite +bald eagle +vulture +great grey owl +European fire salamander +common newt +eft +spotted salamander +axolotl +bullfrog +tree frog +tailed frog +loggerhead +leatherback turtle +mud turtle +terrapin +box turtle +banded gecko +common iguana +American chameleon +whiptail +agama +frilled lizard +alligator lizard +Gila monster +green lizard +African chameleon +Komodo dragon +African crocodile +American alligator +triceratops +thunder snake +ringneck snake +hognose snake +green snake +king snake +garter snake +water snake +vine snake +night snake +boa constrictor +rock python +Indian cobra +green mamba +sea snake +horned viper +diamondback +sidewinder +trilobite +harvestman +scorpion +black and gold garden spider +barn spider +garden spider +black widow +tarantula +wolf spider +tick +centipede +black grouse +ptarmigan +ruffed grouse +prairie chicken +peacock +quail +partridge +African grey +macaw +sulphur-crested cockatoo +lorikeet +coucal +bee eater +hornbill +hummingbird +jacamar +toucan +drake +red-breasted merganser +goose +black swan +tusker +echidna +platypus +wallaby +koala +wombat +jellyfish +sea anemone +brain coral +flatworm +nematode +conch +snail +slug +sea slug +chiton +chambered nautilus +Dungeness crab +rock crab +fiddler crab +king crab +American lobster +spiny lobster +crayfish +hermit crab +isopod +white stork +black stork +spoonbill +flamingo +little blue heron +American egret +bittern +crane +limpkin +European gallinule +American coot +bustard +ruddy turnstone +red-backed sandpiper +redshank +dowitcher +oystercatcher +pelican +king penguin +albatross +grey whale +killer whale +dugong +sea lion +Chihuahua +Japanese spaniel +Maltese dog +Pekinese +Shih-Tzu +Blenheim spaniel +papillon +toy terrier +Rhodesian ridgeback +Afghan hound +basset +beagle +bloodhound +bluetick +black-and-tan coonhound +Walker hound +English foxhound +redbone +borzoi +Irish wolfhound +Italian greyhound +whippet +Ibizan hound +Norwegian elkhound +otterhound +Saluki +Scottish deerhound +Weimaraner +Staffordshire bullterrier +American Staffordshire terrier +Bedlington terrier +Border terrier +Kerry blue terrier +Irish terrier +Norfolk terrier +Norwich terrier +Yorkshire terrier +wire-haired fox terrier +Lakeland terrier +Sealyham terrier +Airedale +cairn +Australian terrier +Dandie Dinmont +Boston bull +miniature schnauzer +giant schnauzer +standard schnauzer +Scotch terrier +Tibetan terrier +silky terrier +soft-coated wheaten terrier +West Highland white terrier +Lhasa +flat-coated retriever +curly-coated retriever +golden retriever +Labrador retriever +Chesapeake Bay retriever +German short-haired pointer +vizsla +English setter +Irish setter +Gordon setter +Brittany spaniel +clumber +English springer +Welsh springer spaniel +cocker spaniel +Sussex spaniel +Irish water spaniel +kuvasz +schipperke 
+groenendael +malinois +briard +kelpie +komondor +Old English sheepdog +Shetland sheepdog +collie +Border collie +Bouvier des Flandres +Rottweiler +German shepherd +Doberman +miniature pinscher +Greater Swiss Mountain dog +Bernese mountain dog +Appenzeller +EntleBucher +boxer +bull mastiff +Tibetan mastiff +French bulldog +Great Dane +Saint Bernard +Eskimo dog +malamute +Siberian husky +dalmatian +affenpinscher +basenji +pug +Leonberg +Newfoundland +Great Pyrenees +Samoyed +Pomeranian +chow +keeshond +Brabancon griffon +Pembroke +Cardigan +toy poodle +miniature poodle +standard poodle +Mexican hairless +timber wolf +white wolf +red wolf +coyote +dingo +dhole +African hunting dog +hyena +red fox +kit fox +Arctic fox +grey fox +tabby +tiger cat +Persian cat +Siamese cat +Egyptian cat +cougar +lynx +leopard +snow leopard +jaguar +lion +tiger +cheetah +brown bear +American black bear +ice bear +sloth bear +mongoose +meerkat +tiger beetle +ladybug +ground beetle +long-horned beetle +leaf beetle +dung beetle +rhinoceros beetle +weevil +fly +bee +ant +grasshopper +cricket +walking stick +cockroach +mantis +cicada +leafhopper +lacewing +dragonfly +damselfly +admiral +ringlet +monarch +cabbage butterfly +sulphur butterfly +lycaenid +starfish +sea urchin +sea cucumber +wood rabbit +hare +Angora +hamster +porcupine +fox squirrel +marmot +beaver +guinea pig +sorrel +zebra +hog +wild boar +warthog +hippopotamus +ox +water buffalo +bison +ram +bighorn +ibex +hartebeest +impala +gazelle +Arabian camel +llama +weasel +mink +polecat +black-footed ferret +otter +skunk +badger +armadillo +three-toed sloth +orangutan +gorilla +chimpanzee +gibbon +siamang +guenon +patas +baboon +macaque +langur +colobus +proboscis monkey +marmoset +capuchin +howler monkey +titi +spider monkey +squirrel monkey +Madagascar cat +indri +Indian elephant +African elephant +lesser panda +giant panda +barracouta +eel +coho +rock beauty +anemone fish +sturgeon +gar +lionfish +puffer +abacus +abaya +academic gown +accordion +acoustic guitar +aircraft carrier +airliner +airship +altar +ambulance +amphibian +analog clock +apiary +apron +ashcan +assault rifle +backpack +bakery +balance beam +balloon +ballpoint +Band Aid +banjo +bannister +barbell +barber chair +barbershop +barn +barometer +barrel +barrow +baseball +basketball +bassinet +bassoon +bathing cap +bath towel +bathtub +beach wagon +beacon +beaker +bearskin +beer bottle +beer glass +bell cote +bib +bicycle-built-for-two +bikini +binder +binoculars +birdhouse +boathouse +bobsled +bolo tie +bonnet +bookcase +bookshop +bottlecap +bow +bow tie +brass +brassiere +breakwater +breastplate +broom +bucket +buckle +bulletproof vest +bullet train +butcher shop +cab +caldron +candle +cannon +canoe +can opener +cardigan +car mirror +carousel +carpenters kit +carton +car wheel +cash machine +cassette +cassette player +castle +catamaran +CD player +cello +cellular telephone +chain +chainlink fence +chain mail +chain saw +chest +chiffonier +chime +china cabinet +Christmas stocking +church +cinema +cleaver +cliff dwelling +cloak +clog +cocktail shaker +coffee mug +coffeepot +coil +combination lock +computer keyboard +confectionery +container ship +convertible +corkscrew +cornet +cowboy boot +cowboy hat +cradle +crane +crash helmet +crate +crib +Crock Pot +croquet ball +crutch +cuirass +dam +desk +desktop computer +dial telephone +diaper +digital clock +digital watch +dining table +dishrag +dishwasher +disk brake +dock +dogsled +dome +doormat +drilling platform +drum +drumstick +dumbbell +Dutch 
oven +electric fan +electric guitar +electric locomotive +entertainment center +envelope +espresso maker +face powder +feather boa +file +fireboat +fire engine +fire screen +flagpole +flute +folding chair +football helmet +forklift +fountain +fountain pen +four-poster +freight car +French horn +frying pan +fur coat +garbage truck +gasmask +gas pump +goblet +go-kart +golf ball +golfcart +gondola +gong +gown +grand piano +greenhouse +grille +grocery store +guillotine +hair slide +hair spray +half track +hammer +hamper +hand blower +hand-held computer +handkerchief +hard disc +harmonica +harp +harvester +hatchet +holster +home theater +honeycomb +hook +hoopskirt +horizontal bar +horse cart +hourglass +iPod +iron +jack-o-lantern +jean +jeep +jersey +jigsaw puzzle +jinrikisha +joystick +kimono +knee pad +knot +lab coat +ladle +lampshade +laptop +lawn mower +lens cap +letter opener +library +lifeboat +lighter +limousine +liner +lipstick +Loafer +lotion +loudspeaker +loupe +lumbermill +magnetic compass +mailbag +mailbox +maillot +maillot +manhole cover +maraca +marimba +mask +matchstick +maypole +maze +measuring cup +medicine chest +megalith +microphone +microwave +military uniform +milk can +minibus +miniskirt +minivan +missile +mitten +mixing bowl +mobile home +Model T +modem +monastery +monitor +moped +mortar +mortarboard +mosque +mosquito net +motor scooter +mountain bike +mountain tent +mouse +mousetrap +moving van +muzzle +nail +neck brace +necklace +nipple +notebook +obelisk +oboe +ocarina +odometer +oil filter +organ +oscilloscope +overskirt +oxcart +oxygen mask +packet +paddle +paddlewheel +padlock +paintbrush +pajama +palace +panpipe +paper towel +parachute +parallel bars +park bench +parking meter +passenger car +patio +pay-phone +pedestal +pencil box +pencil sharpener +perfume +Petri dish +photocopier +pick +pickelhaube +picket fence +pickup +pier +piggy bank +pill bottle +pillow +ping-pong ball +pinwheel +pirate +pitcher +plane +planetarium +plastic bag +plate rack +plow +plunger +Polaroid camera +pole +police van +poncho +pool table +pop bottle +pot +potters wheel +power drill +prayer rug +printer +prison +projectile +projector +puck +punching bag +purse +quill +quilt +racer +racket +radiator +radio +radio telescope +rain barrel +recreational vehicle +reel +reflex camera +refrigerator +remote control +restaurant +revolver +rifle +rocking chair +rotisserie +rubber eraser +rugby ball +rule +running shoe +safe +safety pin +saltshaker +sandal +sarong +sax +scabbard +scale +school bus +schooner +scoreboard +screen +screw +screwdriver +seat belt +sewing machine +shield +shoe shop +shoji +shopping basket +shopping cart +shovel +shower cap +shower curtain +ski +ski mask +sleeping bag +slide rule +sliding door +slot +snorkel +snowmobile +snowplow +soap dispenser +soccer ball +sock +solar dish +sombrero +soup bowl +space bar +space heater +space shuttle +spatula +speedboat +spider web +spindle +sports car +spotlight +stage +steam locomotive +steel arch bridge +steel drum +stethoscope +stole +stone wall +stopwatch +stove +strainer +streetcar +stretcher +studio couch +stupa +submarine +suit +sundial +sunglass +sunglasses +sunscreen +suspension bridge +swab +sweatshirt +swimming trunks +swing +switch +syringe +table lamp +tank +tape player +teapot +teddy +television +tennis ball +thatch +theater curtain +thimble +thresher +throne +tile roof +toaster +tobacco shop +toilet seat +torch +totem pole +tow truck +toyshop +tractor +trailer truck +tray +trench coat +tricycle +trimaran +tripod +triumphal 
arch +trolleybus +trombone +tub +turnstile +typewriter keyboard +umbrella +unicycle +upright +vacuum +vase +vault +velvet +vending machine +vestment +viaduct +violin +volleyball +waffle iron +wall clock +wallet +wardrobe +warplane +washbasin +washer +water bottle +water jug +water tower +whiskey jug +whistle +wig +window screen +window shade +Windsor tie +wine bottle +wing +wok +wooden spoon +wool +worm fence +wreck +yawl +yurt +web site +comic book +crossword puzzle +street sign +traffic light +book jacket +menu +plate +guacamole +consomme +hot pot +trifle +ice cream +ice lolly +French loaf +bagel +pretzel +cheeseburger +hotdog +mashed potato +head cabbage +broccoli +cauliflower +zucchini +spaghetti squash +acorn squash +butternut squash +cucumber +artichoke +bell pepper +cardoon +mushroom +Granny Smith +strawberry +orange +lemon +fig +pineapple +banana +jackfruit +custard apple +pomegranate +hay +carbonara +chocolate sauce +dough +meat loaf +pizza +potpie +burrito +red wine +espresso +cup +eggnog +alp +bubble +cliff +coral reef +geyser +lakeside +promontory +sandbar +seashore +valley +volcano +ballplayer +groom +scuba diver +rapeseed +daisy +yellow ladys slipper +corn +acorn +hip +buckeye +coral fungus +agaric +gyromitra +stinkhorn +earthstar +hen-of-the-woods +bolete +ear +toilet tissue diff --git a/modules/image/classification/rexnet_1_5_imagenet/module.py b/modules/image/classification/rexnet_1_5_imagenet/module.py new file mode 100644 index 0000000000000000000000000000000000000000..bb3fa1202780c11d9f3123826a6591f175afbc64 --- /dev/null +++ b/modules/image/classification/rexnet_1_5_imagenet/module.py @@ -0,0 +1,196 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
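+# rexnet_1_5_imagenet: PaddleHub image classification module built on the ReXNet backbone
+# (width multiplier 1.5), pretrained on ImageNet-2012. The network stacks the LinearBottleneck
+# blocks defined below (optionally with SE attention) and is exposed through the
+# ImageClassifierModule meta class for prediction, fine-tuning and serving.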
+import os +from math import ceil +from typing import Union + +import paddle +import paddle.nn as nn +import numpy as np +import paddlehub.vision.transforms as T +from paddle import ParamAttr +from paddlehub.module.module import moduleinfo +from paddlehub.module.cv_module import ImageClassifierModule + + +def conv_bn_act(out, in_channels, channels, kernel=1, stride=1, pad=0, num_group=1, active=True, relu6=False): + out.append(nn.Conv2D(in_channels, channels, kernel, stride, pad, groups=num_group, bias_attr=False)) + out.append(nn.BatchNorm2D(channels)) + if active: + out.append(nn.ReLU6() if relu6 else nn.ReLU()) + + +def conv_bn_swish(out, in_channels, channels, kernel=1, stride=1, pad=0, num_group=1): + out.append(nn.Conv2D(in_channels, channels, kernel, stride, pad, groups=num_group, bias_attr=False)) + out.append(nn.BatchNorm2D(channels)) + out.append(nn.Swish()) + + +class SE(nn.Layer): + def __init__(self, in_channels, channels, se_ratio=12): + super(SE, self).__init__() + self.avg_pool = nn.AdaptiveAvgPool2D(1) + self.fc = nn.Sequential( + nn.Conv2D(in_channels, channels // se_ratio, kernel_size=1, padding=0), + nn.BatchNorm2D(channels // se_ratio), nn.ReLU(), + nn.Conv2D(channels // se_ratio, channels, kernel_size=1, padding=0), nn.Sigmoid()) + + def forward(self, x): + y = self.avg_pool(x) + y = self.fc(y) + return x * y + + +class LinearBottleneck(nn.Layer): + def __init__(self, in_channels, channels, t, stride, use_se=True, se_ratio=12, **kwargs): + super(LinearBottleneck, self).__init__(**kwargs) + self.use_shortcut = stride == 1 and in_channels <= channels + self.in_channels = in_channels + self.out_channels = channels + + out = [] + if t != 1: + dw_channels = in_channels * t + conv_bn_swish(out, in_channels=in_channels, channels=dw_channels) + else: + dw_channels = in_channels + + conv_bn_act( + out, + in_channels=dw_channels, + channels=dw_channels, + kernel=3, + stride=stride, + pad=1, + num_group=dw_channels, + active=False) + + if use_se: + out.append(SE(dw_channels, dw_channels, se_ratio)) + + out.append(nn.ReLU6()) + conv_bn_act(out, in_channels=dw_channels, channels=channels, active=False, relu6=True) + self.out = nn.Sequential(*out) + + def forward(self, x): + out = self.out(x) + if self.use_shortcut: + out[:, 0:self.in_channels] += x + + return out + + +@moduleinfo( + name="rexnet_1_5_imagenet", + type="CV/classification", + author="paddlepaddle", + author_email="", + summary="rexnet_1_5_imagenet is a classification model, " + "this module is trained with Imagenet dataset.", + version="1.0.0", + meta=ImageClassifierModule) +class ReXNetV1(nn.Layer): + def __init__(self, + label_list: list = None, + load_checkpoint: str = None, + input_ch=16, + final_ch=180, + width_mult=1.5, + depth_mult=1.0, + class_dim=1000, + use_se=True, + se_ratio=12, + dropout_ratio=0.2, + bn_momentum=0.9): + + super(ReXNetV1, self).__init__() + + if label_list is not None: + self.labels = label_list + class_dim = len(self.labels) + else: + label_list = [] + label_file = os.path.join(self.directory, 'label_list.txt') + files = open(label_file) + for line in files.readlines(): + line = line.strip('\n') + label_list.append(line) + self.labels = label_list + class_dim = len(self.labels) + + layers = [1, 2, 2, 3, 3, 5] + strides = [1, 2, 2, 2, 1, 2] + use_ses = [False, False, True, True, True, True] + + layers = [ceil(element * depth_mult) for element in layers] + strides = sum([[element] + [1] * (layers[idx] - 1) for idx, element in enumerate(strides)], []) + if use_se: + use_ses = sum([[element] 
* layers[idx] for idx, element in enumerate(use_ses)], []) + else: + use_ses = [False] * sum(layers[:]) + ts = [1] * layers[0] + [6] * sum(layers[1:]) + + self.depth = sum(layers[:]) * 3 + stem_channel = 32 / width_mult if width_mult < 1.0 else 32 + inplanes = input_ch / width_mult if width_mult < 1.0 else input_ch + + features = [] + in_channels_group = [] + channels_group = [] + + # The following channel configuration is a simple instance to make each layer become an expand layer. + for i in range(self.depth // 3): + if i == 0: + in_channels_group.append(int(round(stem_channel * width_mult))) + channels_group.append(int(round(inplanes * width_mult))) + else: + in_channels_group.append(int(round(inplanes * width_mult))) + inplanes += final_ch / (self.depth // 3 * 1.0) + channels_group.append(int(round(inplanes * width_mult))) + + conv_bn_swish(features, 3, int(round(stem_channel * width_mult)), kernel=3, stride=2, pad=1) + + for block_idx, (in_c, c, t, s, se) in enumerate(zip(in_channels_group, channels_group, ts, strides, use_ses)): + features.append(LinearBottleneck(in_channels=in_c, channels=c, t=t, stride=s, use_se=se, se_ratio=se_ratio)) + + pen_channels = int(1280 * width_mult) + conv_bn_swish(features, c, pen_channels) + + features.append(nn.AdaptiveAvgPool2D(1)) + self.features = nn.Sequential(*features) + self.output = nn.Sequential(nn.Dropout(dropout_ratio), nn.Conv2D(pen_channels, class_dim, 1, bias_attr=True)) + + if load_checkpoint is not None: + self.model_dict = paddle.load(load_checkpoint) + self.set_dict(self.model_dict) + print("load custom checkpoint success") + else: + checkpoint = os.path.join(self.directory, 'model.pdparams') + self.model_dict = paddle.load(checkpoint) + self.set_dict(self.model_dict) + print("load pretrained checkpoint success") + + def transforms(self, images: Union[str, np.ndarray]): + transforms = T.Compose([ + T.Resize((256, 256)), + T.CenterCrop(224), + T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + ], + to_rgb=True) + return transforms(images).astype('float32') + + def forward(self, x): + feat = self.features(x) + x = self.output(feat).squeeze(axis=-1).squeeze(axis=-1) + return x, feat diff --git a/modules/image/classification/rexnet_2_0_imagenet/README.md b/modules/image/classification/rexnet_2_0_imagenet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..3f1285b6ad9441d81cd7deb0ed68923420ecaa94 --- /dev/null +++ b/modules/image/classification/rexnet_2_0_imagenet/README.md @@ -0,0 +1,181 @@ +# rexnet_2_0_imagenet + +|模型名称|rexnet_2_0_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|ReXNet| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|是| +|模型大小|95MB| +|指标|-| +|最新更新日期|2021-09-14| + + +## 一、模型基本信息 + +- ### 模型介绍 + + - ReXNet 由 NAVER AI Lab 提出的基于新的网络设计原则而设计的网络。作者针对现有网络中具有代表性的瓶颈问题,提出了一套设计原则,他们认为,常规设计会产生代表性瓶颈,这会影响模型性能。为了研究表征瓶颈,作者研究了由一万个随机网络生成的特征的矩阵秩。此外,还研究了整个层的通道配置以设计更准确的网络架构。最后,作者提出了一套简单有效的设计原则来缓解表征瓶颈。 + + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 + +- ### 2、安装 + - ```shell + $ hub install rexnet_2_0_imagenet + ``` + + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1.命令行预测 + + ```shell + $ hub run rexnet_2_0_imagenet --input_path "/PATH/TO/IMAGE" --top_k 5 + ``` +- ### 2.预测代码示例 + + ```python + import paddle + import paddlehub as hub + if __name__ == '__main__': + 
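+      # hub.Module resolves the installed rexnet_2_0_imagenet module; predict() accepts a list of image paths.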
model = hub.Module(name='rexnet_2_0_imagenet') + result = model.predict(['flower.jpg']) + ``` +- ### 3.如何开始Fine-tune + + - 在完成安装PaddlePaddle与PaddleHub后,通过执行`python train.py`即可开始使用rexnet_2_0_imagenet对[Flowers](../../docs/reference/datasets.md#class-hubdatasetsflowers)等数据集进行Fine-tune。 + + - 代码步骤 + + - Step1: 定义数据预处理方式 + - ```python + import paddlehub.vision.transforms as T + transforms = T.Compose([T.Resize((256, 256)), + T.CenterCrop(224), + T.Normalize(mean=[0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225])], + to_rgb=True) + ``` + + - `transforms` 数据增强模块定义了丰富的数据预处理方式,用户可按照需求替换自己需要的数据预处理方式。 + + - Step2: 下载数据集并使用 + - ```python + from paddlehub.datasets import Flowers + flowers = Flowers(transforms) + flowers_validate = Flowers(transforms, mode='val') + ``` + + * `transforms`: 数据预处理方式。 + * `mode`: 选择数据模式,可选项有 `train`, `test`, `val`, 默认为`train`。 + + * 数据集的准备代码可以参考 [flowers.py](../../paddlehub/datasets/flowers.py)。`hub.datasets.Flowers()` 会自动从网络下载数据集并解压到用户目录下`$HOME/.paddlehub/dataset`目录。 + + + - Step3: 加载预训练模型 + + - ```python + model = hub.Module(name="rexnet_2_0_imagenet", label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"]) + ``` + * `name`: 选择预训练模型的名字。 + * `label_list`: 设置输出分类类别,默认为Imagenet2012类别。 + + - Step4: 选择优化策略和运行配置 + + ```python + optimizer = paddle.optimizer.Adam(learning_rate=0.001, parameters=model.parameters()) + trainer = Trainer(model, optimizer, checkpoint_dir='img_classification_ckpt') + trainer.train(flowers, epochs=100, batch_size=32, eval_dataset=flowers_validate, save_interval=1) + ``` + + + - 运行配置 + + - `Trainer` 主要控制Fine-tune的训练,包含以下可控制的参数: + + * `model`: 被优化模型; + * `optimizer`: 优化器选择; + * `use_vdl`: 是否使用vdl可视化训练过程; + * `checkpoint_dir`: 保存模型参数的地址; + * `compare_metrics`: 保存最优模型的衡量指标; + + - `trainer.train` 主要控制具体的训练过程,包含以下可控制的参数: + + * `train_dataset`: 训练时所用的数据集; + * `epochs`: 训练轮数; + * `batch_size`: 训练的批大小,如果使用GPU,请根据实际情况调整batch_size; + * `num_workers`: works的数量,默认为0; + * `eval_dataset`: 验证集; + * `log_interval`: 打印日志的间隔, 单位为执行批训练的次数。 + * `save_interval`: 保存模型的间隔频次,单位为执行训练的轮数。 + + - 模型预测 + + - 当完成Fine-tune后,Fine-tune过程在验证集上表现最优的模型会被保存在`${CHECKPOINT_DIR}/best_model`目录下,其中`${CHECKPOINT_DIR}`目录为Fine-tune时所选择的保存checkpoint的目录。 我们使用该模型来进行预测。predict.py脚本如下: + + - ```python + import paddle + import paddlehub as hub + if __name__ == '__main__': + model = hub.Module(name='rexnet_2_0_imagenet', label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"], load_checkpoint='/PATH/TO/CHECKPOINT') + result = model.predict(['flower.jpg']) + ``` + + + - **NOTE:** 进行预测时,所选择的module,checkpoint_dir,dataset必须和Fine-tune所用的一样。 + +## 四、服务部署 + +- PaddleHub Serving可以部署一个在线分类任务服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + + - ```shell + $ hub serving start -m rexnet_2_0_imagenet + ``` + + - 这样就完成了一个分类任务服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + ```python + import requests + import json + import cv2 + import base64 + import numpy as np + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + # 发送HTTP请求 + org_im = cv2.imread('/PATH/TO/IMAGE') + data = {'images':[cv2_to_base64(org_im)], 'top_k':2} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/rexnet_2_0_imagenet" + r = 
requests.post(url=url, headers=headers, data=json.dumps(data)) + data =r.json()["results"]['data'] + ``` +## 五、更新历史 + +* 1.0.0 + + 初始发布 diff --git a/modules/image/classification/rexnet_2_0_imagenet/label_list.txt b/modules/image/classification/rexnet_2_0_imagenet/label_list.txt new file mode 100644 index 0000000000000000000000000000000000000000..52baabc68e968dde482ca143728295355d83203a --- /dev/null +++ b/modules/image/classification/rexnet_2_0_imagenet/label_list.txt @@ -0,0 +1,1000 @@ +tench +goldfish +great white shark +tiger shark +hammerhead +electric ray +stingray +cock +hen +ostrich +brambling +goldfinch +house finch +junco +indigo bunting +robin +bulbul +jay +magpie +chickadee +water ouzel +kite +bald eagle +vulture +great grey owl +European fire salamander +common newt +eft +spotted salamander +axolotl +bullfrog +tree frog +tailed frog +loggerhead +leatherback turtle +mud turtle +terrapin +box turtle +banded gecko +common iguana +American chameleon +whiptail +agama +frilled lizard +alligator lizard +Gila monster +green lizard +African chameleon +Komodo dragon +African crocodile +American alligator +triceratops +thunder snake +ringneck snake +hognose snake +green snake +king snake +garter snake +water snake +vine snake +night snake +boa constrictor +rock python +Indian cobra +green mamba +sea snake +horned viper +diamondback +sidewinder +trilobite +harvestman +scorpion +black and gold garden spider +barn spider +garden spider +black widow +tarantula +wolf spider +tick +centipede +black grouse +ptarmigan +ruffed grouse +prairie chicken +peacock +quail +partridge +African grey +macaw +sulphur-crested cockatoo +lorikeet +coucal +bee eater +hornbill +hummingbird +jacamar +toucan +drake +red-breasted merganser +goose +black swan +tusker +echidna +platypus +wallaby +koala +wombat +jellyfish +sea anemone +brain coral +flatworm +nematode +conch +snail +slug +sea slug +chiton +chambered nautilus +Dungeness crab +rock crab +fiddler crab +king crab +American lobster +spiny lobster +crayfish +hermit crab +isopod +white stork +black stork +spoonbill +flamingo +little blue heron +American egret +bittern +crane +limpkin +European gallinule +American coot +bustard +ruddy turnstone +red-backed sandpiper +redshank +dowitcher +oystercatcher +pelican +king penguin +albatross +grey whale +killer whale +dugong +sea lion +Chihuahua +Japanese spaniel +Maltese dog +Pekinese +Shih-Tzu +Blenheim spaniel +papillon +toy terrier +Rhodesian ridgeback +Afghan hound +basset +beagle +bloodhound +bluetick +black-and-tan coonhound +Walker hound +English foxhound +redbone +borzoi +Irish wolfhound +Italian greyhound +whippet +Ibizan hound +Norwegian elkhound +otterhound +Saluki +Scottish deerhound +Weimaraner +Staffordshire bullterrier +American Staffordshire terrier +Bedlington terrier +Border terrier +Kerry blue terrier +Irish terrier +Norfolk terrier +Norwich terrier +Yorkshire terrier +wire-haired fox terrier +Lakeland terrier +Sealyham terrier +Airedale +cairn +Australian terrier +Dandie Dinmont +Boston bull +miniature schnauzer +giant schnauzer +standard schnauzer +Scotch terrier +Tibetan terrier +silky terrier +soft-coated wheaten terrier +West Highland white terrier +Lhasa +flat-coated retriever +curly-coated retriever +golden retriever +Labrador retriever +Chesapeake Bay retriever +German short-haired pointer +vizsla +English setter +Irish setter +Gordon setter +Brittany spaniel +clumber +English springer +Welsh springer spaniel +cocker spaniel +Sussex spaniel +Irish water spaniel +kuvasz +schipperke 
+groenendael +malinois +briard +kelpie +komondor +Old English sheepdog +Shetland sheepdog +collie +Border collie +Bouvier des Flandres +Rottweiler +German shepherd +Doberman +miniature pinscher +Greater Swiss Mountain dog +Bernese mountain dog +Appenzeller +EntleBucher +boxer +bull mastiff +Tibetan mastiff +French bulldog +Great Dane +Saint Bernard +Eskimo dog +malamute +Siberian husky +dalmatian +affenpinscher +basenji +pug +Leonberg +Newfoundland +Great Pyrenees +Samoyed +Pomeranian +chow +keeshond +Brabancon griffon +Pembroke +Cardigan +toy poodle +miniature poodle +standard poodle +Mexican hairless +timber wolf +white wolf +red wolf +coyote +dingo +dhole +African hunting dog +hyena +red fox +kit fox +Arctic fox +grey fox +tabby +tiger cat +Persian cat +Siamese cat +Egyptian cat +cougar +lynx +leopard +snow leopard +jaguar +lion +tiger +cheetah +brown bear +American black bear +ice bear +sloth bear +mongoose +meerkat +tiger beetle +ladybug +ground beetle +long-horned beetle +leaf beetle +dung beetle +rhinoceros beetle +weevil +fly +bee +ant +grasshopper +cricket +walking stick +cockroach +mantis +cicada +leafhopper +lacewing +dragonfly +damselfly +admiral +ringlet +monarch +cabbage butterfly +sulphur butterfly +lycaenid +starfish +sea urchin +sea cucumber +wood rabbit +hare +Angora +hamster +porcupine +fox squirrel +marmot +beaver +guinea pig +sorrel +zebra +hog +wild boar +warthog +hippopotamus +ox +water buffalo +bison +ram +bighorn +ibex +hartebeest +impala +gazelle +Arabian camel +llama +weasel +mink +polecat +black-footed ferret +otter +skunk +badger +armadillo +three-toed sloth +orangutan +gorilla +chimpanzee +gibbon +siamang +guenon +patas +baboon +macaque +langur +colobus +proboscis monkey +marmoset +capuchin +howler monkey +titi +spider monkey +squirrel monkey +Madagascar cat +indri +Indian elephant +African elephant +lesser panda +giant panda +barracouta +eel +coho +rock beauty +anemone fish +sturgeon +gar +lionfish +puffer +abacus +abaya +academic gown +accordion +acoustic guitar +aircraft carrier +airliner +airship +altar +ambulance +amphibian +analog clock +apiary +apron +ashcan +assault rifle +backpack +bakery +balance beam +balloon +ballpoint +Band Aid +banjo +bannister +barbell +barber chair +barbershop +barn +barometer +barrel +barrow +baseball +basketball +bassinet +bassoon +bathing cap +bath towel +bathtub +beach wagon +beacon +beaker +bearskin +beer bottle +beer glass +bell cote +bib +bicycle-built-for-two +bikini +binder +binoculars +birdhouse +boathouse +bobsled +bolo tie +bonnet +bookcase +bookshop +bottlecap +bow +bow tie +brass +brassiere +breakwater +breastplate +broom +bucket +buckle +bulletproof vest +bullet train +butcher shop +cab +caldron +candle +cannon +canoe +can opener +cardigan +car mirror +carousel +carpenters kit +carton +car wheel +cash machine +cassette +cassette player +castle +catamaran +CD player +cello +cellular telephone +chain +chainlink fence +chain mail +chain saw +chest +chiffonier +chime +china cabinet +Christmas stocking +church +cinema +cleaver +cliff dwelling +cloak +clog +cocktail shaker +coffee mug +coffeepot +coil +combination lock +computer keyboard +confectionery +container ship +convertible +corkscrew +cornet +cowboy boot +cowboy hat +cradle +crane +crash helmet +crate +crib +Crock Pot +croquet ball +crutch +cuirass +dam +desk +desktop computer +dial telephone +diaper +digital clock +digital watch +dining table +dishrag +dishwasher +disk brake +dock +dogsled +dome +doormat +drilling platform +drum +drumstick +dumbbell +Dutch 
oven +electric fan +electric guitar +electric locomotive +entertainment center +envelope +espresso maker +face powder +feather boa +file +fireboat +fire engine +fire screen +flagpole +flute +folding chair +football helmet +forklift +fountain +fountain pen +four-poster +freight car +French horn +frying pan +fur coat +garbage truck +gasmask +gas pump +goblet +go-kart +golf ball +golfcart +gondola +gong +gown +grand piano +greenhouse +grille +grocery store +guillotine +hair slide +hair spray +half track +hammer +hamper +hand blower +hand-held computer +handkerchief +hard disc +harmonica +harp +harvester +hatchet +holster +home theater +honeycomb +hook +hoopskirt +horizontal bar +horse cart +hourglass +iPod +iron +jack-o-lantern +jean +jeep +jersey +jigsaw puzzle +jinrikisha +joystick +kimono +knee pad +knot +lab coat +ladle +lampshade +laptop +lawn mower +lens cap +letter opener +library +lifeboat +lighter +limousine +liner +lipstick +Loafer +lotion +loudspeaker +loupe +lumbermill +magnetic compass +mailbag +mailbox +maillot +maillot +manhole cover +maraca +marimba +mask +matchstick +maypole +maze +measuring cup +medicine chest +megalith +microphone +microwave +military uniform +milk can +minibus +miniskirt +minivan +missile +mitten +mixing bowl +mobile home +Model T +modem +monastery +monitor +moped +mortar +mortarboard +mosque +mosquito net +motor scooter +mountain bike +mountain tent +mouse +mousetrap +moving van +muzzle +nail +neck brace +necklace +nipple +notebook +obelisk +oboe +ocarina +odometer +oil filter +organ +oscilloscope +overskirt +oxcart +oxygen mask +packet +paddle +paddlewheel +padlock +paintbrush +pajama +palace +panpipe +paper towel +parachute +parallel bars +park bench +parking meter +passenger car +patio +pay-phone +pedestal +pencil box +pencil sharpener +perfume +Petri dish +photocopier +pick +pickelhaube +picket fence +pickup +pier +piggy bank +pill bottle +pillow +ping-pong ball +pinwheel +pirate +pitcher +plane +planetarium +plastic bag +plate rack +plow +plunger +Polaroid camera +pole +police van +poncho +pool table +pop bottle +pot +potters wheel +power drill +prayer rug +printer +prison +projectile +projector +puck +punching bag +purse +quill +quilt +racer +racket +radiator +radio +radio telescope +rain barrel +recreational vehicle +reel +reflex camera +refrigerator +remote control +restaurant +revolver +rifle +rocking chair +rotisserie +rubber eraser +rugby ball +rule +running shoe +safe +safety pin +saltshaker +sandal +sarong +sax +scabbard +scale +school bus +schooner +scoreboard +screen +screw +screwdriver +seat belt +sewing machine +shield +shoe shop +shoji +shopping basket +shopping cart +shovel +shower cap +shower curtain +ski +ski mask +sleeping bag +slide rule +sliding door +slot +snorkel +snowmobile +snowplow +soap dispenser +soccer ball +sock +solar dish +sombrero +soup bowl +space bar +space heater +space shuttle +spatula +speedboat +spider web +spindle +sports car +spotlight +stage +steam locomotive +steel arch bridge +steel drum +stethoscope +stole +stone wall +stopwatch +stove +strainer +streetcar +stretcher +studio couch +stupa +submarine +suit +sundial +sunglass +sunglasses +sunscreen +suspension bridge +swab +sweatshirt +swimming trunks +swing +switch +syringe +table lamp +tank +tape player +teapot +teddy +television +tennis ball +thatch +theater curtain +thimble +thresher +throne +tile roof +toaster +tobacco shop +toilet seat +torch +totem pole +tow truck +toyshop +tractor +trailer truck +tray +trench coat +tricycle +trimaran +tripod +triumphal 
arch +trolleybus +trombone +tub +turnstile +typewriter keyboard +umbrella +unicycle +upright +vacuum +vase +vault +velvet +vending machine +vestment +viaduct +violin +volleyball +waffle iron +wall clock +wallet +wardrobe +warplane +washbasin +washer +water bottle +water jug +water tower +whiskey jug +whistle +wig +window screen +window shade +Windsor tie +wine bottle +wing +wok +wooden spoon +wool +worm fence +wreck +yawl +yurt +web site +comic book +crossword puzzle +street sign +traffic light +book jacket +menu +plate +guacamole +consomme +hot pot +trifle +ice cream +ice lolly +French loaf +bagel +pretzel +cheeseburger +hotdog +mashed potato +head cabbage +broccoli +cauliflower +zucchini +spaghetti squash +acorn squash +butternut squash +cucumber +artichoke +bell pepper +cardoon +mushroom +Granny Smith +strawberry +orange +lemon +fig +pineapple +banana +jackfruit +custard apple +pomegranate +hay +carbonara +chocolate sauce +dough +meat loaf +pizza +potpie +burrito +red wine +espresso +cup +eggnog +alp +bubble +cliff +coral reef +geyser +lakeside +promontory +sandbar +seashore +valley +volcano +ballplayer +groom +scuba diver +rapeseed +daisy +yellow ladys slipper +corn +acorn +hip +buckeye +coral fungus +agaric +gyromitra +stinkhorn +earthstar +hen-of-the-woods +bolete +ear +toilet tissue diff --git a/modules/image/classification/rexnet_2_0_imagenet/module.py b/modules/image/classification/rexnet_2_0_imagenet/module.py new file mode 100644 index 0000000000000000000000000000000000000000..abcec2418119a7a016df5bcb01f720730a1f7e31 --- /dev/null +++ b/modules/image/classification/rexnet_2_0_imagenet/module.py @@ -0,0 +1,196 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
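+# rexnet_2_0_imagenet: same ReXNet architecture as the other rexnet modules in this directory,
+# instantiated with width multiplier 2.0 and loaded with ImageNet-2012 pretrained weights by default.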
+import os +from math import ceil +from typing import Union + +import paddle +import paddle.nn as nn +import numpy as np +import paddlehub.vision.transforms as T +from paddle import ParamAttr +from paddlehub.module.module import moduleinfo +from paddlehub.module.cv_module import ImageClassifierModule + + +def conv_bn_act(out, in_channels, channels, kernel=1, stride=1, pad=0, num_group=1, active=True, relu6=False): + out.append(nn.Conv2D(in_channels, channels, kernel, stride, pad, groups=num_group, bias_attr=False)) + out.append(nn.BatchNorm2D(channels)) + if active: + out.append(nn.ReLU6() if relu6 else nn.ReLU()) + + +def conv_bn_swish(out, in_channels, channels, kernel=1, stride=1, pad=0, num_group=1): + out.append(nn.Conv2D(in_channels, channels, kernel, stride, pad, groups=num_group, bias_attr=False)) + out.append(nn.BatchNorm2D(channels)) + out.append(nn.Swish()) + + +class SE(nn.Layer): + def __init__(self, in_channels, channels, se_ratio=12): + super(SE, self).__init__() + self.avg_pool = nn.AdaptiveAvgPool2D(1) + self.fc = nn.Sequential( + nn.Conv2D(in_channels, channels // se_ratio, kernel_size=1, padding=0), + nn.BatchNorm2D(channels // se_ratio), nn.ReLU(), + nn.Conv2D(channels // se_ratio, channels, kernel_size=1, padding=0), nn.Sigmoid()) + + def forward(self, x): + y = self.avg_pool(x) + y = self.fc(y) + return x * y + + +class LinearBottleneck(nn.Layer): + def __init__(self, in_channels, channels, t, stride, use_se=True, se_ratio=12, **kwargs): + super(LinearBottleneck, self).__init__(**kwargs) + self.use_shortcut = stride == 1 and in_channels <= channels + self.in_channels = in_channels + self.out_channels = channels + + out = [] + if t != 1: + dw_channels = in_channels * t + conv_bn_swish(out, in_channels=in_channels, channels=dw_channels) + else: + dw_channels = in_channels + + conv_bn_act( + out, + in_channels=dw_channels, + channels=dw_channels, + kernel=3, + stride=stride, + pad=1, + num_group=dw_channels, + active=False) + + if use_se: + out.append(SE(dw_channels, dw_channels, se_ratio)) + + out.append(nn.ReLU6()) + conv_bn_act(out, in_channels=dw_channels, channels=channels, active=False, relu6=True) + self.out = nn.Sequential(*out) + + def forward(self, x): + out = self.out(x) + if self.use_shortcut: + out[:, 0:self.in_channels] += x + + return out + + +@moduleinfo( + name="rexnet_2_0_imagenet", + type="CV/classification", + author="paddlepaddle", + author_email="", + summary="rexnet_2_0_imagenet is a classification model, " + "this module is trained with Imagenet dataset.", + version="1.0.0", + meta=ImageClassifierModule) +class ReXNetV1(nn.Layer): + def __init__(self, + label_list: list = None, + load_checkpoint: str = None, + input_ch=16, + final_ch=180, + width_mult=2.0, + depth_mult=1.0, + class_dim=1000, + use_se=True, + se_ratio=12, + dropout_ratio=0.2, + bn_momentum=0.9): + + super(ReXNetV1, self).__init__() + + if label_list is not None: + self.labels = label_list + class_dim = len(self.labels) + else: + label_list = [] + label_file = os.path.join(self.directory, 'label_list.txt') + files = open(label_file) + for line in files.readlines(): + line = line.strip('\n') + label_list.append(line) + self.labels = label_list + class_dim = len(self.labels) + + layers = [1, 2, 2, 3, 3, 5] + strides = [1, 2, 2, 2, 1, 2] + use_ses = [False, False, True, True, True, True] + + layers = [ceil(element * depth_mult) for element in layers] + strides = sum([[element] + [1] * (layers[idx] - 1) for idx, element in enumerate(strides)], []) + if use_se: + use_ses = sum([[element] 
* layers[idx] for idx, element in enumerate(use_ses)], []) + else: + use_ses = [False] * sum(layers[:]) + ts = [1] * layers[0] + [6] * sum(layers[1:]) + + self.depth = sum(layers[:]) * 3 + stem_channel = 32 / width_mult if width_mult < 1.0 else 32 + inplanes = input_ch / width_mult if width_mult < 1.0 else input_ch + + features = [] + in_channels_group = [] + channels_group = [] + + # The following channel configuration is a simple instance to make each layer become an expand layer. + for i in range(self.depth // 3): + if i == 0: + in_channels_group.append(int(round(stem_channel * width_mult))) + channels_group.append(int(round(inplanes * width_mult))) + else: + in_channels_group.append(int(round(inplanes * width_mult))) + inplanes += final_ch / (self.depth // 3 * 1.0) + channels_group.append(int(round(inplanes * width_mult))) + + conv_bn_swish(features, 3, int(round(stem_channel * width_mult)), kernel=3, stride=2, pad=1) + + for block_idx, (in_c, c, t, s, se) in enumerate(zip(in_channels_group, channels_group, ts, strides, use_ses)): + features.append(LinearBottleneck(in_channels=in_c, channels=c, t=t, stride=s, use_se=se, se_ratio=se_ratio)) + + pen_channels = int(1280 * width_mult) + conv_bn_swish(features, c, pen_channels) + + features.append(nn.AdaptiveAvgPool2D(1)) + self.features = nn.Sequential(*features) + self.output = nn.Sequential(nn.Dropout(dropout_ratio), nn.Conv2D(pen_channels, class_dim, 1, bias_attr=True)) + + if load_checkpoint is not None: + self.model_dict = paddle.load(load_checkpoint) + self.set_dict(self.model_dict) + print("load custom checkpoint success") + else: + checkpoint = os.path.join(self.directory, 'model.pdparams') + self.model_dict = paddle.load(checkpoint) + self.set_dict(self.model_dict) + print("load pretrained checkpoint success") + + def transforms(self, images: Union[str, np.ndarray]): + transforms = T.Compose([ + T.Resize((256, 256)), + T.CenterCrop(224), + T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + ], + to_rgb=True) + return transforms(images).astype('float32') + + def forward(self, x): + feat = self.features(x) + x = self.output(feat).squeeze(axis=-1).squeeze(axis=-1) + return x, feat diff --git a/modules/image/classification/rexnet_3_0_imagenet/README.md b/modules/image/classification/rexnet_3_0_imagenet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..934812ac4dae247b2e80f033e0778a896c8377a8 --- /dev/null +++ b/modules/image/classification/rexnet_3_0_imagenet/README.md @@ -0,0 +1,181 @@ +# rexnet_3_0_imagenet + +|模型名称|rexnet_3_0_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|ReXNet| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|是| +|模型大小|200MB| +|指标|-| +|最新更新日期|2021-09-14| + + +## 一、模型基本信息 + +- ### 模型介绍 + + - ReXNet 由 NAVER AI Lab 提出的基于新的网络设计原则而设计的网络。作者针对现有网络中具有代表性的瓶颈问题,提出了一套设计原则,他们认为,常规设计会产生代表性瓶颈,这会影响模型性能。为了研究表征瓶颈,作者研究了由一万个随机网络生成的特征的矩阵秩。此外,还研究了整个层的通道配置以设计更准确的网络架构。最后,作者提出了一套简单有效的设计原则来缓解表征瓶颈。 + + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 + +- ### 2、安装 + - ```shell + $ hub install rexnet_3_0_imagenet + ``` + + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1.命令行预测 + + ```shell + $ hub run rexnet_3_0_imagenet --input_path "/PATH/TO/IMAGE" --top_k 5 + ``` +- ### 2.预测代码示例 + + ```python + import paddle + import paddlehub as hub + if __name__ == '__main__': + 
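+      # As in the other rexnet examples, pass a list of local image paths to predict().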
model = hub.Module(name='rexnet_3_0_imagenet') + result = model.predict(['flower.jpg']) + ``` +- ### 3.如何开始Fine-tune + + - 在完成安装PaddlePaddle与PaddleHub后,通过执行`python train.py`即可开始使用rexnet_3_0_imagenet对[Flowers](../../docs/reference/datasets.md#class-hubdatasetsflowers)等数据集进行Fine-tune。 + + - 代码步骤 + + - Step1: 定义数据预处理方式 + - ```python + import paddlehub.vision.transforms as T + transforms = T.Compose([T.Resize((256, 256)), + T.CenterCrop(224), + T.Normalize(mean=[0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225])], + to_rgb=True) + ``` + + - `transforms` 数据增强模块定义了丰富的数据预处理方式,用户可按照需求替换自己需要的数据预处理方式。 + + - Step2: 下载数据集并使用 + - ```python + from paddlehub.datasets import Flowers + flowers = Flowers(transforms) + flowers_validate = Flowers(transforms, mode='val') + ``` + + * `transforms`: 数据预处理方式。 + * `mode`: 选择数据模式,可选项有 `train`, `test`, `val`, 默认为`train`。 + + * 数据集的准备代码可以参考 [flowers.py](../../paddlehub/datasets/flowers.py)。`hub.datasets.Flowers()` 会自动从网络下载数据集并解压到用户目录下`$HOME/.paddlehub/dataset`目录。 + + + - Step3: 加载预训练模型 + + - ```python + model = hub.Module(name="rexnet_3_0_imagenet", label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"]) + ``` + * `name`: 选择预训练模型的名字。 + * `label_list`: 设置输出分类类别,默认为Imagenet2012类别。 + + - Step4: 选择优化策略和运行配置 + + ```python + optimizer = paddle.optimizer.Adam(learning_rate=0.001, parameters=model.parameters()) + trainer = Trainer(model, optimizer, checkpoint_dir='img_classification_ckpt') + trainer.train(flowers, epochs=100, batch_size=32, eval_dataset=flowers_validate, save_interval=1) + ``` + + + - 运行配置 + + - `Trainer` 主要控制Fine-tune的训练,包含以下可控制的参数: + + * `model`: 被优化模型; + * `optimizer`: 优化器选择; + * `use_vdl`: 是否使用vdl可视化训练过程; + * `checkpoint_dir`: 保存模型参数的地址; + * `compare_metrics`: 保存最优模型的衡量指标; + + - `trainer.train` 主要控制具体的训练过程,包含以下可控制的参数: + + * `train_dataset`: 训练时所用的数据集; + * `epochs`: 训练轮数; + * `batch_size`: 训练的批大小,如果使用GPU,请根据实际情况调整batch_size; + * `num_workers`: works的数量,默认为0; + * `eval_dataset`: 验证集; + * `log_interval`: 打印日志的间隔, 单位为执行批训练的次数。 + * `save_interval`: 保存模型的间隔频次,单位为执行训练的轮数。 + + - 模型预测 + + - 当完成Fine-tune后,Fine-tune过程在验证集上表现最优的模型会被保存在`${CHECKPOINT_DIR}/best_model`目录下,其中`${CHECKPOINT_DIR}`目录为Fine-tune时所选择的保存checkpoint的目录。 我们使用该模型来进行预测。predict.py脚本如下: + + - ```python + import paddle + import paddlehub as hub + if __name__ == '__main__': + model = hub.Module(name='rexnet_3_0_imagenet', label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"], load_checkpoint='/PATH/TO/CHECKPOINT') + result = model.predict(['flower.jpg']) + ``` + + + - **NOTE:** 进行预测时,所选择的module,checkpoint_dir,dataset必须和Fine-tune所用的一样。 + +## 四、服务部署 + +- PaddleHub Serving可以部署一个在线分类任务服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + + - ```shell + $ hub serving start -m rexnet_3_0_imagenet + ``` + + - 这样就完成了一个分类任务服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + ```python + import requests + import json + import cv2 + import base64 + import numpy as np + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + # 发送HTTP请求 + org_im = cv2.imread('/PATH/TO/IMAGE') + data = {'images':[cv2_to_base64(org_im)], 'top_k':2} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/rexnet_3_0_imagenet" + r = 
requests.post(url=url, headers=headers, data=json.dumps(data)) + data =r.json()["results"]['data'] + ``` +## 五、更新历史 + +* 1.0.0 + + 初始发布 diff --git a/modules/image/classification/rexnet_3_0_imagenet/label_list.txt b/modules/image/classification/rexnet_3_0_imagenet/label_list.txt new file mode 100644 index 0000000000000000000000000000000000000000..52baabc68e968dde482ca143728295355d83203a --- /dev/null +++ b/modules/image/classification/rexnet_3_0_imagenet/label_list.txt @@ -0,0 +1,1000 @@ +tench +goldfish +great white shark +tiger shark +hammerhead +electric ray +stingray +cock +hen +ostrich +brambling +goldfinch +house finch +junco +indigo bunting +robin +bulbul +jay +magpie +chickadee +water ouzel +kite +bald eagle +vulture +great grey owl +European fire salamander +common newt +eft +spotted salamander +axolotl +bullfrog +tree frog +tailed frog +loggerhead +leatherback turtle +mud turtle +terrapin +box turtle +banded gecko +common iguana +American chameleon +whiptail +agama +frilled lizard +alligator lizard +Gila monster +green lizard +African chameleon +Komodo dragon +African crocodile +American alligator +triceratops +thunder snake +ringneck snake +hognose snake +green snake +king snake +garter snake +water snake +vine snake +night snake +boa constrictor +rock python +Indian cobra +green mamba +sea snake +horned viper +diamondback +sidewinder +trilobite +harvestman +scorpion +black and gold garden spider +barn spider +garden spider +black widow +tarantula +wolf spider +tick +centipede +black grouse +ptarmigan +ruffed grouse +prairie chicken +peacock +quail +partridge +African grey +macaw +sulphur-crested cockatoo +lorikeet +coucal +bee eater +hornbill +hummingbird +jacamar +toucan +drake +red-breasted merganser +goose +black swan +tusker +echidna +platypus +wallaby +koala +wombat +jellyfish +sea anemone +brain coral +flatworm +nematode +conch +snail +slug +sea slug +chiton +chambered nautilus +Dungeness crab +rock crab +fiddler crab +king crab +American lobster +spiny lobster +crayfish +hermit crab +isopod +white stork +black stork +spoonbill +flamingo +little blue heron +American egret +bittern +crane +limpkin +European gallinule +American coot +bustard +ruddy turnstone +red-backed sandpiper +redshank +dowitcher +oystercatcher +pelican +king penguin +albatross +grey whale +killer whale +dugong +sea lion +Chihuahua +Japanese spaniel +Maltese dog +Pekinese +Shih-Tzu +Blenheim spaniel +papillon +toy terrier +Rhodesian ridgeback +Afghan hound +basset +beagle +bloodhound +bluetick +black-and-tan coonhound +Walker hound +English foxhound +redbone +borzoi +Irish wolfhound +Italian greyhound +whippet +Ibizan hound +Norwegian elkhound +otterhound +Saluki +Scottish deerhound +Weimaraner +Staffordshire bullterrier +American Staffordshire terrier +Bedlington terrier +Border terrier +Kerry blue terrier +Irish terrier +Norfolk terrier +Norwich terrier +Yorkshire terrier +wire-haired fox terrier +Lakeland terrier +Sealyham terrier +Airedale +cairn +Australian terrier +Dandie Dinmont +Boston bull +miniature schnauzer +giant schnauzer +standard schnauzer +Scotch terrier +Tibetan terrier +silky terrier +soft-coated wheaten terrier +West Highland white terrier +Lhasa +flat-coated retriever +curly-coated retriever +golden retriever +Labrador retriever +Chesapeake Bay retriever +German short-haired pointer +vizsla +English setter +Irish setter +Gordon setter +Brittany spaniel +clumber +English springer +Welsh springer spaniel +cocker spaniel +Sussex spaniel +Irish water spaniel +kuvasz +schipperke 
+groenendael +malinois +briard +kelpie +komondor +Old English sheepdog +Shetland sheepdog +collie +Border collie +Bouvier des Flandres +Rottweiler +German shepherd +Doberman +miniature pinscher +Greater Swiss Mountain dog +Bernese mountain dog +Appenzeller +EntleBucher +boxer +bull mastiff +Tibetan mastiff +French bulldog +Great Dane +Saint Bernard +Eskimo dog +malamute +Siberian husky +dalmatian +affenpinscher +basenji +pug +Leonberg +Newfoundland +Great Pyrenees +Samoyed +Pomeranian +chow +keeshond +Brabancon griffon +Pembroke +Cardigan +toy poodle +miniature poodle +standard poodle +Mexican hairless +timber wolf +white wolf +red wolf +coyote +dingo +dhole +African hunting dog +hyena +red fox +kit fox +Arctic fox +grey fox +tabby +tiger cat +Persian cat +Siamese cat +Egyptian cat +cougar +lynx +leopard +snow leopard +jaguar +lion +tiger +cheetah +brown bear +American black bear +ice bear +sloth bear +mongoose +meerkat +tiger beetle +ladybug +ground beetle +long-horned beetle +leaf beetle +dung beetle +rhinoceros beetle +weevil +fly +bee +ant +grasshopper +cricket +walking stick +cockroach +mantis +cicada +leafhopper +lacewing +dragonfly +damselfly +admiral +ringlet +monarch +cabbage butterfly +sulphur butterfly +lycaenid +starfish +sea urchin +sea cucumber +wood rabbit +hare +Angora +hamster +porcupine +fox squirrel +marmot +beaver +guinea pig +sorrel +zebra +hog +wild boar +warthog +hippopotamus +ox +water buffalo +bison +ram +bighorn +ibex +hartebeest +impala +gazelle +Arabian camel +llama +weasel +mink +polecat +black-footed ferret +otter +skunk +badger +armadillo +three-toed sloth +orangutan +gorilla +chimpanzee +gibbon +siamang +guenon +patas +baboon +macaque +langur +colobus +proboscis monkey +marmoset +capuchin +howler monkey +titi +spider monkey +squirrel monkey +Madagascar cat +indri +Indian elephant +African elephant +lesser panda +giant panda +barracouta +eel +coho +rock beauty +anemone fish +sturgeon +gar +lionfish +puffer +abacus +abaya +academic gown +accordion +acoustic guitar +aircraft carrier +airliner +airship +altar +ambulance +amphibian +analog clock +apiary +apron +ashcan +assault rifle +backpack +bakery +balance beam +balloon +ballpoint +Band Aid +banjo +bannister +barbell +barber chair +barbershop +barn +barometer +barrel +barrow +baseball +basketball +bassinet +bassoon +bathing cap +bath towel +bathtub +beach wagon +beacon +beaker +bearskin +beer bottle +beer glass +bell cote +bib +bicycle-built-for-two +bikini +binder +binoculars +birdhouse +boathouse +bobsled +bolo tie +bonnet +bookcase +bookshop +bottlecap +bow +bow tie +brass +brassiere +breakwater +breastplate +broom +bucket +buckle +bulletproof vest +bullet train +butcher shop +cab +caldron +candle +cannon +canoe +can opener +cardigan +car mirror +carousel +carpenters kit +carton +car wheel +cash machine +cassette +cassette player +castle +catamaran +CD player +cello +cellular telephone +chain +chainlink fence +chain mail +chain saw +chest +chiffonier +chime +china cabinet +Christmas stocking +church +cinema +cleaver +cliff dwelling +cloak +clog +cocktail shaker +coffee mug +coffeepot +coil +combination lock +computer keyboard +confectionery +container ship +convertible +corkscrew +cornet +cowboy boot +cowboy hat +cradle +crane +crash helmet +crate +crib +Crock Pot +croquet ball +crutch +cuirass +dam +desk +desktop computer +dial telephone +diaper +digital clock +digital watch +dining table +dishrag +dishwasher +disk brake +dock +dogsled +dome +doormat +drilling platform +drum +drumstick +dumbbell +Dutch 
oven +electric fan +electric guitar +electric locomotive +entertainment center +envelope +espresso maker +face powder +feather boa +file +fireboat +fire engine +fire screen +flagpole +flute +folding chair +football helmet +forklift +fountain +fountain pen +four-poster +freight car +French horn +frying pan +fur coat +garbage truck +gasmask +gas pump +goblet +go-kart +golf ball +golfcart +gondola +gong +gown +grand piano +greenhouse +grille +grocery store +guillotine +hair slide +hair spray +half track +hammer +hamper +hand blower +hand-held computer +handkerchief +hard disc +harmonica +harp +harvester +hatchet +holster +home theater +honeycomb +hook +hoopskirt +horizontal bar +horse cart +hourglass +iPod +iron +jack-o-lantern +jean +jeep +jersey +jigsaw puzzle +jinrikisha +joystick +kimono +knee pad +knot +lab coat +ladle +lampshade +laptop +lawn mower +lens cap +letter opener +library +lifeboat +lighter +limousine +liner +lipstick +Loafer +lotion +loudspeaker +loupe +lumbermill +magnetic compass +mailbag +mailbox +maillot +maillot +manhole cover +maraca +marimba +mask +matchstick +maypole +maze +measuring cup +medicine chest +megalith +microphone +microwave +military uniform +milk can +minibus +miniskirt +minivan +missile +mitten +mixing bowl +mobile home +Model T +modem +monastery +monitor +moped +mortar +mortarboard +mosque +mosquito net +motor scooter +mountain bike +mountain tent +mouse +mousetrap +moving van +muzzle +nail +neck brace +necklace +nipple +notebook +obelisk +oboe +ocarina +odometer +oil filter +organ +oscilloscope +overskirt +oxcart +oxygen mask +packet +paddle +paddlewheel +padlock +paintbrush +pajama +palace +panpipe +paper towel +parachute +parallel bars +park bench +parking meter +passenger car +patio +pay-phone +pedestal +pencil box +pencil sharpener +perfume +Petri dish +photocopier +pick +pickelhaube +picket fence +pickup +pier +piggy bank +pill bottle +pillow +ping-pong ball +pinwheel +pirate +pitcher +plane +planetarium +plastic bag +plate rack +plow +plunger +Polaroid camera +pole +police van +poncho +pool table +pop bottle +pot +potters wheel +power drill +prayer rug +printer +prison +projectile +projector +puck +punching bag +purse +quill +quilt +racer +racket +radiator +radio +radio telescope +rain barrel +recreational vehicle +reel +reflex camera +refrigerator +remote control +restaurant +revolver +rifle +rocking chair +rotisserie +rubber eraser +rugby ball +rule +running shoe +safe +safety pin +saltshaker +sandal +sarong +sax +scabbard +scale +school bus +schooner +scoreboard +screen +screw +screwdriver +seat belt +sewing machine +shield +shoe shop +shoji +shopping basket +shopping cart +shovel +shower cap +shower curtain +ski +ski mask +sleeping bag +slide rule +sliding door +slot +snorkel +snowmobile +snowplow +soap dispenser +soccer ball +sock +solar dish +sombrero +soup bowl +space bar +space heater +space shuttle +spatula +speedboat +spider web +spindle +sports car +spotlight +stage +steam locomotive +steel arch bridge +steel drum +stethoscope +stole +stone wall +stopwatch +stove +strainer +streetcar +stretcher +studio couch +stupa +submarine +suit +sundial +sunglass +sunglasses +sunscreen +suspension bridge +swab +sweatshirt +swimming trunks +swing +switch +syringe +table lamp +tank +tape player +teapot +teddy +television +tennis ball +thatch +theater curtain +thimble +thresher +throne +tile roof +toaster +tobacco shop +toilet seat +torch +totem pole +tow truck +toyshop +tractor +trailer truck +tray +trench coat +tricycle +trimaran +tripod +triumphal 
arch +trolleybus +trombone +tub +turnstile +typewriter keyboard +umbrella +unicycle +upright +vacuum +vase +vault +velvet +vending machine +vestment +viaduct +violin +volleyball +waffle iron +wall clock +wallet +wardrobe +warplane +washbasin +washer +water bottle +water jug +water tower +whiskey jug +whistle +wig +window screen +window shade +Windsor tie +wine bottle +wing +wok +wooden spoon +wool +worm fence +wreck +yawl +yurt +web site +comic book +crossword puzzle +street sign +traffic light +book jacket +menu +plate +guacamole +consomme +hot pot +trifle +ice cream +ice lolly +French loaf +bagel +pretzel +cheeseburger +hotdog +mashed potato +head cabbage +broccoli +cauliflower +zucchini +spaghetti squash +acorn squash +butternut squash +cucumber +artichoke +bell pepper +cardoon +mushroom +Granny Smith +strawberry +orange +lemon +fig +pineapple +banana +jackfruit +custard apple +pomegranate +hay +carbonara +chocolate sauce +dough +meat loaf +pizza +potpie +burrito +red wine +espresso +cup +eggnog +alp +bubble +cliff +coral reef +geyser +lakeside +promontory +sandbar +seashore +valley +volcano +ballplayer +groom +scuba diver +rapeseed +daisy +yellow ladys slipper +corn +acorn +hip +buckeye +coral fungus +agaric +gyromitra +stinkhorn +earthstar +hen-of-the-woods +bolete +ear +toilet tissue diff --git a/modules/image/classification/rexnet_3_0_imagenet/module.py b/modules/image/classification/rexnet_3_0_imagenet/module.py new file mode 100644 index 0000000000000000000000000000000000000000..e3dd626af395245abefe72fba57070fe2d3c8009 --- /dev/null +++ b/modules/image/classification/rexnet_3_0_imagenet/module.py @@ -0,0 +1,196 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
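+# Module overview (the usage sketch follows the predict() examples shown in the
+# classification READMEs of this repository; the image path is a placeholder).
+"""PaddleHub image classification module for ReXNet with width multiplier 3.0,
+trained on the ImageNet-2012 dataset.
+
+Defines conv + BatchNorm + activation helpers, a Squeeze-and-Excitation block and
+the LinearBottleneck building block, and registers the resulting ReXNetV1 network
+as the ``rexnet_3_0_imagenet`` module via ``@moduleinfo``. Typical usage::
+
+    import paddlehub as hub
+
+    model = hub.Module(name='rexnet_3_0_imagenet')
+    result = model.predict(['/PATH/TO/IMAGE'])
+"""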
+import os +from math import ceil +from typing import Union + +import paddle +import paddle.nn as nn +import numpy as np +import paddlehub.vision.transforms as T +from paddle import ParamAttr +from paddlehub.module.module import moduleinfo +from paddlehub.module.cv_module import ImageClassifierModule + + +def conv_bn_act(out, in_channels, channels, kernel=1, stride=1, pad=0, num_group=1, active=True, relu6=False): + out.append(nn.Conv2D(in_channels, channels, kernel, stride, pad, groups=num_group, bias_attr=False)) + out.append(nn.BatchNorm2D(channels)) + if active: + out.append(nn.ReLU6() if relu6 else nn.ReLU()) + + +def conv_bn_swish(out, in_channels, channels, kernel=1, stride=1, pad=0, num_group=1): + out.append(nn.Conv2D(in_channels, channels, kernel, stride, pad, groups=num_group, bias_attr=False)) + out.append(nn.BatchNorm2D(channels)) + out.append(nn.Swish()) + + +class SE(nn.Layer): + def __init__(self, in_channels, channels, se_ratio=12): + super(SE, self).__init__() + self.avg_pool = nn.AdaptiveAvgPool2D(1) + self.fc = nn.Sequential( + nn.Conv2D(in_channels, channels // se_ratio, kernel_size=1, padding=0), + nn.BatchNorm2D(channels // se_ratio), nn.ReLU(), + nn.Conv2D(channels // se_ratio, channels, kernel_size=1, padding=0), nn.Sigmoid()) + + def forward(self, x): + y = self.avg_pool(x) + y = self.fc(y) + return x * y + + +class LinearBottleneck(nn.Layer): + def __init__(self, in_channels, channels, t, stride, use_se=True, se_ratio=12, **kwargs): + super(LinearBottleneck, self).__init__(**kwargs) + self.use_shortcut = stride == 1 and in_channels <= channels + self.in_channels = in_channels + self.out_channels = channels + + out = [] + if t != 1: + dw_channels = in_channels * t + conv_bn_swish(out, in_channels=in_channels, channels=dw_channels) + else: + dw_channels = in_channels + + conv_bn_act( + out, + in_channels=dw_channels, + channels=dw_channels, + kernel=3, + stride=stride, + pad=1, + num_group=dw_channels, + active=False) + + if use_se: + out.append(SE(dw_channels, dw_channels, se_ratio)) + + out.append(nn.ReLU6()) + conv_bn_act(out, in_channels=dw_channels, channels=channels, active=False, relu6=True) + self.out = nn.Sequential(*out) + + def forward(self, x): + out = self.out(x) + if self.use_shortcut: + out[:, 0:self.in_channels] += x + + return out + + +@moduleinfo( + name="rexnet_3_0_imagenet", + type="CV/classification", + author="paddlepaddle", + author_email="", + summary="rexnet_3_0_imagenet is a classification model, " + "this module is trained with Imagenet dataset.", + version="1.0.0", + meta=ImageClassifierModule) +class ReXNetV1(nn.Layer): + def __init__(self, + label_list: list = None, + load_checkpoint: str = None, + input_ch=16, + final_ch=180, + width_mult=3.0, + depth_mult=1.0, + class_dim=1000, + use_se=True, + se_ratio=12, + dropout_ratio=0.2, + bn_momentum=0.9): + + super(ReXNetV1, self).__init__() + + if label_list is not None: + self.labels = label_list + class_dim = len(self.labels) + else: + label_list = [] + label_file = os.path.join(self.directory, 'label_list.txt') + files = open(label_file) + for line in files.readlines(): + line = line.strip('\n') + label_list.append(line) + self.labels = label_list + class_dim = len(self.labels) + + layers = [1, 2, 2, 3, 3, 5] + strides = [1, 2, 2, 2, 1, 2] + use_ses = [False, False, True, True, True, True] + + layers = [ceil(element * depth_mult) for element in layers] + strides = sum([[element] + [1] * (layers[idx] - 1) for idx, element in enumerate(strides)], []) + if use_se: + use_ses = sum([[element] 
* layers[idx] for idx, element in enumerate(use_ses)], []) + else: + use_ses = [False] * sum(layers[:]) + ts = [1] * layers[0] + [6] * sum(layers[1:]) + + self.depth = sum(layers[:]) * 3 + stem_channel = 32 / width_mult if width_mult < 1.0 else 32 + inplanes = input_ch / width_mult if width_mult < 1.0 else input_ch + + features = [] + in_channels_group = [] + channels_group = [] + + # The following channel configuration is a simple instance to make each layer become an expand layer. + for i in range(self.depth // 3): + if i == 0: + in_channels_group.append(int(round(stem_channel * width_mult))) + channels_group.append(int(round(inplanes * width_mult))) + else: + in_channels_group.append(int(round(inplanes * width_mult))) + inplanes += final_ch / (self.depth // 3 * 1.0) + channels_group.append(int(round(inplanes * width_mult))) + + conv_bn_swish(features, 3, int(round(stem_channel * width_mult)), kernel=3, stride=2, pad=1) + + for block_idx, (in_c, c, t, s, se) in enumerate(zip(in_channels_group, channels_group, ts, strides, use_ses)): + features.append(LinearBottleneck(in_channels=in_c, channels=c, t=t, stride=s, use_se=se, se_ratio=se_ratio)) + + pen_channels = int(1280 * width_mult) + conv_bn_swish(features, c, pen_channels) + + features.append(nn.AdaptiveAvgPool2D(1)) + self.features = nn.Sequential(*features) + self.output = nn.Sequential(nn.Dropout(dropout_ratio), nn.Conv2D(pen_channels, class_dim, 1, bias_attr=True)) + + if load_checkpoint is not None: + self.model_dict = paddle.load(load_checkpoint) + self.set_dict(self.model_dict) + print("load custom checkpoint success") + else: + checkpoint = os.path.join(self.directory, 'model.pdparams') + self.model_dict = paddle.load(checkpoint) + self.set_dict(self.model_dict) + print("load pretrained checkpoint success") + + def transforms(self, images: Union[str, np.ndarray]): + transforms = T.Compose([ + T.Resize((256, 256)), + T.CenterCrop(224), + T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + ], + to_rgb=True) + return transforms(images).astype('float32') + + def forward(self, x): + feat = self.features(x) + x = self.output(feat).squeeze(axis=-1).squeeze(axis=-1) + return x, feat diff --git a/modules/image/classification/se_hrnet64_imagenet_ssld/README.md b/modules/image/classification/se_hrnet64_imagenet_ssld/README.md index fd20f3862ecc053a12274fa2f5f4cbaa8ed41cf4..65289a511405a4a840d5100935e27eee58dd0524 100644 --- a/modules/image/classification/se_hrnet64_imagenet_ssld/README.md +++ b/modules/image/classification/se_hrnet64_imagenet_ssld/README.md @@ -1,192 +1,181 @@ -```shell -$ hub install se_hrnet64_imagenet_ssld==1.0.0 -``` +# se_hrnet64_imagenet_ssld -## 命令行预测 +|模型名称|se_hrnet64_imagenet_ssld| +| :--- | :---: | +|类别|图像-图像分类| +|网络|HRNet| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|是| +|模型大小|493MB| +|指标|-| +|最新更新日期|2021-09-14| -```shell -$ hub run se_hrnet64_imagenet_ssld --input_path "/PATH/TO/IMAGE" --top_k 5 -``` -## 脚本预测 +## 一、模型基本信息 -```python -import paddle -import paddlehub as hub +- ### 模型介绍 -if __name__ == '__main__': + - HRNet是微软亚洲研究院在2019年提出的全新神经网络。与之前的卷积神经网络不同,这个网络在网络的深层依然可以保持高分辨率,所以预测的关键点的热图更加准确,而且在空间上也更加准确。此外,该网络在其他对分辨率敏感的视觉任务中表现特别好,例如检测和分割。 - model = hub.Module(name='se_hrnet64_imagenet_ssld',) - result = model.predict([PATH/TO/IMAGE]) -``` -## Fine-tune代码步骤 +## 二、安装 -使用PaddleHub Fine-tune API进行Fine-tune可以分为4个步骤。 +- ### 1、环境依赖 -### Step1: 定义数据预处理方式 -```python -import paddlehub.vision.transforms as T + - paddlepaddle >= 2.0.0 -transforms = T.Compose([T.Resize((256, 256)), - T.CenterCrop(224), - 
T.Normalize(mean=[0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225])], - to_rgb=True) -``` + - paddlehub >= 2.0.0 -'transforms' 数据增强模块定义了丰富的数据预处理方式,用户可按照需求替换自己需要的数据预处理方式。 +- ### 2、安装 + - ```shell + $ hub install se_hrnet64_imagenet_ssld + ``` -### Step2: 下载数据集并使用 -```python -from paddlehub.datasets import Flowers + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) -flowers = Flowers(transforms) -flowers_validate = Flowers(transforms, mode='val') -``` -* transforms(Callable): 数据预处理方式。 -* mode(str): 选择数据模式,可选项有 'train', 'test', 'val', 默认为'train'。 +## 三、模型API预测 -'hub.datasets.Flowers()' 会自动从网络下载数据集并解压到用户目录下'$HOME/.paddlehub/dataset'目录。 +- ### 1.命令行预测 + ```shell + $ hub run se_hrnet64_imagenet_ssld --input_path "/PATH/TO/IMAGE" --top_k 5 + ``` +- ### 2.预测代码示例 -### Step3: 加载预训练模型 + ```python + import paddle + import paddlehub as hub + if __name__ == '__main__': + model = hub.Module(name='se_hrnet64_imagenet_ssld') + result = model.predict(['flower.jpg']) + ``` +- ### 3.如何开始Fine-tune -```python -import paddlehub as hub + - 在完成安装PaddlePaddle与PaddleHub后,通过执行`python train.py`即可开始使用se_hrnet64_imagenet_ssld对[Flowers](../../docs/reference/datasets.md#class-hubdatasetsflowers)等数据集进行Fine-tune。 -model = hub.Module(name='se_hrnet64_imagenet_ssld', - label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"], - load_checkpoint=None) -``` -* name(str): 选择预训练模型的名字。 -* label_list(list): 设置标签对应分类类别, 默认为Imagenet2012类别。 -* load _checkpoint(str): 模型参数地址。 + - 代码步骤 -PaddleHub提供许多图像分类预训练模型,如xception、mobilenet、efficientnet等,详细信息参见[图像分类模型](https://www.paddlepaddle.org.cn/hub?filter=en_category&value=ImageClassification)。 + - Step1: 定义数据预处理方式 + - ```python + import paddlehub.vision.transforms as T + transforms = T.Compose([T.Resize((256, 256)), + T.CenterCrop(224), + T.Normalize(mean=[0.485, 0.456, 0.406], std = [0.229, 0.224, 0.225])], + to_rgb=True) + ``` -如果想尝试efficientnet模型,只需要更换Module中的'name'参数即可. 
-```python -import paddlehub as hub + - `transforms` 数据增强模块定义了丰富的数据预处理方式,用户可按照需求替换自己需要的数据预处理方式。 -# 更换name参数即可无缝切换efficientnet模型, 代码示例如下 -module = hub.Module(name="efficientnetb7_imagenet") -``` -**NOTE:**目前部分模型还没有完全升级到2.0版本,敬请期待。 + - Step2: 下载数据集并使用 + - ```python + from paddlehub.datasets import Flowers + flowers = Flowers(transforms) + flowers_validate = Flowers(transforms, mode='val') + ``` -### Step4: 选择优化策略和运行配置 + * `transforms`: 数据预处理方式。 + * `mode`: 选择数据模式,可选项有 `train`, `test`, `val`, 默认为`train`。 -```python -import paddle -from paddlehub.finetune.trainer import Trainer + * 数据集的准备代码可以参考 [flowers.py](../../paddlehub/datasets/flowers.py)。`hub.datasets.Flowers()` 会自动从网络下载数据集并解压到用户目录下`$HOME/.paddlehub/dataset`目录。 -optimizer = paddle.optimizer.Adam(learning_rate=0.001, parameters=model.parameters()) -trainer = Trainer(model, optimizer, checkpoint_dir='img_classification_ckpt') -trainer.train(flowers, epochs=100, batch_size=32, eval_dataset=flowers_validate, save_interval=1) -``` + - Step3: 加载预训练模型 -#### 优化策略 + - ```python + model = hub.Module(name="se_hrnet64_imagenet_ssld", label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"]) + ``` + * `name`: 选择预训练模型的名字。 + * `label_list`: 设置输出分类类别,默认为Imagenet2012类别。 -Paddle2.0rc提供了多种优化器选择,如'SGD', 'Adam', 'Adamax'等,详细参见[策略](https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0-rc/api/paddle/optimizer/optimizer/Optimizer_cn.html)。 + - Step4: 选择优化策略和运行配置 -其中'Adam': + ```python + optimizer = paddle.optimizer.Adam(learning_rate=0.001, parameters=model.parameters()) + trainer = Trainer(model, optimizer, checkpoint_dir='img_classification_ckpt') + trainer.train(flowers, epochs=100, batch_size=32, eval_dataset=flowers_validate, save_interval=1) + ``` -* learning_rate: 全局学习率。默认为1e-3; -* parameters: 待优化模型参数。 -#### 运行配置 -'Trainer' 主要控制Fine-tune的训练,包含以下可控制的参数: + - 运行配置 -* model: 被优化模型; -* optimizer: 优化器选择; -* use_vdl: 是否使用vdl可视化训练过程; -* checkpoint_dir: 保存模型参数的地址; -* compare_metrics: 保存最优模型的衡量指标; + - `Trainer` 主要控制Fine-tune的训练,包含以下可控制的参数: -'trainer.train' 主要控制具体的训练过程,包含以下可控制的参数: + * `model`: 被优化模型; + * `optimizer`: 优化器选择; + * `use_vdl`: 是否使用vdl可视化训练过程; + * `checkpoint_dir`: 保存模型参数的地址; + * `compare_metrics`: 保存最优模型的衡量指标; -* train_dataset: 训练时所用的数据集; -* epochs: 训练轮数; -* batch_size: 训练的批大小,如果使用GPU,请根据实际情况调整batch_size; -* num_workers: works的数量,默认为0; -* eval_dataset: 验证集; -* log_interval: 打印日志的间隔, 单位为执行批训练的次数。 -* save_interval: 保存模型的间隔频次,单位为执行训练的轮数。 + - `trainer.train` 主要控制具体的训练过程,包含以下可控制的参数: -## 模型预测 + * `train_dataset`: 训练时所用的数据集; + * `epochs`: 训练轮数; + * `batch_size`: 训练的批大小,如果使用GPU,请根据实际情况调整batch_size; + * `num_workers`: works的数量,默认为0; + * `eval_dataset`: 验证集; + * `log_interval`: 打印日志的间隔, 单位为执行批训练的次数。 + * `save_interval`: 保存模型的间隔频次,单位为执行训练的轮数。 -当完成Fine-tune后,Fine-tune过程在验证集上表现最优的模型会被保存在'${CHECKPOINT_DIR}/best_model'目录下,其中'${CHECKPOINT_DIR}'目录为Fine-tune时所选择的保存checkpoint的目录。 -我们使用该模型来进行预测。predict.py脚本如下: + - 模型预测 -```python -import paddle -import paddlehub as hub + - 当完成Fine-tune后,Fine-tune过程在验证集上表现最优的模型会被保存在`${CHECKPOINT_DIR}/best_model`目录下,其中`${CHECKPOINT_DIR}`目录为Fine-tune时所选择的保存checkpoint的目录。 我们使用该模型来进行预测。predict.py脚本如下: -if __name__ == '__main__': + - ```python + import paddle + import paddlehub as hub + if __name__ == '__main__': + model = hub.Module(name='se_hrnet64_imagenet_ssld', label_list=["roses", "tulips", "daisy", "sunflowers", "dandelion"], load_checkpoint='/PATH/TO/CHECKPOINT') + result = model.predict(['flower.jpg']) + ``` - model = hub.Module(name='se_hrnet64_imagenet_ssld', label_list=["roses", "tulips", "daisy", "sunflowers", 
"dandelion"], load_checkpoint='/PATH/TO/CHECKPOINT') - result = model.predict(['flower.jpg']) -``` -参数配置正确后,请执行脚本'python predict.py', 加载模型具体可参见[加载](https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0-rc/api/paddle/framework/io/load_cn.html#load)。 + - **NOTE:** 进行预测时,所选择的module,checkpoint_dir,dataset必须和Fine-tune所用的一样。 -**NOTE:** 进行预测时,所选择的module,checkpoint_dir,dataset必须和Fine-tune所用的一样。 +## 四、服务部署 -## 服务部署 +- PaddleHub Serving可以部署一个在线分类任务服务。 -PaddleHub Serving可以部署一个在线分类任务服务 +- ### 第一步:启动PaddleHub Serving -## Step1: 启动PaddleHub Serving + - 运行启动命令: -运行启动命令: + - ```shell + $ hub serving start -m se_hrnet64_imagenet_ssld + ``` -```shell -$ hub serving start -m se_hrnet64_imagenet_ssld -``` + - 这样就完成了一个分类任务服务化API的部署,默认端口号为8866。 -这样就完成了一个分类任务服务化API的部署,默认端口号为8866。 + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 +- ### 第二步:发送预测请求 -## Step2: 发送预测请求 + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + ```python + import requests + import json + import cv2 + import base64 + import numpy as np + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + # 发送HTTP请求 + org_im = cv2.imread('/PATH/TO/IMAGE') + data = {'images':[cv2_to_base64(org_im)], 'top_k':2} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/se_hrnet64_imagenet_ssld" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + data =r.json()["results"]['data'] + ``` +## 五、更新历史 -```python -import requests -import json -import cv2 -import base64 +* 1.0.0 -import numpy as np - - -def cv2_to_base64(image): - data = cv2.imencode('.jpg', image)[1] - return base64.b64encode(data.tostring()).decode('utf8') - -def base64_to_cv2(b64str): - data = base64.b64decode(b64str.encode('utf8')) - data = np.fromstring(data, np.uint8) - data = cv2.imdecode(data, cv2.IMREAD_COLOR) - return data - -# 发送HTTP请求 -org_im = cv2.imread('/PATH/TO/IMAGE') - -data = {'images':[cv2_to_base64(org_im)], 'top_k':2} -headers = {"Content-type": "application/json"} -url = "http://127.0.0.1:8866/predict/se_hrnet64_imagenet_ssld" -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -data =r.json()["results"]['data'] -``` - -### 查看代码 - -[PaddleClas](https://github.com/PaddlePaddle/PaddleClas) - -### 依赖 - -paddlepaddle >= 2.0.0 - -paddlehub >= 2.0.0 + 初始发布 diff --git a/modules/image/classification/se_resnet18_vd_imagenet/README.md b/modules/image/classification/se_resnet18_vd_imagenet/README.md index 2b1b1c23b4042e09792f91f72718c1abe19746ed..b1c11fed0f84cd73fcda65dc928e2405676b725b 100644 --- a/modules/image/classification/se_resnet18_vd_imagenet/README.md +++ b/modules/image/classification/se_resnet18_vd_imagenet/README.md @@ -84,7 +84,7 @@ def save_inference_model(dirname, * params\_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效) * combined: 是否将参数保存到统一的一个文件中 -## 代码示例 +## 预测代码示例 ```python import paddlehub as hub diff --git a/modules/image/classification/se_resnext101_32x4d_imagenet/README.md b/modules/image/classification/se_resnext101_32x4d_imagenet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1f2ac07a5a923e6634f70446816a6aa22516dab0 --- /dev/null +++ b/modules/image/classification/se_resnext101_32x4d_imagenet/README.md @@ 
-0,0 +1,84 @@ +# se_resnext101_32x4d_imagenet + +|模型名称|se_resnext101_32x4d_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|SE_ResNeXt| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|否| +|模型大小|191MB| +|最新更新日期|-| +|数据指标|-| + + +## 一、模型基本信息 + + + +- ### 模型介绍 + + - Squeeze-and-Excitation Networks是由Momenta在2017年提出的一种图像分类结构。该结构通过对特征通道间的相关性进行建模,把重要的特征进行强化来提升准确率。SE_ResNeXt基于ResNeXt模型添加了SE Block,并获得了2017 ILSVR竞赛的冠军。该PaddleHub Module结构为SE_ResNeXt101_32x4d,基于ImageNet-2012数据集训练,接受输入图片大小为224 x 224 x 3,支持直接通过命令行或者Python接口进行预测。 + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + + +- ### 2、安装 + + - ```shell + $ hub install se_resnext101_32x4d_imagenet + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run se_resnext101_32x4d_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现图像分类模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="se_resnext101_32x4d_imagenet") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - 分类接口API。 + - **参数** + - data:dict类型,key为image,str类型,value为待检测的图片路径,list类型。 + + - **返回** + - result:list类型,每个元素为对应输入图片的预测结果。预测结果为dict类型,key为该图片分类结果label,value为该label对应的概率 + + + + + +## 四、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install se_resnext101_32x4d_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/se_resnext50_32x4d_imagenet/README.md b/modules/image/classification/se_resnext50_32x4d_imagenet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..05c4020f3ac5257c4324da1b4c6eeb167c522f2a --- /dev/null +++ b/modules/image/classification/se_resnext50_32x4d_imagenet/README.md @@ -0,0 +1,84 @@ +# se_resnext50_32x4d_imagenet + +|模型名称|se_resnext50_32x4d_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|SE_ResNeXt| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|否| +|模型大小|107MB| +|最新更新日期|-| +|数据指标|-| + + +## 一、模型基本信息 + + + +- ### 模型介绍 + + - Squeeze-and-Excitation Networks是由Momenta在2017年提出的一种图像分类结构。该结构通过对特征通道间的相关性进行建模,把重要的特征进行强化来提升准确率。SE_ResNeXt基于ResNeXt模型添加了SE Block,并获得了2017 ILSVR竞赛的冠军。该PaddleHub Module结构为SE_ResNeXt50_32x4d,基于ImageNet-2012数据集训练,接受输入图片大小为224 x 224 x 3,支持直接通过命令行或者Python接口进行预测。 + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + + +- ### 2、安装 + + - ```shell + $ hub install se_resnext50_32x4d_imagenet + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run se_resnext50_32x4d_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现图像分类模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="se_resnext50_32x4d_imagenet") + test_img_path = "/PATH/TO/IMAGE" + input_dict = 
{"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - 分类接口API。 + - **参数** + - data:dict类型,key为image,str类型,value为待检测的图片路径,list类型。 + + - **返回** + - result:list类型,每个元素为对应输入图片的预测结果。预测结果为dict类型,key为该图片分类结果label,value为该label对应的概率 + + + + + +## 四、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install se_resnext50_32x4d_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/shufflenet_v2_imagenet/README.md b/modules/image/classification/shufflenet_v2_imagenet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..3e372c9600c8c2ebf017a5cfb7d8e6c5baf55df6 --- /dev/null +++ b/modules/image/classification/shufflenet_v2_imagenet/README.md @@ -0,0 +1,84 @@ +# shufflenet_v2_imagenet + +|模型名称|shufflenet_v2_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|ShuffleNet V2| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|否| +|模型大小|11MB| +|最新更新日期|-| +|数据指标|-| + + +## 一、模型基本信息 + + + +- ### 模型介绍 + + - ShuffleNet V2是由旷视科技在2018年提出的轻量级图像分类模型,该模型通过pointwise group convolution和channel shuffle两种方式,在保持精度的同时大大降低了模型的计算量。该PaddleHub Module结构为ShuffleNet V2,基于ImageNet-2012数据集训练,接受输入图片大小为224 x 224 x 3,支持直接通过命令行或者Python接口进行预测。 + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + + +- ### 2、安装 + + - ```shell + $ hub install shufflenet_v2_imagenet + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run shufflenet_v2_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现图像分类模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="shufflenet_v2_imagenet") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - 分类接口API。 + - **参数** + - data:dict类型,key为image,str类型,value为待检测的图片路径,list类型。 + + - **返回** + - result:list类型,每个元素为对应输入图片的预测结果。预测结果为dict类型,key为该图片分类结果label,value为该label对应的概率 + + + + + +## 四、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install shufflenet_v2_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/spinalnet_res101_gemstone/README.md b/modules/image/classification/spinalnet_res101_gemstone/README.md new file mode 100644 index 0000000000000000000000000000000000000000..bd785bda5c4deffdecdf0292d3ff7e6965b15ff5 --- /dev/null +++ b/modules/image/classification/spinalnet_res101_gemstone/README.md @@ -0,0 +1,81 @@ +# spinalnet_res101_gemstone + +|模型名称|spinalnet_res101_gemstone| +| :--- | :---: | +|类别|图像-图像分类| +|网络|resnet101| +|数据集|gemstone| +|是否支持Fine-tuning|否| +|模型大小|246MB| +|最新更新日期|-| +|数据指标|-| + + +## 一、模型基本信息 + + + +- ### 模型介绍 + + - 使用PaddleHub的SpinalNet预训练模型进行宝石识别或finetune并完成宝石的预测任务。 +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + + +- ### 2、安装 + + - ```shell + $ hub install spinalnet_res101_gemstone + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | 
[零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run spinalnet_res101_gemstone --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现图像分类模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="spinalnet_res101_gemstone") + result = classifier.predict(['/PATH/TO/IMAGE']) + print(result) + ``` + +- ### 3、API + + - ```python + def predict(images) + ``` + - 分类接口API。 + - **参数** + - images: list类型,待预测的图像。 + + - **返回** + - result:list类型,每个元素为对应输入图片的预测结果。预测结果为dict类型,key为该图片分类结果label,value为该label对应的概率 + + + + + +## 四、更新历史 + +* 1.0.0 + + 初始发布 + - ```shell + $ hub install spinalnet_res101_gemstone==1.0.0 + ``` diff --git a/modules/image/classification/spinalnet_res50_gemstone/README.md b/modules/image/classification/spinalnet_res50_gemstone/README.md new file mode 100644 index 0000000000000000000000000000000000000000..ed97788f71ae95deebe9fd3ec83d2f08bb6bd56f --- /dev/null +++ b/modules/image/classification/spinalnet_res50_gemstone/README.md @@ -0,0 +1,81 @@ +# spinalnet_res50_gemstone + +|模型名称|spinalnet_res50_gemstone| +| :--- | :---: | +|类别|图像-图像分类| +|网络|resnet50| +|数据集|gemstone| +|是否支持Fine-tuning|否| +|模型大小|137MB| +|最新更新日期|-| +|数据指标|-| + + +## 一、模型基本信息 + + + +- ### 模型介绍 + + - 使用PaddleHub的SpinalNet预训练模型进行宝石识别或finetune并完成宝石的预测任务。 +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + + +- ### 2、安装 + + - ```shell + $ hub install spinalnet_res50_gemstone + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run spinalnet_res50_gemstone --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现图像分类模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="spinalnet_res50_gemstone") + result = classifier.predict(['/PATH/TO/IMAGE']) + print(result) + ``` + +- ### 3、API + + - ```python + def predict(images) + ``` + - 分类接口API。 + - **参数** + - images: list类型,待预测的图像。 + + - **返回** + - result:list类型,每个元素为对应输入图片的预测结果。预测结果为dict类型,key为该图片分类结果label,value为该label对应的概率 + + + + + +## 四、更新历史 + +* 1.0.0 + + 初始发布 + - ```shell + $ hub install spinalnet_res50_gemstone==1.0.0 + ``` diff --git a/modules/image/classification/spinalnet_vgg16_gemstone/README.md b/modules/image/classification/spinalnet_vgg16_gemstone/README.md new file mode 100644 index 0000000000000000000000000000000000000000..5ca6eacd550179c5cb0c838d0c2451eb3d61f02f --- /dev/null +++ b/modules/image/classification/spinalnet_vgg16_gemstone/README.md @@ -0,0 +1,81 @@ +# spinalnet_vgg16_gemstone + +|模型名称|spinalnet_vgg16_gemstone| +| :--- | :---: | +|类别|图像-图像分类| +|网络|vgg16| +|数据集|gemstone| +|是否支持Fine-tuning|否| +|模型大小|1.5GB| +|最新更新日期|-| +|数据指标|-| + + +## 一、模型基本信息 + + + +- ### 模型介绍 + + - 使用PaddleHub的SpinalNet预训练模型进行宝石识别或finetune并完成宝石的预测任务。 +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + + +- ### 2、安装 + + - ```shell + $ hub install spinalnet_vgg16_gemstone + ``` + - 
如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run spinalnet_vgg16_gemstone --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现图像分类模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="spinalnet_vgg16_gemstone") + result = classifier.predict(['/PATH/TO/IMAGE']) + print(result) + ``` + +- ### 3、API + + - ```python + def predict(images) + ``` + - 分类接口API。 + - **参数** + - images: list类型,待预测的图像。 + + - **返回** + - result:list类型,每个元素为对应输入图片的预测结果。预测结果为dict类型,key为该图片分类结果label,value为该label对应的概率 + + + + + +## 四、更新历史 + +* 1.0.0 + + 初始发布 + - ```shell + $ hub install spinalnet_vgg16_gemstone==1.0.0 + ``` diff --git a/modules/image/classification/vgg11_imagenet/README.md b/modules/image/classification/vgg11_imagenet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..2905883511483c62b3a94907c21aa87994500a19 --- /dev/null +++ b/modules/image/classification/vgg11_imagenet/README.md @@ -0,0 +1,84 @@ +# vgg11_imagenet + +|模型名称|vgg11_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|VGG| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|否| +|模型大小|507MB| +|最新更新日期|-| +|数据指标|-| + + +## 一、模型基本信息 + + + +- ### 模型介绍 + + - VGG是牛津大学计算机视觉组和DeepMind在2014年提出的一种图像分类模型。该系列模型探索了卷积神经网络的深度与其性能之间的关系,通过实验证明了增加网络的深度能够在一定程度上影响网络最终的性能,到目前为止,VGG仍然被许多其他图像任务用作特征提取的BackBone网络。该PaddleHub Module结构为VGG11,基于ImageNet-2012数据集训练,接受输入图片大小为224 x 224 x 3,支持直接通过命令行或者Python接口进行预测。 + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + + +- ### 2、安装 + + - ```shell + $ hub install vgg11_imagenet + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run vgg11_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现图像分类模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="vgg11_imagenet") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - 分类接口API。 + - **参数** + - data:dict类型,key为image,str类型,value为待检测的图片路径,list类型。 + + - **返回** + - result:list类型,每个元素为对应输入图片的预测结果。预测结果为dict类型,key为该图片分类结果label,value为该label对应的概率 + + + + + +## 四、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install vgg11_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/vgg13_imagenet/README.md b/modules/image/classification/vgg13_imagenet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..2f967b7f19bec47d6b5695fbaccae6086a98cee7 --- /dev/null +++ b/modules/image/classification/vgg13_imagenet/README.md @@ -0,0 +1,84 @@ +# vgg13_imagenet + +|模型名称|vgg13_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|VGG| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|否| +|模型大小|508MB| +|最新更新日期|-| +|数据指标|-| + + +## 一、模型基本信息 + + + +- ### 模型介绍 + + - 
VGG是牛津大学计算机视觉组和DeepMind在2014年提出的一种图像分类模型。该系列模型探索了卷积神经网络的深度与其性能之间的关系,通过实验证明了增加网络的深度能够在一定程度上影响网络最终的性能,到目前为止,VGG仍然被许多其他图像任务用作特征提取的BackBone网络。该PaddleHub Module结构为VGG13,基于ImageNet-2012数据集训练,接受输入图片大小为224 x 224 x 3,支持直接通过命令行或者Python接口进行预测。 + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + + +- ### 2、安装 + + - ```shell + $ hub install vgg13_imagenet + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run vgg13_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现图像分类模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="vgg13_imagenet") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - 分类接口API。 + - **参数** + - data:dict类型,key为image,str类型,value为待检测的图片路径,list类型。 + + - **返回** + - result:list类型,每个元素为对应输入图片的预测结果。预测结果为dict类型,key为该图片分类结果label,value为该label对应的概率 + + + + + +## 四、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install vgg13_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/vgg16_imagenet/README.md b/modules/image/classification/vgg16_imagenet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..14186cec20232f3d9562620bd1b062082f004b78 --- /dev/null +++ b/modules/image/classification/vgg16_imagenet/README.md @@ -0,0 +1,84 @@ +# vgg16_imagenet + +|模型名称|vgg16_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|VGG| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|否| +|模型大小|528MB| +|最新更新日期|-| +|数据指标|-| + + +## 一、模型基本信息 + + + +- ### 模型介绍 + + - VGG是牛津大学计算机视觉组和DeepMind在2014年提出的一种图像分类模型。该系列模型探索了卷积神经网络的深度与其性能之间的关系,通过实验证明了增加网络的深度能够在一定程度上影响网络最终的性能,到目前为止,VGG仍然被许多其他图像任务用作特征提取的BackBone网络。该PaddleHub Module结构为VGG16,基于ImageNet-2012数据集训练,接受输入图片大小为224 x 224 x 3,支持直接通过命令行或者Python接口进行预测。 + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + + +- ### 2、安装 + + - ```shell + $ hub install vgg16_imagenet + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run vgg16_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现图像分类模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="vgg16_imagenet") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - 分类接口API。 + - **参数** + - data:dict类型,key为image,str类型,value为待检测的图片路径,list类型。 + + - **返回** + - result:list类型,每个元素为对应输入图片的预测结果。预测结果为dict类型,key为该图片分类结果label,value为该label对应的概率 + + + + + +## 四、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install vgg16_imagenet==1.0.0 + ``` diff --git 
a/modules/image/classification/vgg19_imagenet/README.md b/modules/image/classification/vgg19_imagenet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..3ecf4e2bff64c4558c68dc89688cab18748313bb --- /dev/null +++ b/modules/image/classification/vgg19_imagenet/README.md @@ -0,0 +1,84 @@ +# vgg19_imagenet + +|模型名称|vgg19_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|vgg19_imagenet| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|否| +|模型大小|549MB| +|最新更新日期|-| +|数据指标|-| + + +## 一、模型基本信息 + + + +- ### 模型介绍 + + - VGG是牛津大学计算机视觉组和DeepMind在2014年提出的一种图像分类模型。该系列模型探索了卷积神经网络的深度与其性能之间的关系,通过实验证明了增加网络的深度能够在一定程度上影响网络最终的性能,到目前为止,VGG仍然被许多其他图像任务用作特征提取的BackBone网络。该PaddleHub Module结构为VGG19,基于ImageNet-2012数据集训练,接受输入图片大小为224 x 224 x 3,支持直接通过命令行或者Python接口进行预测。 + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + + +- ### 2、安装 + + - ```shell + $ hub install vgg19_imagenet + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run vgg19_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现图像分类模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="vgg19_imagenet") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - 分类接口API。 + - **参数** + - data:dict类型,key为image,str类型,value为待检测的图片路径,list类型。 + + - **返回** + - result:list类型,每个元素为对应输入图片的预测结果。预测结果为dict类型,key为该图片分类结果label,value为该label对应的概率 + + + + + +## 四、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install vgg19_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/xception41_imagenet/README.md b/modules/image/classification/xception41_imagenet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..a5ad52074a0527fe580f2c4f6870fd49af530cb6 --- /dev/null +++ b/modules/image/classification/xception41_imagenet/README.md @@ -0,0 +1,84 @@ +# xception41_imagenet + +|模型名称|xception41_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|Xception| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|否| +|模型大小|MB| +|最新更新日期|-| +|数据指标|-| + + +## 一、模型基本信息 + + + +- ### 模型介绍 + + - Xception 全称为 Extreme Inception,是 Google 于 2016年提出的 Inception V3 的改进模型。Xception 采用了深度可分离卷积(depthwise separable convolution) 来替换原来 Inception V3 中的卷积操作,整体的网络结构是带有残差连接的深度可分离卷积层的线性堆叠。该PaddleHub Module结构为Xception41,基于ImageNet-2012数据集训练,接受输入图片大小为224 x 224 x 3,支持直接通过命令行或者 Python 接口进行预测。 + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + + +- ### 2、安装 + + - ```shell + $ hub install xception41_imagenet + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run xception41_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现图像分类模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + 
import paddlehub as hub + import cv2 + + classifier = hub.Module(name="xception41_imagenet") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - 分类接口API。 + - **参数** + - data:dict类型,key为image,str类型,value为待检测的图片路径,list类型。 + + - **返回** + - result:list类型,每个元素为对应输入图片的预测结果。预测结果为dict类型,key为该图片分类结果label,value为该label对应的概率 + + + + + +## 四、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install xception41_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/xception65_imagenet/README.md b/modules/image/classification/xception65_imagenet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1be8b866e1857f1d3f483baa0d7e54a964452e59 --- /dev/null +++ b/modules/image/classification/xception65_imagenet/README.md @@ -0,0 +1,84 @@ +# xception65_imagenet + +|模型名称|xception65_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|Xception| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|否| +|模型大小|140MB| +|最新更新日期|-| +|数据指标|-| + + +## 一、模型基本信息 + + + +- ### 模型介绍 + + - Xception 全称为 Extreme Inception,是 Google 于 2016年提出的 Inception V3 的改进模型。Xception 采用了深度可分离卷积(depthwise separable convolution) 来替换原来 Inception V3 中的卷积操作,整体的网络结构是带有残差连接的深度可分离卷积层的线性堆叠。该PaddleHub Module结构为Xception65,基于ImageNet-2012数据集训练,接受输入图片大小为224 x 224 x 3,支持直接通过命令行或者 Python 接口进行预测。 + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + + +- ### 2、安装 + + - ```shell + $ hub install xception65_imagenet + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run xception65_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现图像分类模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="xception65_imagenet") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - 分类接口API。 + - **参数** + - data:dict类型,key为image,str类型,value为待检测的图片路径,list类型。 + + - **返回** + - result:list类型,每个元素为对应输入图片的预测结果。预测结果为dict类型,key为该图片分类结果label,value为该label对应的概率 + + + + + +## 四、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install xception65_imagenet==1.0.0 + ``` diff --git a/modules/image/classification/xception71_imagenet/README.md b/modules/image/classification/xception71_imagenet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..28f44f91615a709c2dea2d58485a3760c0ba1edd --- /dev/null +++ b/modules/image/classification/xception71_imagenet/README.md @@ -0,0 +1,84 @@ +# xception71_imagenet + +|模型名称|xception71_imagenet| +| :--- | :---: | +|类别|图像-图像分类| +|网络|Xception| +|数据集|ImageNet-2012| +|是否支持Fine-tuning|否| +|模型大小|147MB| +|最新更新日期|-| +|数据指标|-| + + +## 一、模型基本信息 + + + +- ### 模型介绍 + + - Xception 全称为 Extreme Inception,是 Google 于 2016年提出的 Inception V3 的改进模型。Xception 采用了深度可分离卷积(depthwise separable convolution) 来替换原来 Inception V3 中的卷积操作,整体的网络结构是带有残差连接的深度可分离卷积层的线性堆叠。该PaddleHub Module结构为Xception71,基于ImageNet-2012数据集训练,接受输入图片大小为224 x 224 x 3,支持直接通过命令行或者 Python 接口进行预测。 + +## 二、安装 + +- ### 1、环境依赖 + 
+ - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + + +- ### 2、安装 + + - ```shell + $ hub install xception71_imagenet + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run xception71_imagenet --input_path "/PATH/TO/IMAGE" + ``` + - 通过命令行方式实现图像分类模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + classifier = hub.Module(name="xception71_imagenet") + test_img_path = "/PATH/TO/IMAGE" + input_dict = {"image": [test_img_path]} + result = classifier.classification(data=input_dict) + ``` + +- ### 3、API + + - ```python + def classification(data) + ``` + - 分类接口API。 + - **参数** + - data:dict类型,key为image,str类型,value为待检测的图片路径,list类型。 + + - **返回** + - result:list类型,每个元素为对应输入图片的预测结果。预测结果为dict类型,key为该图片分类结果label,value为该label对应的概率 + + + + + +## 四、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install xception71_imagenet==1.0.0 + ``` diff --git a/modules/thirdparty/image/depth_estimation/MiDaS_Large/README.md b/modules/image/depth_estimation/MiDaS_Large/README.md similarity index 100% rename from modules/thirdparty/image/depth_estimation/MiDaS_Large/README.md rename to modules/image/depth_estimation/MiDaS_Large/README.md diff --git a/modules/thirdparty/image/depth_estimation/MiDaS_Large/inference.py b/modules/image/depth_estimation/MiDaS_Large/inference.py similarity index 100% rename from modules/thirdparty/image/depth_estimation/MiDaS_Large/inference.py rename to modules/image/depth_estimation/MiDaS_Large/inference.py diff --git a/modules/thirdparty/image/depth_estimation/MiDaS_Large/module.py b/modules/image/depth_estimation/MiDaS_Large/module.py similarity index 100% rename from modules/thirdparty/image/depth_estimation/MiDaS_Large/module.py rename to modules/image/depth_estimation/MiDaS_Large/module.py diff --git a/modules/thirdparty/image/depth_estimation/MiDaS_Large/transforms.py b/modules/image/depth_estimation/MiDaS_Large/transforms.py similarity index 100% rename from modules/thirdparty/image/depth_estimation/MiDaS_Large/transforms.py rename to modules/image/depth_estimation/MiDaS_Large/transforms.py diff --git a/modules/thirdparty/image/depth_estimation/MiDaS_Large/utils.py b/modules/image/depth_estimation/MiDaS_Large/utils.py similarity index 100% rename from modules/thirdparty/image/depth_estimation/MiDaS_Large/utils.py rename to modules/image/depth_estimation/MiDaS_Large/utils.py diff --git a/modules/thirdparty/image/depth_estimation/MiDaS_Small/README.md b/modules/image/depth_estimation/MiDaS_Small/README.md similarity index 100% rename from modules/thirdparty/image/depth_estimation/MiDaS_Small/README.md rename to modules/image/depth_estimation/MiDaS_Small/README.md diff --git a/modules/thirdparty/image/depth_estimation/MiDaS_Small/inference.py b/modules/image/depth_estimation/MiDaS_Small/inference.py similarity index 100% rename from modules/thirdparty/image/depth_estimation/MiDaS_Small/inference.py rename to modules/image/depth_estimation/MiDaS_Small/inference.py diff --git a/modules/thirdparty/image/depth_estimation/MiDaS_Small/module.py b/modules/image/depth_estimation/MiDaS_Small/module.py similarity index 100% rename from 
modules/thirdparty/image/depth_estimation/MiDaS_Small/module.py rename to modules/image/depth_estimation/MiDaS_Small/module.py diff --git a/modules/thirdparty/image/depth_estimation/MiDaS_Small/transforms.py b/modules/image/depth_estimation/MiDaS_Small/transforms.py similarity index 100% rename from modules/thirdparty/image/depth_estimation/MiDaS_Small/transforms.py rename to modules/image/depth_estimation/MiDaS_Small/transforms.py diff --git a/modules/thirdparty/image/depth_estimation/MiDaS_Small/utils.py b/modules/image/depth_estimation/MiDaS_Small/utils.py similarity index 100% rename from modules/thirdparty/image/depth_estimation/MiDaS_Small/utils.py rename to modules/image/depth_estimation/MiDaS_Small/utils.py diff --git a/modules/thirdparty/image/keypoint_detection/hand_pose_localization/model.py b/modules/image/keypoint_detection/hand_pose_localization/model.py similarity index 100% rename from modules/thirdparty/image/keypoint_detection/hand_pose_localization/model.py rename to modules/image/keypoint_detection/hand_pose_localization/model.py diff --git a/modules/thirdparty/image/keypoint_detection/hand_pose_localization/module.py b/modules/image/keypoint_detection/hand_pose_localization/module.py similarity index 100% rename from modules/thirdparty/image/keypoint_detection/hand_pose_localization/module.py rename to modules/image/keypoint_detection/hand_pose_localization/module.py diff --git a/modules/thirdparty/image/keypoint_detection/hand_pose_localization/processor.py b/modules/image/keypoint_detection/hand_pose_localization/processor.py similarity index 100% rename from modules/thirdparty/image/keypoint_detection/hand_pose_localization/processor.py rename to modules/image/keypoint_detection/hand_pose_localization/processor.py diff --git a/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/README.md b/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/README.md deleted file mode 100644 index 95b9a1dd61eb5477bb54bcc8188f52eadb6baa81..0000000000000000000000000000000000000000 --- a/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/README.md +++ /dev/null @@ -1,138 +0,0 @@ -## 命令行预测 - -``` -$ hub run retinanet_resnet50_fpn_coco2017 --input_path "/PATH/TO/IMAGE" -``` - -## API - -``` -def context(trainable=True, - pretrained=True, - get_prediction=False) -``` - -提取特征,用于迁移学习。 - -**参数** - -* trainable(bool): 参数是否可训练; -* pretrained (bool): 是否加载预训练模型; -* get\_prediction (bool): 是否执行预测。 - -**返回** - -* inputs (dict): 模型的输入,keys 包括 'image', 'im\_size',相应的取值为: - * image (Variable): 图像变量 - * im\_size (Variable): 图片的尺寸 -* outputs (dict): 模型的输出。如果 get\_prediction 为 False,输出 'head\_fatures',否则输出 'bbox\_out'。 -* context\_prog (Program): 用于迁移学习的 Program. 
- -```python -def object_detection(paths=None, - images=None, - batch_size=1, - use_gpu=False, - output_dir='detection_result', - score_thresh=0.5, - visualization=True) -``` - -预测API,检测输入图片中的所有目标的位置。 - -**参数** - -* paths (list\[str\]): 图片的路径; -* images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\],BGR格式; -* batch\_size (int): batch 的大小; -* use\_gpu (bool): 是否使用 GPU; -* score\_thresh (float): 识别置信度的阈值; -* visualization (bool): 是否将识别结果保存为图片文件; -* output\_dir (str): 图片的保存路径,默认设为 detection\_result; - -**返回** - -* res (list\[dict\]): 识别结果的列表,列表中每一个元素为 dict,各字段为: - * data (list): 检测结果,list的每一个元素为 dict,各字段为: - * confidence (float): 识别的置信度; - * label (str): 标签; - * left (int): 边界框的左上角x坐标; - * top (int): 边界框的左上角y坐标; - * right (int): 边界框的右下角x坐标; - * bottom (int): 边界框的右下角y坐标; - * save\_path (str, optional): 识别结果的保存路径 (仅当visualization=True时存在)。 - -```python -def save_inference_model(dirname, - model_filename=None, - params_filename=None, - combined=True) -``` - -将模型保存到指定路径。 - -**参数** - -* dirname: 存在模型的目录名称 -* model\_filename: 模型文件名称,默认为\_\_model\_\_ -* params\_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效) -* combined: 是否将参数保存到统一的一个文件中 - -## 代码示例 - -```python -import paddlehub as hub -import cv2 - -object_detector = hub.Module(name="retinanet_resnet50_fpn_coco2017") -result = object_detector.object_detection(images=[cv2.imread('/PATH/TO/IMAGE')]) -# or -# result = object_detector.object_detection((paths=['/PATH/TO/IMAGE']) -``` - -## 服务部署 - -PaddleHub Serving可以部署一个目标检测的在线服务。 - -## 第一步:启动PaddleHub Serving - -运行启动命令: -```shell -$ hub serving start -m retinanet_resnet50_fpn_coco2017 -``` - -这样就完成了一个目标检测的服务化API的部署,默认端口号为8866。 - -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 - -## 第二步:发送预测请求 - -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 - -```python -import requests -import json -import cv2 -import base64 - - -def cv2_to_base64(image): - data = cv2.imencode('.jpg', image)[1] - return base64.b64encode(data.tostring()).decode('utf8') - - -# 发送HTTP请求 -data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} -headers = {"Content-type": "application/json"} -url = "http://127.0.0.1:8866/predict/retinanet_resnet50_fpn_coco2017" -r = requests.post(url=url, headers=headers, data=json.dumps(data)) - -# 打印预测结果 -print(r.json()["results"]) -``` - -### 依赖 - -paddlepaddle >= 1.6.2 - -paddlehub >= 1.6.0 diff --git a/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/data_feed.py b/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/data_feed.py deleted file mode 100644 index dbef6a3fc4ae231e6e08dac93af4674066920b43..0000000000000000000000000000000000000000 --- a/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/data_feed.py +++ /dev/null @@ -1,99 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import print_function -from __future__ import division - -import os -from collections import OrderedDict - -import numpy as np -import cv2 -from PIL import Image, ImageEnhance -from paddle import fluid - -__all__ = ['test_reader', 'padding_minibatch'] - - -def test_reader(paths=None, images=None): - """ - data generator - - Args: - paths (list[str]): paths to images. - images (list(numpy.ndarray)): data of images, shape of each is [H, W, C] - - Yield: - res (dict): key contains 'image' and 'im_info', the corresponding values is: - image (numpy.ndarray): the image to be fed into network - im_info (numpy.ndarray): the info about the preprocessed. 
- """ - img_list = list() - if paths: - for img_path in paths: - assert os.path.isfile(img_path), "The {} isn't a valid file path.".format(img_path) - img = cv2.imread(img_path).astype('float32') - img_list.append(img) - if images is not None: - for img in images: - img_list.append(img) - for im in img_list: - im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) - im = im.astype(np.float32, copy=False) - mean = [0.485, 0.456, 0.406] - std = [0.229, 0.224, 0.225] - mean = np.array(mean)[np.newaxis, np.newaxis, :] - std = np.array(std)[np.newaxis, np.newaxis, :] - im = im / 255.0 - im -= mean - im /= std - target_size = 800 - max_size = 1333 - shape = im.shape - # im_shape holds the original shape of image. - # im_shape = np.array([shape[0], shape[1], 1.0]).astype('float32') - im_size_min = np.min(shape[0:2]) - im_size_max = np.max(shape[0:2]) - im_scale = float(target_size) / float(im_size_min) - if np.round(im_scale * im_size_max) > max_size: - im_scale = float(max_size) / float(im_size_max) - - resize_w = np.round(im_scale * float(shape[1])) - resize_h = np.round(im_scale * float(shape[0])) - # im_info holds the resize info of image. - im_info = np.array([resize_h, resize_w, im_scale]).astype('float32') - - im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale, interpolation=cv2.INTER_LINEAR) - - # HWC --> CHW - im = np.swapaxes(im, 1, 2) - im = np.swapaxes(im, 1, 0) - yield {'image': im, 'im_info': im_info} - - -def padding_minibatch(batch_data, coarsest_stride=0, use_padded_im_info=True): - max_shape_org = np.array([data['image'].shape for data in batch_data]).max(axis=0) - if coarsest_stride > 0: - max_shape = np.zeros((3)).astype('int32') - max_shape[1] = int(np.ceil(max_shape_org[1] / coarsest_stride) * coarsest_stride) - max_shape[2] = int(np.ceil(max_shape_org[2] / coarsest_stride) * coarsest_stride) - else: - max_shape = max_shape_org.astype('int32') - - padding_image = list() - padding_info = list() - padding_shape = list() - - for data in batch_data: - im_c, im_h, im_w = data['image'].shape - # image - padding_im = np.zeros((im_c, max_shape[1], max_shape[2]), dtype=np.float32) - padding_im[:, 0:im_h, 0:im_w] = data['image'] - padding_image.append(padding_im) - # im_info - data['im_info'][0] = max_shape[1] if use_padded_im_info else max_shape_org[1] - data['im_info'][1] = max_shape[2] if use_padded_im_info else max_shape_org[2] - padding_info.append(data['im_info']) - - padding_image = np.array(padding_image).astype('float32') - padding_info = np.array(padding_info).astype('float32') - return padding_image, padding_info diff --git a/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/fpn.py b/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/fpn.py deleted file mode 100644 index 803b8acde9bbd289237d2bd1b9735fd905964edf..0000000000000000000000000000000000000000 --- a/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/fpn.py +++ /dev/null @@ -1,237 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import copy -from collections import OrderedDict - -from paddle import fluid -from paddle.fluid.param_attr import ParamAttr -from paddle.fluid.initializer import Xavier -from paddle.fluid.regularizer import L2Decay - -__all__ = ['FPN'] - - -def ConvNorm(input, - num_filters, - filter_size, - stride=1, - groups=1, - norm_decay=0., - norm_type='affine_channel', - norm_groups=32, - dilation=1, - lr_scale=1, - freeze_norm=False, - act=None, - norm_name=None, - initializer=None, 
- name=None): - fan = num_filters - conv = fluid.layers.conv2d( - input=input, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=((filter_size - 1) // 2) * dilation, - dilation=dilation, - groups=groups, - act=None, - param_attr=ParamAttr(name=name + "_weights", initializer=initializer, learning_rate=lr_scale), - bias_attr=False, - name=name + '.conv2d.output.1') - norm_lr = 0. if freeze_norm else 1. - pattr = ParamAttr(name=norm_name + '_scale', learning_rate=norm_lr * lr_scale, regularizer=L2Decay(norm_decay)) - battr = ParamAttr(name=norm_name + '_offset', learning_rate=norm_lr * lr_scale, regularizer=L2Decay(norm_decay)) - if norm_type in ['bn', 'sync_bn']: - global_stats = True if freeze_norm else False - out = fluid.layers.batch_norm( - input=conv, - act=act, - name=norm_name + '.output.1', - param_attr=pattr, - bias_attr=battr, - moving_mean_name=norm_name + '_mean', - moving_variance_name=norm_name + '_variance', - use_global_stats=global_stats) - scale = fluid.framework._get_var(pattr.name) - bias = fluid.framework._get_var(battr.name) - elif norm_type == 'gn': - out = fluid.layers.group_norm( - input=conv, act=act, name=norm_name + '.output.1', groups=norm_groups, param_attr=pattr, bias_attr=battr) - scale = fluid.framework._get_var(pattr.name) - bias = fluid.framework._get_var(battr.name) - elif norm_type == 'affine_channel': - scale = fluid.layers.create_parameter( - shape=[conv.shape[1]], dtype=conv.dtype, attr=pattr, default_initializer=fluid.initializer.Constant(1.)) - bias = fluid.layers.create_parameter( - shape=[conv.shape[1]], dtype=conv.dtype, attr=battr, default_initializer=fluid.initializer.Constant(0.)) - out = fluid.layers.affine_channel(x=conv, scale=scale, bias=bias, act=act) - if freeze_norm: - scale.stop_gradient = True - bias.stop_gradient = True - return out - - -class FPN(object): - """ - Feature Pyramid Network, see https://arxiv.org/abs/1612.03144 - - Args: - num_chan (int): number of feature channels - min_level (int): lowest level of the backbone feature map to use - max_level (int): highest level of the backbone feature map to use - spatial_scale (list): feature map scaling factor - has_extra_convs (bool): whether has extral convolutions in higher levels - norm_type (str|None): normalization type, 'bn'/'sync_bn'/'affine_channel' - """ - __shared__ = ['norm_type', 'freeze_norm'] - - def __init__(self, - num_chan=256, - min_level=2, - max_level=6, - spatial_scale=[1. / 32., 1. / 16., 1. / 8., 1. 
/ 4.], - has_extra_convs=False, - norm_type=None, - freeze_norm=False): - self.freeze_norm = freeze_norm - self.num_chan = num_chan - self.min_level = min_level - self.max_level = max_level - self.spatial_scale = spatial_scale - self.has_extra_convs = has_extra_convs - self.norm_type = norm_type - - def _add_topdown_lateral(self, body_name, body_input, upper_output): - lateral_name = 'fpn_inner_' + body_name + '_lateral' - topdown_name = 'fpn_topdown_' + body_name - fan = body_input.shape[1] - if self.norm_type: - initializer = Xavier(fan_out=fan) - lateral = ConvNorm( - body_input, - self.num_chan, - 1, - initializer=initializer, - norm_type=self.norm_type, - freeze_norm=self.freeze_norm, - name=lateral_name, - norm_name=lateral_name) - else: - lateral = fluid.layers.conv2d( - body_input, - self.num_chan, - 1, - param_attr=ParamAttr(name=lateral_name + "_w", initializer=Xavier(fan_out=fan)), - bias_attr=ParamAttr(name=lateral_name + "_b", learning_rate=2., regularizer=L2Decay(0.)), - name=lateral_name) - topdown = fluid.layers.resize_nearest(upper_output, scale=2., name=topdown_name) - - return lateral + topdown - - def get_output(self, body_dict): - """ - Add FPN onto backbone. - - Args: - body_dict(OrderedDict): Dictionary of variables and each element is the - output of backbone. - - Return: - fpn_dict(OrderedDict): A dictionary represents the output of FPN with - their name. - spatial_scale(list): A list of multiplicative spatial scale factor. - """ - spatial_scale = copy.deepcopy(self.spatial_scale) - body_name_list = list(body_dict.keys())[::-1] - num_backbone_stages = len(body_name_list) - self.fpn_inner_output = [[] for _ in range(num_backbone_stages)] - fpn_inner_name = 'fpn_inner_' + body_name_list[0] - body_input = body_dict[body_name_list[0]] - fan = body_input.shape[1] - if self.norm_type: - initializer = Xavier(fan_out=fan) - self.fpn_inner_output[0] = ConvNorm( - body_input, - self.num_chan, - 1, - initializer=initializer, - norm_type=self.norm_type, - freeze_norm=self.freeze_norm, - name=fpn_inner_name, - norm_name=fpn_inner_name) - else: - self.fpn_inner_output[0] = fluid.layers.conv2d( - body_input, - self.num_chan, - 1, - param_attr=ParamAttr(name=fpn_inner_name + "_w", initializer=Xavier(fan_out=fan)), - bias_attr=ParamAttr(name=fpn_inner_name + "_b", learning_rate=2., regularizer=L2Decay(0.)), - name=fpn_inner_name) - for i in range(1, num_backbone_stages): - body_name = body_name_list[i] - body_input = body_dict[body_name] - top_output = self.fpn_inner_output[i - 1] - fpn_inner_single = self._add_topdown_lateral(body_name, body_input, top_output) - self.fpn_inner_output[i] = fpn_inner_single - fpn_dict = {} - fpn_name_list = [] - for i in range(num_backbone_stages): - fpn_name = 'fpn_' + body_name_list[i] - fan = self.fpn_inner_output[i].shape[1] * 3 * 3 - if self.norm_type: - initializer = Xavier(fan_out=fan) - fpn_output = ConvNorm( - self.fpn_inner_output[i], - self.num_chan, - 3, - initializer=initializer, - norm_type=self.norm_type, - freeze_norm=self.freeze_norm, - name=fpn_name, - norm_name=fpn_name) - else: - fpn_output = fluid.layers.conv2d( - self.fpn_inner_output[i], - self.num_chan, - filter_size=3, - padding=1, - param_attr=ParamAttr(name=fpn_name + "_w", initializer=Xavier(fan_out=fan)), - bias_attr=ParamAttr(name=fpn_name + "_b", learning_rate=2., regularizer=L2Decay(0.)), - name=fpn_name) - fpn_dict[fpn_name] = fpn_output - fpn_name_list.append(fpn_name) - if not self.has_extra_convs and self.max_level - self.min_level == len(spatial_scale): - 
body_top_name = fpn_name_list[0] - body_top_extension = fluid.layers.pool2d( - fpn_dict[body_top_name], 1, 'max', pool_stride=2, name=body_top_name + '_subsampled_2x') - fpn_dict[body_top_name + '_subsampled_2x'] = body_top_extension - fpn_name_list.insert(0, body_top_name + '_subsampled_2x') - spatial_scale.insert(0, spatial_scale[0] * 0.5) - # Coarser FPN levels introduced for RetinaNet - highest_backbone_level = self.min_level + len(spatial_scale) - 1 - if self.has_extra_convs and self.max_level > highest_backbone_level: - fpn_blob = body_dict[body_name_list[0]] - for i in range(highest_backbone_level + 1, self.max_level + 1): - fpn_blob_in = fpn_blob - fpn_name = 'fpn_' + str(i) - if i > highest_backbone_level + 1: - fpn_blob_in = fluid.layers.relu(fpn_blob) - fan = fpn_blob_in.shape[1] * 3 * 3 - fpn_blob = fluid.layers.conv2d( - input=fpn_blob_in, - num_filters=self.num_chan, - filter_size=3, - stride=2, - padding=1, - param_attr=ParamAttr(name=fpn_name + "_w", initializer=Xavier(fan_out=fan)), - bias_attr=ParamAttr(name=fpn_name + "_b", learning_rate=2., regularizer=L2Decay(0.)), - name=fpn_name) - fpn_dict[fpn_name] = fpn_blob - fpn_name_list.insert(0, fpn_name) - spatial_scale.insert(0, spatial_scale[0] * 0.5) - res_dict = OrderedDict([(k, fpn_dict[k]) for k in fpn_name_list]) - return res_dict, spatial_scale diff --git a/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/label_file.txt b/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/label_file.txt deleted file mode 100644 index d7d43a94adf73208f997f0efd6581bef11ca734e..0000000000000000000000000000000000000000 --- a/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/label_file.txt +++ /dev/null @@ -1,81 +0,0 @@ -background -person -bicycle -car -motorcycle -airplane -bus -train -truck -boat -traffic light -fire hydrant -stop sign -parking meter -bench -bird -cat -dog -horse -sheep -cow -elephant -bear -zebra -giraffe -backpack -umbrella -handbag -tie -suitcase -frisbee -skis -snowboard -sports ball -kite -baseball bat -baseball glove -skateboard -surfboard -tennis racket -bottle -wine glass -cup -fork -knife -spoon -bowl -banana -apple -sandwich -orange -broccoli -carrot -hot dog -pizza -donut -cake -chair -couch -potted plant -bed -dining table -toilet -tv -laptop -mouse -remote -keyboard -cell phone -microwave -oven -toaster -sink -refrigerator -book -clock -vase -scissors -teddy bear -hair drier -toothbrush diff --git a/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/module.py b/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/module.py deleted file mode 100644 index 5070dacb42d0eb4ca20d6e752c7239b83b2257ee..0000000000000000000000000000000000000000 --- a/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/module.py +++ /dev/null @@ -1,302 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import ast -import argparse -from functools import partial - -import numpy as np -import paddle.fluid as fluid -import paddlehub as hub -from paddlehub.module.module import moduleinfo, runnable, serving -from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor -from paddlehub.io.parser import txt_parser -from paddlehub.common.paddle_helper import add_vars_prefix - -from retinanet_resnet50_fpn_coco2017.fpn import FPN -from retinanet_resnet50_fpn_coco2017.retina_head import AnchorGenerator, RetinaTargetAssign, RetinaOutputDecoder, RetinaHead 
-from retinanet_resnet50_fpn_coco2017.processor import load_label_info, postprocess, base64_to_cv2 -from retinanet_resnet50_fpn_coco2017.data_feed import test_reader, padding_minibatch -from retinanet_resnet50_fpn_coco2017.resnet import ResNet - - -@moduleinfo( - name="retinanet_resnet50_fpn_coco2017", - version="1.0.0", - type="cv/object_detection", - summary="Baidu's RetinaNet model for object detection, with backbone ResNet50 and FPN.", - author="paddlepaddle", - author_email="paddle-dev@baidu.com") -class RetinaNetResNet50FPN(hub.Module): - def _initialize(self): - # default pretrained model of Retinanet_ResNet50_FPN, the shape of input image tensor is (3, 608, 608) - self.default_pretrained_model_path = os.path.join(self.directory, "retinanet_resnet50_fpn_model") - self.label_names = load_label_info(os.path.join(self.directory, "label_file.txt")) - self.infer_prog = None - self.image = None - self.im_info = None - self.bbox_out = None - self._set_config() - - def _set_config(self): - """ - predictor config setting - """ - cpu_config = AnalysisConfig(self.default_pretrained_model_path) - cpu_config.disable_glog_info() - cpu_config.disable_gpu() - self.cpu_predictor = create_paddle_predictor(cpu_config) - - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - use_gpu = True - except: - use_gpu = False - if use_gpu: - gpu_config = AnalysisConfig(self.default_pretrained_model_path) - gpu_config.disable_glog_info() - gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=0) - self.gpu_predictor = create_paddle_predictor(gpu_config) - - def context(self, num_classes=81, trainable=True, pretrained=True, phase='train'): - """ - Distill the Head Features, so as to perform transfer learning. - - Args: - num_classes (int): number of classes. - trainable (bool): whether to set parameters trainable. - pretrained (bool): whether to load default pretrained model. - phase (str): optional choices are 'train' and 'predict'. - - Returns: - inputs(dict): the input variables. - outputs(dict): the output variables. - context_prog (Program): the program to execute transfer learning. 
- """ - context_prog = fluid.Program() - startup_program = fluid.Program() - with fluid.program_guard(context_prog, startup_program): - with fluid.unique_name.guard(): - var_prefix = '@HUB_{}@'.format(self.name) - # image - image = fluid.layers.data(name='image', shape=[-1, 3, -1, -1], dtype='float32', lod_level=0) - # im_info - im_info = fluid.layers.data(name='im_info', shape=[3], dtype='float32', lod_level=0) - # backbone - backbone = ResNet( - norm_type='affine_channel', freeze_at=2, norm_decay=0., depth=50, feature_maps=[3, 4, 5]) - body_feats = backbone(image) - # retina_head - retina_head = RetinaHead( - anchor_generator=AnchorGenerator(aspect_ratios=[1.0, 2.0, 0.5], variance=[1.0, 1.0, 1.0, 1.0]), - target_assign=RetinaTargetAssign(positive_overlap=0.5, negative_overlap=0.4), - output_decoder=RetinaOutputDecoder( - score_thresh=0.05, nms_thresh=0.5, pre_nms_top_n=1000, detections_per_im=100, nms_eta=1.0), - num_convs_per_octave=4, - num_chan=256, - max_level=7, - min_level=3, - prior_prob=0.01, - base_scale=4, - num_scales_per_octave=3) - # fpn - fpn = FPN( - max_level=7, - min_level=3, - num_chan=256, - spatial_scale=[0.03125, 0.0625, 0.125], - has_extra_convs=True) - # body_feats - body_feats, spatial_scale = fpn.get_output(body_feats) - # inputs, outputs, context_prog - inputs = {'image': var_prefix + image.name, 'im_info': var_prefix + im_info.name} - if phase == 'predict': - pred = retina_head.get_prediction(body_feats, spatial_scale, im_info) - outputs = {'bbox_out': var_prefix + pred.name} - else: - outputs = {'body_features': [var_prefix + var.name for key, var in body_feats.items()]} - - # add_vars_prefix - add_vars_prefix(context_prog, var_prefix) - add_vars_prefix(fluid.default_startup_program(), var_prefix) - - global_vars = context_prog.global_block().vars - inputs = {key: global_vars[value] for key, value in inputs.items()} - outputs = { - key: global_vars[value] if not isinstance(value, list) else [global_vars[var] for var in value] - for key, value in outputs.items() - } - - place = fluid.CPUPlace() - exe = fluid.Executor(place) - for param in context_prog.global_block().iter_parameters(): - param.trainable = trainable - if pretrained: - - def _if_exist(var): - return os.path.exists(os.path.join(self.default_pretrained_model_path, var.name)) - - fluid.io.load_vars(exe, self.default_pretrained_model_path, predicate=_if_exist) - else: - exe.run(startup_program) - return inputs, outputs, context_prog - - def save_inference_model(self, dirname, model_filename=None, params_filename=None, combined=True): - if combined: - model_filename = "__model__" if not model_filename else model_filename - params_filename = "__params__" if not params_filename else params_filename - place = fluid.CPUPlace() - exe = fluid.Executor(place) - - program, feeded_var_names, target_vars = fluid.io.load_inference_model( - dirname=self.default_pretrained_model_path, executor=exe) - - fluid.io.save_inference_model( - dirname=dirname, - main_program=program, - executor=exe, - feeded_var_names=feeded_var_names, - target_vars=target_vars, - model_filename=model_filename, - params_filename=params_filename) - - def object_detection(self, - paths=None, - images=None, - use_gpu=False, - batch_size=1, - output_dir='detection_result', - score_thresh=0.5, - visualization=True): - """API of Object Detection. - - Args: - paths (list[str]): The paths of images. - images (list(numpy.ndarray)): images data, shape of each is [H, W, C] - batch_size (int): batch size. - use_gpu (bool): Whether to use gpu. 
- output_dir (str): The path to store output images. - visualization (bool): Whether to save image or not. - score_thresh (float): threshold for object detecion. - visualization (bool): whether to save result as images. - - Returns: - res (list[dict]): The result of coco2017 detecion. keys include 'data', 'save_path', the corresponding value is: - data (dict): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label', 'confidence', the corresponding value is: - left (float): The X coordinate of the upper left corner of the bounding box; - top (float): The Y coordinate of the upper left corner of the bounding box; - right (float): The X coordinate of the lower right corner of the bounding box; - bottom (float): The Y coordinate of the lower right corner of the bounding box; - label (str): The label of detection result; - confidence (float): The confidence of detection result. - save_path (str, optional): The path to save output images. - """ - if use_gpu: - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - except: - raise RuntimeError( - "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." - ) - - all_images = list() - paths = paths if paths else list() - for yield_data in test_reader(paths, images): - all_images.append(yield_data) - - images_num = len(all_images) - loop_num = int(np.ceil(images_num / batch_size)) - res = list() - for iter_id in range(loop_num): - batch_data = list() - handle_id = iter_id * batch_size - for image_id in range(batch_size): - try: - batch_data.append(all_images[handle_id + image_id]) - except: - pass - padding_image, padding_info = padding_minibatch(batch_data, coarsest_stride=32, use_padded_im_info=True) - padding_image_tensor = PaddleTensor(padding_image.copy()) - padding_info_tensor = PaddleTensor(padding_info.copy()) - feed_list = [padding_image_tensor, padding_info_tensor] - if use_gpu: - data_out = self.gpu_predictor.run(feed_list) - else: - data_out = self.cpu_predictor.run(feed_list) - output = postprocess( - paths=paths, - images=images, - data_out=data_out, - score_thresh=score_thresh, - label_names=self.label_names, - output_dir=output_dir, - handle_id=handle_id, - visualization=visualization) - res += output - return res - - def add_module_config_arg(self): - """ - Add the command config options - """ - self.arg_config_group.add_argument( - '--use_gpu', type=ast.literal_eval, default=False, help="whether use GPU or not") - - self.arg_config_group.add_argument('--batch_size', type=int, default=1, help="batch size for prediction") - - def add_module_input_arg(self): - """ - Add the command input options - """ - self.arg_input_group.add_argument('--input_path', type=str, default=None, help="input data") - - self.arg_input_group.add_argument('--input_file', type=str, default=None, help="file contain input data") - - def check_input_data(self, args): - input_data = list() - if args.input_path: - input_data = [args.input_path] - elif args.input_file: - if not os.path.exists(args.input_file): - raise RuntimeError("File %s is not exist." % args.input_file) - else: - input_data = txt_parser.parse(args.input_file, use_strip=True) - return input_data - - @serving - def serving_method(self, images, **kwargs): - """ - Run as a service. 
- """ - images_decode = [base64_to_cv2(image) for image in images] - results = self.object_detection(images=images_decode, **kwargs) - return results - - @runnable - def run_cmd(self, argvs): - self.parser = argparse.ArgumentParser( - description="Run the {}".format(self.name), - prog="hub run {}".format(self.name), - usage='%(prog)s', - add_help=True) - self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") - self.arg_config_group = self.parser.add_argument_group( - title="Config options", description="Run configuration for controlling module behavior, not required.") - self.add_module_config_arg() - - self.add_module_input_arg() - args = self.parser.parse_args(argvs) - input_data = self.check_input_data(args) - if len(input_data) == 0: - self.parser.print_help() - exit(1) - else: - for image_path in input_data: - if not os.path.exists(image_path): - raise RuntimeError("File %s or %s is not exist." % image_path) - return self.object_detection(paths=input_data, use_gpu=args.use_gpu, batch_size=args.batch_size) diff --git a/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/name_adapter.py b/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/name_adapter.py deleted file mode 100644 index bebf8bdeeec3aa76357d95cc52ba5a009e19d46f..0000000000000000000000000000000000000000 --- a/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/name_adapter.py +++ /dev/null @@ -1,61 +0,0 @@ -# coding=utf-8 - - -class NameAdapter(object): - """Fix the backbones variable names for pretrained weight""" - - def __init__(self, model): - super(NameAdapter, self).__init__() - self.model = model - - @property - def model_type(self): - return getattr(self.model, '_model_type', '') - - @property - def variant(self): - return getattr(self.model, 'variant', '') - - def fix_conv_norm_name(self, name): - if name == "conv1": - bn_name = "bn_" + name - else: - bn_name = "bn" + name[3:] - # the naming rule is same as pretrained weight - if self.model_type == 'SEResNeXt': - bn_name = name + "_bn" - return bn_name - - def fix_shortcut_name(self, name): - if self.model_type == 'SEResNeXt': - name = 'conv' + name + '_prj' - return name - - def fix_bottleneck_name(self, name): - if self.model_type == 'SEResNeXt': - conv_name1 = 'conv' + name + '_x1' - conv_name2 = 'conv' + name + '_x2' - conv_name3 = 'conv' + name + '_x3' - shortcut_name = name - else: - conv_name1 = name + "_branch2a" - conv_name2 = name + "_branch2b" - conv_name3 = name + "_branch2c" - shortcut_name = name + "_branch1" - return conv_name1, conv_name2, conv_name3, shortcut_name - - def fix_layer_warp_name(self, stage_num, count, i): - name = 'res' + str(stage_num) - if count > 10 and stage_num == 4: - if i == 0: - conv_name = name + "a" - else: - conv_name = name + "b" + str(i) - else: - conv_name = name + chr(ord("a") + i) - if self.model_type == 'SEResNeXt': - conv_name = str(stage_num + 2) + '_' + str(i + 1) - return conv_name - - def fix_c1_stage_name(self): - return "res_conv1" if self.model_type == 'ResNeXt' else "conv1" diff --git a/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/nonlocal_helper.py b/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/nonlocal_helper.py deleted file mode 100644 index 839df4caf744280001f033d8ef6a3d560277368e..0000000000000000000000000000000000000000 --- a/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/nonlocal_helper.py +++ /dev/null @@ -1,151 +0,0 @@ -from __future__ import absolute_import 
-from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -import paddle.fluid as fluid -from paddle.fluid import ParamAttr - -nonlocal_params = { - "use_zero_init_conv": False, - "conv_init_std": 0.01, - "no_bias": True, - "use_maxpool": False, - "use_softmax": True, - "use_bn": False, - "use_scale": True, # vital for the model prformance!!! - "use_affine": False, - "bn_momentum": 0.9, - "bn_epsilon": 1.0000001e-5, - "bn_init_gamma": 0.9, - "weight_decay_bn": 1.e-4, -} - - -def space_nonlocal(input, dim_in, dim_out, prefix, dim_inner, max_pool_stride=2): - cur = input - theta = fluid.layers.conv2d(input = cur, num_filters = dim_inner, \ - filter_size = [1, 1], stride = [1, 1], \ - padding = [0, 0], \ - param_attr=ParamAttr(name = prefix + '_theta' + "_w", \ - initializer = fluid.initializer.Normal(loc = 0.0, - scale = nonlocal_params["conv_init_std"])), \ - bias_attr = ParamAttr(name = prefix + '_theta' + "_b", \ - initializer = fluid.initializer.Constant(value = 0.)) \ - if not nonlocal_params["no_bias"] else False, \ - name = prefix + '_theta') - theta_shape = theta.shape - theta_shape_op = fluid.layers.shape(theta) - theta_shape_op.stop_gradient = True - - if nonlocal_params["use_maxpool"]: - max_pool = fluid.layers.pool2d(input = cur, \ - pool_size = [max_pool_stride, max_pool_stride], \ - pool_type = 'max', \ - pool_stride = [max_pool_stride, max_pool_stride], \ - pool_padding = [0, 0], \ - name = prefix + '_pool') - else: - max_pool = cur - - phi = fluid.layers.conv2d(input = max_pool, num_filters = dim_inner, \ - filter_size = [1, 1], stride = [1, 1], \ - padding = [0, 0], \ - param_attr = ParamAttr(name = prefix + '_phi' + "_w", \ - initializer = fluid.initializer.Normal(loc = 0.0, - scale = nonlocal_params["conv_init_std"])), \ - bias_attr = ParamAttr(name = prefix + '_phi' + "_b", \ - initializer = fluid.initializer.Constant(value = 0.)) \ - if (nonlocal_params["no_bias"] == 0) else False, \ - name = prefix + '_phi') - phi_shape = phi.shape - - g = fluid.layers.conv2d(input = max_pool, num_filters = dim_inner, \ - filter_size = [1, 1], stride = [1, 1], \ - padding = [0, 0], \ - param_attr = ParamAttr(name = prefix + '_g' + "_w", \ - initializer = fluid.initializer.Normal(loc = 0.0, scale = nonlocal_params["conv_init_std"])), \ - bias_attr = ParamAttr(name = prefix + '_g' + "_b", \ - initializer = fluid.initializer.Constant(value = 0.)) if (nonlocal_params["no_bias"] == 0) else False, \ - name = prefix + '_g') - g_shape = g.shape - # we have to use explicit batch size (to support arbitrary spacetime size) - # e.g. (8, 1024, 4, 14, 14) => (8, 1024, 784) - theta = fluid.layers.reshape(theta, shape=(0, 0, -1)) - theta = fluid.layers.transpose(theta, [0, 2, 1]) - phi = fluid.layers.reshape(phi, [0, 0, -1]) - theta_phi = fluid.layers.matmul(theta, phi, name=prefix + '_affinity') - g = fluid.layers.reshape(g, [0, 0, -1]) - - if nonlocal_params["use_softmax"]: - if nonlocal_params["use_scale"]: - theta_phi_sc = fluid.layers.scale(theta_phi, scale=dim_inner**-.5) - else: - theta_phi_sc = theta_phi - p = fluid.layers.softmax(theta_phi_sc, name=prefix + '_affinity' + '_prob') - else: - # not clear about what is doing in xlw's code - p = None # not implemented - raise "Not implemented when not use softmax" - - # note g's axis[2] corresponds to p's axis[2] - # e.g. 
g(8, 1024, 784_2) * p(8, 784_1, 784_2) => (8, 1024, 784_1) - p = fluid.layers.transpose(p, [0, 2, 1]) - t = fluid.layers.matmul(g, p, name=prefix + '_y') - - # reshape back - # e.g. (8, 1024, 784) => (8, 1024, 4, 14, 14) - t_shape = t.shape - t_re = fluid.layers.reshape(t, shape=list(theta_shape), actual_shape=theta_shape_op) - blob_out = t_re - blob_out = fluid.layers.conv2d(input = blob_out, num_filters = dim_out, \ - filter_size = [1, 1], stride = [1, 1], padding = [0, 0], \ - param_attr = ParamAttr(name = prefix + '_out' + "_w", \ - initializer = fluid.initializer.Constant(value = 0.) \ - if nonlocal_params["use_zero_init_conv"] \ - else fluid.initializer.Normal(loc = 0.0, - scale = nonlocal_params["conv_init_std"])), \ - bias_attr = ParamAttr(name = prefix + '_out' + "_b", \ - initializer = fluid.initializer.Constant(value = 0.)) \ - if (nonlocal_params["no_bias"] == 0) else False, \ - name = prefix + '_out') - blob_out_shape = blob_out.shape - - if nonlocal_params["use_bn"]: - bn_name = prefix + "_bn" - blob_out = fluid.layers.batch_norm(blob_out, \ - # is_test = test_mode, \ - momentum = nonlocal_params["bn_momentum"], \ - epsilon = nonlocal_params["bn_epsilon"], \ - name = bn_name, \ - param_attr = ParamAttr(name = bn_name + "_s", \ - initializer = fluid.initializer.Constant(value = nonlocal_params["bn_init_gamma"]), \ - regularizer = fluid.regularizer.L2Decay(nonlocal_params["weight_decay_bn"])), \ - bias_attr = ParamAttr(name = bn_name + "_b", \ - regularizer = fluid.regularizer.L2Decay(nonlocal_params["weight_decay_bn"])), \ - moving_mean_name = bn_name + "_rm", \ - moving_variance_name = bn_name + "_riv") # add bn - - if nonlocal_params["use_affine"]: - affine_scale = fluid.layers.create_parameter(\ - shape=[blob_out_shape[1]], dtype = blob_out.dtype, \ - attr=ParamAttr(name=prefix + '_affine' + '_s'), \ - default_initializer = fluid.initializer.Constant(value = 1.)) - affine_bias = fluid.layers.create_parameter(\ - shape=[blob_out_shape[1]], dtype = blob_out.dtype, \ - attr=ParamAttr(name=prefix + '_affine' + '_b'), \ - default_initializer = fluid.initializer.Constant(value = 0.)) - blob_out = fluid.layers.affine_channel(blob_out, scale = affine_scale, \ - bias = affine_bias, name = prefix + '_affine') # add affine - - return blob_out - - -def add_space_nonlocal(input, dim_in, dim_out, prefix, dim_inner): - ''' - add_space_nonlocal: - Non-local Neural Networks: see https://arxiv.org/abs/1711.07971 - ''' - conv = space_nonlocal(input, dim_in, dim_out, prefix, dim_inner) - output = fluid.layers.elementwise_add(input, conv, name=prefix + '_sum') - return output diff --git a/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/processor.py b/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/processor.py deleted file mode 100644 index 167508096e96cbda4645bb4b20cb6b080ce5f37d..0000000000000000000000000000000000000000 --- a/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/processor.py +++ /dev/null @@ -1,162 +0,0 @@ -# coding=utf-8 -import base64 -import os - -import cv2 -import numpy as np -from PIL import Image, ImageDraw - -__all__ = [ - 'base64_to_cv2', - 'load_label_info', - 'postprocess', -] - - -def base64_to_cv2(b64str): - data = base64.b64decode(b64str.encode('utf8')) - data = np.fromstring(data, np.uint8) - data = cv2.imdecode(data, cv2.IMREAD_COLOR) - return data - - -def get_save_image_name(img, output_dir, image_path): - """Get save image name from source image path. 
- """ - image_name = os.path.split(image_path)[-1] - name, ext = os.path.splitext(image_name) - if ext == '': - if img.format == 'PNG': - ext = '.png' - elif img.format == 'JPEG': - ext = '.jpg' - elif img.format == 'BMP': - ext = '.bmp' - else: - if img.mode == "RGB" or img.mode == "L": - ext = ".jpg" - elif img.mode == "RGBA" or img.mode == "P": - ext = '.png' - - return os.path.join(output_dir, "{}".format(name)) + ext - - -def draw_bounding_box_on_image(image_path, data_list, save_dir): - image = Image.open(image_path) - draw = ImageDraw.Draw(image) - for data in data_list: - left, right, top, bottom = data['left'], data['right'], data['top'], data['bottom'] - # draw bbox - draw.line([(left, top), (left, bottom), (right, bottom), (right, top), (left, top)], width=2, fill='red') - - # draw label - if image.mode == 'RGB': - text = data['label'] + ": %.2f%%" % (100 * data['confidence']) - textsize_width, textsize_height = draw.textsize(text=text) - draw.rectangle( - xy=(left, top - (textsize_height + 5), left + textsize_width + 10, top), fill=(255, 255, 255)) - draw.text(xy=(left, top - 15), text=text, fill=(0, 0, 0)) - - save_name = get_save_image_name(image, save_dir, image_path) - if os.path.exists(save_name): - os.remove(save_name) - image.save(save_name) - - return save_name - - -def clip_bbox(bbox, img_width, img_height): - xmin = max(min(bbox[0], img_width), 0.) - ymin = max(min(bbox[1], img_height), 0.) - xmax = max(min(bbox[2], img_width), 0.) - ymax = max(min(bbox[3], img_height), 0.) - return float(xmin), float(ymin), float(xmax), float(ymax) - - -def load_label_info(file_path): - with open(file_path, 'r') as fr: - text = fr.readlines() - label_names = [] - for info in text: - label_names.append(info.strip()) - return label_names - - -def postprocess(paths, images, data_out, score_thresh, label_names, output_dir, handle_id, visualization): - """ - postprocess the lod_tensor produced by fluid.Executor.run - - Args: - paths (list[str]): the path of images. - images (list(numpy.ndarray)): list of images, shape of each is [H, W, C]. - data_out (lod_tensor): data produced by executor.run. - score_thresh (float): the low limit of bounding box. - label_names (list[str]): label names. - output_dir (str): output directory. - handle_id (int): The number of images that have been handled. - visualization (bool): whether to save as images. - - Returns: - res (list[dict]): The result of vehicles detecion. keys include 'data', 'save_path', the corresponding value is: - data (dict): the result of object detection, keys include 'left', 'top', 'right', 'bottom', 'label', 'confidence', the corresponding value is: - left (float): The X coordinate of the upper left corner of the bounding box; - top (float): The Y coordinate of the upper left corner of the bounding box; - right (float): The X coordinate of the lower right corner of the bounding box; - bottom (float): The Y coordinate of the lower right corner of the bounding box; - label (str): The label of detection result; - confidence (float): The confidence of detection result. - save_path (str): The path to save output images. 
- """ - lod_tensor = data_out[0] - lod = lod_tensor.lod[0] - results = lod_tensor.as_ndarray() - - if handle_id < len(paths): - unhandled_paths = paths[handle_id:] - unhandled_paths_num = len(unhandled_paths) - else: - unhandled_paths_num = 0 - - output_dir = output_dir if output_dir else os.path.join(os.getcwd(), 'detection_result') - if visualization: - if not os.path.exists(output_dir): - os.makedirs(output_dir) - - output = [] - for index in range(len(lod) - 1): - output_i = {'data': []} - if index < unhandled_paths_num: - org_img_path = unhandled_paths[index] - org_img = Image.open(org_img_path) - output_i['path'] = org_img_path - else: - org_img = images[index - unhandled_paths_num] - org_img = org_img.astype(np.uint8) - org_img = Image.fromarray(org_img[:, :, ::-1]) - if visualization: - org_img_path = get_save_image_name(org_img, output_dir, 'image_numpy_{}'.format((handle_id + index))) - org_img.save(org_img_path) - org_img_height = org_img.height - org_img_width = org_img.width - result_i = results[lod[index]:lod[index + 1]] - - for row in result_i: - if len(row) != 6: - continue - if row[1] < score_thresh: - continue - category_id = int(row[0]) - confidence = row[1] - bbox = row[2:] - dt = {} - dt['label'] = label_names[category_id] - dt['confidence'] = float(confidence) - dt['left'], dt['top'], dt['right'], dt['bottom'] = clip_bbox(bbox, org_img_width, org_img_height) - output_i['data'].append(dt) - - output.append(output_i) - - if visualization: - output_i['save_path'] = draw_bounding_box_on_image(org_img_path, output_i['data'], output_dir) - - return output diff --git a/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/resnet.py b/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/resnet.py deleted file mode 100644 index 77a3f7f4c7b16c3f9c65c46fc93eb394befa5110..0000000000000000000000000000000000000000 --- a/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/resnet.py +++ /dev/null @@ -1,364 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import math -from collections import OrderedDict -from numbers import Integral - -from paddle import fluid -from paddle.fluid.param_attr import ParamAttr -from paddle.fluid.framework import Variable -from paddle.fluid.regularizer import L2Decay -from paddle.fluid.initializer import Constant - -from .nonlocal_helper import add_space_nonlocal -from .name_adapter import NameAdapter - -__all__ = ['ResNet', 'ResNetC5'] - - -class ResNet(object): - """ - Residual Network, see https://arxiv.org/abs/1512.03385 - Args: - depth (int): ResNet depth, should be 34, 50. 
- freeze_at (int): freeze the backbone at which stage - norm_type (str): normalization type, 'bn'/'sync_bn'/'affine_channel' - freeze_norm (bool): freeze normalization layers - norm_decay (float): weight decay for normalization layer weights - variant (str): ResNet variant, supports 'a', 'b', 'c', 'd' currently - feature_maps (list): index of stages whose feature maps are returned - dcn_v2_stages (list): index of stages who select deformable conv v2 - nonlocal_stages (list): index of stages who select nonlocal networks - """ - __shared__ = ['norm_type', 'freeze_norm', 'weight_prefix_name'] - - def __init__(self, - depth=50, - freeze_at=0, - norm_type='sync_bn', - freeze_norm=False, - norm_decay=0., - variant='b', - feature_maps=[3, 4, 5], - dcn_v2_stages=[], - weight_prefix_name='', - nonlocal_stages=[], - get_prediction=False, - class_dim=1000): - super(ResNet, self).__init__() - - if isinstance(feature_maps, Integral): - feature_maps = [feature_maps] - - assert depth in [34, 50], \ - "depth {} not in [34, 50]" - assert variant in ['a', 'b', 'c', 'd'], "invalid ResNet variant" - assert 0 <= freeze_at <= 4, "freeze_at should be 0, 1, 2, 3 or 4" - assert len(feature_maps) > 0, "need one or more feature maps" - assert norm_type in ['bn', 'sync_bn', 'affine_channel'] - assert not (len(nonlocal_stages)>0 and depth<50), \ - "non-local is not supported for resnet18 or resnet34" - - self.depth = depth - self.freeze_at = freeze_at - self.norm_type = norm_type - self.norm_decay = norm_decay - self.freeze_norm = freeze_norm - self.variant = variant - self._model_type = 'ResNet' - self.feature_maps = feature_maps - self.dcn_v2_stages = dcn_v2_stages - self.depth_cfg = { - 34: ([3, 4, 6, 3], self.basicblock), - 50: ([3, 4, 6, 3], self.bottleneck), - } - self.stage_filters = [64, 128, 256, 512] - self._c1_out_chan_num = 64 - self.na = NameAdapter(self) - self.prefix_name = weight_prefix_name - - self.nonlocal_stages = nonlocal_stages - self.nonlocal_mod_cfg = { - 50: 2, - 101: 5, - 152: 8, - 200: 12, - } - self.get_prediction = get_prediction - self.class_dim = class_dim - - def _conv_offset(self, input, filter_size, stride, padding, act=None, name=None): - out_channel = filter_size * filter_size * 3 - out = fluid.layers.conv2d( - input, - num_filters=out_channel, - filter_size=filter_size, - stride=stride, - padding=padding, - param_attr=ParamAttr(initializer=Constant(0.0), name=name + ".w_0"), - bias_attr=ParamAttr(initializer=Constant(0.0), name=name + ".b_0"), - act=act, - name=name) - return out - - def _conv_norm(self, input, num_filters, filter_size, stride=1, groups=1, act=None, name=None, dcn_v2=False): - _name = self.prefix_name + name if self.prefix_name != '' else name - if not dcn_v2: - conv = fluid.layers.conv2d( - input=input, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - act=None, - param_attr=ParamAttr(name=_name + "_weights"), - bias_attr=False, - name=_name + '.conv2d.output.1') - else: - # select deformable conv" - offset_mask = self._conv_offset( - input=input, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - act=None, - name=_name + "_conv_offset") - offset_channel = filter_size**2 * 2 - mask_channel = filter_size**2 - offset, mask = fluid.layers.split(input=offset_mask, num_or_sections=[offset_channel, mask_channel], dim=1) - mask = fluid.layers.sigmoid(mask) - conv = fluid.layers.deformable_conv( - input=input, - offset=offset, - mask=mask, - num_filters=num_filters, - 
filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - deformable_groups=1, - im2col_step=1, - param_attr=ParamAttr(name=_name + "_weights"), - bias_attr=False, - name=_name + ".conv2d.output.1") - - bn_name = self.na.fix_conv_norm_name(name) - bn_name = self.prefix_name + bn_name if self.prefix_name != '' else bn_name - - norm_lr = 0. if self.freeze_norm else 1. - norm_decay = self.norm_decay - pattr = ParamAttr(name=bn_name + '_scale', learning_rate=norm_lr, regularizer=L2Decay(norm_decay)) - battr = ParamAttr(name=bn_name + '_offset', learning_rate=norm_lr, regularizer=L2Decay(norm_decay)) - - if self.norm_type in ['bn', 'sync_bn']: - global_stats = True if self.freeze_norm else False - out = fluid.layers.batch_norm( - input=conv, - act=act, - name=bn_name + '.output.1', - param_attr=pattr, - bias_attr=battr, - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance', - use_global_stats=global_stats) - scale = fluid.framework._get_var(pattr.name) - bias = fluid.framework._get_var(battr.name) - elif self.norm_type == 'affine_channel': - scale = fluid.layers.create_parameter( - shape=[conv.shape[1]], dtype=conv.dtype, attr=pattr, default_initializer=fluid.initializer.Constant(1.)) - bias = fluid.layers.create_parameter( - shape=[conv.shape[1]], dtype=conv.dtype, attr=battr, default_initializer=fluid.initializer.Constant(0.)) - out = fluid.layers.affine_channel(x=conv, scale=scale, bias=bias, act=act) - if self.freeze_norm: - scale.stop_gradient = True - bias.stop_gradient = True - return out - - def _shortcut(self, input, ch_out, stride, is_first, name): - max_pooling_in_short_cut = self.variant == 'd' - ch_in = input.shape[1] - # the naming rule is same as pretrained weight - name = self.na.fix_shortcut_name(name) - std_senet = getattr(self, 'std_senet', False) - if ch_in != ch_out or stride != 1 or (self.depth < 50 and is_first): - if std_senet: - if is_first: - return self._conv_norm(input, ch_out, 1, stride, name=name) - else: - return self._conv_norm(input, ch_out, 3, stride, name=name) - if max_pooling_in_short_cut and not is_first: - input = fluid.layers.pool2d( - input=input, pool_size=2, pool_stride=2, pool_padding=0, ceil_mode=True, pool_type='avg') - return self._conv_norm(input, ch_out, 1, 1, name=name) - return self._conv_norm(input, ch_out, 1, stride, name=name) - else: - return input - - def bottleneck(self, input, num_filters, stride, is_first, name, dcn_v2=False): - if self.variant == 'a': - stride1, stride2 = stride, 1 - else: - stride1, stride2 = 1, stride - - # ResNeXt - groups = getattr(self, 'groups', 1) - group_width = getattr(self, 'group_width', -1) - if groups == 1: - expand = 4 - elif (groups * group_width) == 256: - expand = 1 - else: # FIXME hard code for now, handles 32x4d, 64x4d and 32x8d - num_filters = num_filters // 2 - expand = 2 - - conv_name1, conv_name2, conv_name3, \ - shortcut_name = self.na.fix_bottleneck_name(name) - std_senet = getattr(self, 'std_senet', False) - if std_senet: - conv_def = [[int(num_filters / 2), 1, stride1, 'relu', 1, conv_name1], - [num_filters, 3, stride2, 'relu', groups, conv_name2], - [num_filters * expand, 1, 1, None, 1, conv_name3]] - else: - conv_def = [[num_filters, 1, stride1, 'relu', 1, conv_name1], - [num_filters, 3, stride2, 'relu', groups, conv_name2], - [num_filters * expand, 1, 1, None, 1, conv_name3]] - - residual = input - for i, (c, k, s, act, g, _name) in enumerate(conv_def): - residual = self._conv_norm( - input=residual, - num_filters=c, - 
filter_size=k, - stride=s, - act=act, - groups=g, - name=_name, - dcn_v2=(i == 1 and dcn_v2)) - short = self._shortcut(input, num_filters * expand, stride, is_first=is_first, name=shortcut_name) - # Squeeze-and-Excitation - if callable(getattr(self, '_squeeze_excitation', None)): - residual = self._squeeze_excitation(input=residual, num_channels=num_filters, name='fc' + name) - return fluid.layers.elementwise_add(x=short, y=residual, act='relu', name=name + ".add.output.5") - - def basicblock(self, input, num_filters, stride, is_first, name, dcn_v2=False): - assert dcn_v2 is False, "Not implemented yet." - conv0 = self._conv_norm( - input=input, num_filters=num_filters, filter_size=3, act='relu', stride=stride, name=name + "_branch2a") - conv1 = self._conv_norm(input=conv0, num_filters=num_filters, filter_size=3, act=None, name=name + "_branch2b") - short = self._shortcut(input, num_filters, stride, is_first, name=name + "_branch1") - return fluid.layers.elementwise_add(x=short, y=conv1, act='relu') - - def layer_warp(self, input, stage_num): - """ - Args: - input (Variable): input variable. - stage_num (int): the stage number, should be 2, 3, 4, 5 - - Returns: - The last variable in endpoint-th stage. - """ - assert stage_num in [2, 3, 4, 5] - - stages, block_func = self.depth_cfg[self.depth] - count = stages[stage_num - 2] - - ch_out = self.stage_filters[stage_num - 2] - is_first = False if stage_num != 2 else True - dcn_v2 = True if stage_num in self.dcn_v2_stages else False - - nonlocal_mod = 1000 - if stage_num in self.nonlocal_stages: - nonlocal_mod = self.nonlocal_mod_cfg[self.depth] if stage_num == 4 else 2 - - # Make the layer name and parameter name consistent - # with ImageNet pre-trained model - conv = input - for i in range(count): - conv_name = self.na.fix_layer_warp_name(stage_num, count, i) - if self.depth < 50: - is_first = True if i == 0 and stage_num == 2 else False - conv = block_func( - input=conv, - num_filters=ch_out, - stride=2 if i == 0 and stage_num != 2 else 1, - is_first=is_first, - name=conv_name, - dcn_v2=dcn_v2) - - # add non local model - dim_in = conv.shape[1] - nonlocal_name = "nonlocal_conv{}".format(stage_num) - if i % nonlocal_mod == nonlocal_mod - 1: - conv = add_space_nonlocal(conv, dim_in, dim_in, nonlocal_name + '_{}'.format(i), int(dim_in / 2)) - return conv - - def c1_stage(self, input): - out_chan = self._c1_out_chan_num - - conv1_name = self.na.fix_c1_stage_name() - - if self.variant in ['c', 'd']: - conv_def = [ - [out_chan // 2, 3, 2, "conv1_1"], - [out_chan // 2, 3, 1, "conv1_2"], - [out_chan, 3, 1, "conv1_3"], - ] - else: - conv_def = [[out_chan, 7, 2, conv1_name]] - - for (c, k, s, _name) in conv_def: - input = self._conv_norm(input=input, num_filters=c, filter_size=k, stride=s, act='relu', name=_name) - - output = fluid.layers.pool2d(input=input, pool_size=3, pool_stride=2, pool_padding=1, pool_type='max') - return output - - def __call__(self, input): - assert isinstance(input, Variable) - assert not (set(self.feature_maps) - set([2, 3, 4, 5])), \ - "feature maps {} not in [2, 3, 4, 5]".format(self.feature_maps) - - res_endpoints = [] - - res = input - feature_maps = self.feature_maps - severed_head = getattr(self, 'severed_head', False) - if not severed_head: - res = self.c1_stage(res) - feature_maps = range(2, max(self.feature_maps) + 1) - - for i in feature_maps: - res = self.layer_warp(res, i) - if i in self.feature_maps: - res_endpoints.append(res) - if self.freeze_at >= i: - res.stop_gradient = True - if self.get_prediction: - pool 
= fluid.layers.pool2d(input=res, pool_type='avg', global_pooling=True) - stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) - - out = fluid.layers.fc( - input=pool, - size=self.class_dim, - param_attr=fluid.param_attr.ParamAttr(initializer=fluid.initializer.Uniform(-stdv, stdv))) - out = fluid.layers.softmax(out) - return out - return OrderedDict( - [('res{}_sum'.format(self.feature_maps[idx]), feat) for idx, feat in enumerate(res_endpoints)]) - - -class ResNetC5(ResNet): - def __init__(self, - depth=50, - freeze_at=2, - norm_type='affine_channel', - freeze_norm=True, - norm_decay=0., - variant='b', - feature_maps=[5], - weight_prefix_name=''): - super(ResNetC5, self).__init__(depth, freeze_at, norm_type, freeze_norm, norm_decay, variant, feature_maps) - self.severed_head = True diff --git a/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/retina_head.py b/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/retina_head.py deleted file mode 100644 index 1cde9e3202136fefc81c21812f805c456a12d548..0000000000000000000000000000000000000000 --- a/modules/image/object_detection/retinanet_resnet50_fpn_coco2017/retina_head.py +++ /dev/null @@ -1,381 +0,0 @@ -# coding=utf-8 -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -import paddle.fluid as fluid -from paddle.fluid.param_attr import ParamAttr -from paddle.fluid.initializer import Normal, Constant -from paddle.fluid.regularizer import L2Decay - -__all__ = ['AnchorGenerator', 'RetinaTargetAssign', 'RetinaOutputDecoder', 'RetinaHead'] - - -class AnchorGenerator(object): - # __op__ = fluid.layers.anchor_generator - def __init__(self, - stride=[16.0, 16.0], - anchor_sizes=[32, 64, 128, 256, 512], - aspect_ratios=[0.5, 1., 2.], - variance=[1., 1., 1., 1.]): - self.anchor_sizes = anchor_sizes - self.aspect_ratios = aspect_ratios - self.variance = variance - self.stride = stride - - -class RetinaTargetAssign(object): - # __op__ = fluid.layers.retinanet_target_assign - def __init__(self, positive_overlap=0.5, negative_overlap=0.4): - self.positive_overlap = positive_overlap - self.negative_overlap = negative_overlap - - -class RetinaOutputDecoder(object): - # __op__ = fluid.layers.retinanet_detection_output - def __init__(self, score_thresh=0.05, nms_thresh=0.3, pre_nms_top_n=1000, detections_per_im=100, nms_eta=1.0): - super(RetinaOutputDecoder, self).__init__() - self.score_threshold = score_thresh - self.nms_threshold = nms_thresh - self.nms_top_k = pre_nms_top_n - self.keep_top_k = detections_per_im - self.nms_eta = nms_eta - - -class RetinaHead(object): - """ - Retina Head - - Args: - anchor_generator (object): `AnchorGenerator` instance - target_assign (object): `RetinaTargetAssign` instance - output_decoder (object): `RetinaOutputDecoder` instance - num_convs_per_octave (int): Number of convolution layers in each octave - num_chan (int): Number of octave output channels - max_level (int): Highest level of FPN output - min_level (int): Lowest level of FPN output - prior_prob (float): Used to set the bias init for the class prediction layer - base_scale (int): Anchors are generated based on this scale - num_scales_per_octave (int): Number of anchor scales per octave - num_classes (int): Number of classes - gamma (float): The parameter in focal loss - alpha (float): The parameter in focal loss - sigma (float): The parameter in smooth l1 loss - """ - __inject__ = ['anchor_generator', 'target_assign', 'output_decoder'] - __shared__ = ['num_classes'] - 
- def __init__(self, - anchor_generator=AnchorGenerator(), - target_assign=RetinaTargetAssign(), - output_decoder=RetinaOutputDecoder(), - num_convs_per_octave=4, - num_chan=256, - max_level=7, - min_level=3, - prior_prob=0.01, - base_scale=4, - num_scales_per_octave=3, - num_classes=81, - gamma=2.0, - alpha=0.25, - sigma=3.0151134457776365): - self.anchor_generator = anchor_generator - self.target_assign = target_assign - self.output_decoder = output_decoder - self.num_convs_per_octave = num_convs_per_octave - self.num_chan = num_chan - self.max_level = max_level - self.min_level = min_level - self.prior_prob = prior_prob - self.base_scale = base_scale - self.num_scales_per_octave = num_scales_per_octave - self.num_classes = num_classes - self.gamma = gamma - self.alpha = alpha - self.sigma = sigma - - def _class_subnet(self, body_feats, spatial_scale): - """ - Get class predictions of all level FPN level. - - Args: - fpn_dict(dict): A dictionary represents the output of FPN with - their name. - spatial_scale(list): A list of multiplicative spatial scale factor. - - Returns: - cls_pred_input(list): Class prediction of all input fpn levels. - """ - assert len(body_feats) == self.max_level - self.min_level + 1 - fpn_name_list = list(body_feats.keys()) - cls_pred_list = [] - for lvl in range(self.min_level, self.max_level + 1): - fpn_name = fpn_name_list[self.max_level - lvl] - subnet_blob = body_feats[fpn_name] - for i in range(self.num_convs_per_octave): - conv_name = 'retnet_cls_conv_n{}_fpn{}'.format(i, lvl) - conv_share_name = 'retnet_cls_conv_n{}_fpn{}'.format(i, self.min_level) - subnet_blob_in = subnet_blob - subnet_blob = fluid.layers.conv2d( - input=subnet_blob_in, - num_filters=self.num_chan, - filter_size=3, - stride=1, - padding=1, - act='relu', - name=conv_name, - param_attr=ParamAttr(name=conv_share_name + '_w', initializer=Normal(loc=0., scale=0.01)), - bias_attr=ParamAttr(name=conv_share_name + '_b', learning_rate=2., regularizer=L2Decay(0.))) - - # class prediction - cls_name = 'retnet_cls_pred_fpn{}'.format(lvl) - cls_share_name = 'retnet_cls_pred_fpn{}'.format(self.min_level) - num_anchors = self.num_scales_per_octave * len(self.anchor_generator.aspect_ratios) - cls_dim = num_anchors * (self.num_classes - 1) - # bias initialization: b = -log((1 - pai) / pai) - bias_init = float(-np.log((1 - self.prior_prob) / self.prior_prob)) - out_cls = fluid.layers.conv2d( - input=subnet_blob, - num_filters=cls_dim, - filter_size=3, - stride=1, - padding=1, - act=None, - name=cls_name, - param_attr=ParamAttr(name=cls_share_name + '_w', initializer=Normal(loc=0., scale=0.01)), - bias_attr=ParamAttr( - name=cls_share_name + '_b', - initializer=Constant(value=bias_init), - learning_rate=2., - regularizer=L2Decay(0.))) - cls_pred_list.append(out_cls) - - return cls_pred_list - - def _bbox_subnet(self, body_feats, spatial_scale): - """ - Get bounding box predictions of all level FPN level. - - Args: - fpn_dict(dict): A dictionary represents the output of FPN with - their name. - spatial_scale(list): A list of multiplicative spatial scale factor. - - Returns: - bbox_pred_input(list): Bounding box prediction of all input fpn - levels. 
- """ - assert len(body_feats) == self.max_level - self.min_level + 1 - fpn_name_list = list(body_feats.keys()) - bbox_pred_list = [] - for lvl in range(self.min_level, self.max_level + 1): - fpn_name = fpn_name_list[self.max_level - lvl] - subnet_blob = body_feats[fpn_name] - for i in range(self.num_convs_per_octave): - conv_name = 'retnet_bbox_conv_n{}_fpn{}'.format(i, lvl) - conv_share_name = 'retnet_bbox_conv_n{}_fpn{}'.format(i, self.min_level) - subnet_blob_in = subnet_blob - subnet_blob = fluid.layers.conv2d( - input=subnet_blob_in, - num_filters=self.num_chan, - filter_size=3, - stride=1, - padding=1, - act='relu', - name=conv_name, - param_attr=ParamAttr(name=conv_share_name + '_w', initializer=Normal(loc=0., scale=0.01)), - bias_attr=ParamAttr(name=conv_share_name + '_b', learning_rate=2., regularizer=L2Decay(0.))) - - # bbox prediction - bbox_name = 'retnet_bbox_pred_fpn{}'.format(lvl) - bbox_share_name = 'retnet_bbox_pred_fpn{}'.format(self.min_level) - num_anchors = self.num_scales_per_octave * len(self.anchor_generator.aspect_ratios) - bbox_dim = num_anchors * 4 - out_bbox = fluid.layers.conv2d( - input=subnet_blob, - num_filters=bbox_dim, - filter_size=3, - stride=1, - padding=1, - act=None, - name=bbox_name, - param_attr=ParamAttr(name=bbox_share_name + '_w', initializer=Normal(loc=0., scale=0.01)), - bias_attr=ParamAttr(name=bbox_share_name + '_b', learning_rate=2., regularizer=L2Decay(0.))) - bbox_pred_list.append(out_bbox) - return bbox_pred_list - - def _anchor_generate(self, body_feats, spatial_scale): - """ - Get anchor boxes of all level FPN level. - - Args: - fpn_dict(dict): A dictionary represents the output of FPN with their name. - spatial_scale(list): A list of multiplicative spatial scale factor. - - Return: - anchor_input(list): Anchors of all input fpn levels with shape of. - anchor_var_input(list): Anchor variance of all input fpn levels with shape. - """ - assert len(body_feats) == self.max_level - self.min_level + 1 - fpn_name_list = list(body_feats.keys()) - anchor_list = [] - anchor_var_list = [] - for lvl in range(self.min_level, self.max_level + 1): - anchor_sizes = [] - stride = int(1 / spatial_scale[self.max_level - lvl]) - for octave in range(self.num_scales_per_octave): - anchor_size = stride * (2**(float(octave) / float(self.num_scales_per_octave))) * self.base_scale - anchor_sizes.append(anchor_size) - fpn_name = fpn_name_list[self.max_level - lvl] - anchor, anchor_var = fluid.layers.anchor_generator( - input=body_feats[fpn_name], - anchor_sizes=anchor_sizes, - aspect_ratios=self.anchor_generator.aspect_ratios, - stride=[stride, stride], - variance=self.anchor_generator.variance) - anchor_list.append(anchor) - anchor_var_list.append(anchor_var) - return anchor_list, anchor_var_list - - def _get_output(self, body_feats, spatial_scale): - """ - Get class, bounding box predictions and anchor boxes of all level FPN level. - - Args: - fpn_dict(dict): A dictionary represents the output of FPN with - their name. - spatial_scale(list): A list of multiplicative spatial scale factor. - - Returns: - cls_pred_input(list): Class prediction of all input fpn levels. - bbox_pred_input(list): Bounding box prediction of all input fpn - levels. - anchor_input(list): Anchors of all input fpn levels with shape of. - anchor_var_input(list): Anchor variance of all input fpn levels with - shape. 
- """ - assert len(body_feats) == self.max_level - self.min_level + 1 - # class subnet - cls_pred_list = self._class_subnet(body_feats, spatial_scale) - # bbox subnet - bbox_pred_list = self._bbox_subnet(body_feats, spatial_scale) - #generate anchors - anchor_list, anchor_var_list = self._anchor_generate(body_feats, spatial_scale) - cls_pred_reshape_list = [] - bbox_pred_reshape_list = [] - anchor_reshape_list = [] - anchor_var_reshape_list = [] - for i in range(self.max_level - self.min_level + 1): - cls_pred_transpose = fluid.layers.transpose(cls_pred_list[i], perm=[0, 2, 3, 1]) - cls_pred_reshape = fluid.layers.reshape(cls_pred_transpose, shape=(0, -1, self.num_classes - 1)) - bbox_pred_transpose = fluid.layers.transpose(bbox_pred_list[i], perm=[0, 2, 3, 1]) - bbox_pred_reshape = fluid.layers.reshape(bbox_pred_transpose, shape=(0, -1, 4)) - anchor_reshape = fluid.layers.reshape(anchor_list[i], shape=(-1, 4)) - anchor_var_reshape = fluid.layers.reshape(anchor_var_list[i], shape=(-1, 4)) - cls_pred_reshape_list.append(cls_pred_reshape) - bbox_pred_reshape_list.append(bbox_pred_reshape) - anchor_reshape_list.append(anchor_reshape) - anchor_var_reshape_list.append(anchor_var_reshape) - output = {} - output['cls_pred'] = cls_pred_reshape_list - output['bbox_pred'] = bbox_pred_reshape_list - output['anchor'] = anchor_reshape_list - output['anchor_var'] = anchor_var_reshape_list - return output - - def get_prediction(self, body_feats, spatial_scale, im_info): - """ - Get prediction bounding box in test stage. - - Args: - fpn_dict(dict): A dictionary represents the output of FPN with - their name. - spatial_scale(list): A list of multiplicative spatial scale factor. - im_info (Variable): A 2-D LoDTensor with shape [B, 3]. B is the - number of input images, each element consists of im_height, - im_width, im_scale. - - Returns: - pred_result(Variable): Prediction result with shape [N, 6]. Each - row has 6 values: [label, confidence, xmin, ymin, xmax, ymax]. - N is the total number of prediction. - """ - output = self._get_output(body_feats, spatial_scale) - cls_pred_reshape_list = output['cls_pred'] - bbox_pred_reshape_list = output['bbox_pred'] - anchor_reshape_list = output['anchor'] - for i in range(self.max_level - self.min_level + 1): - cls_pred_reshape_list[i] = fluid.layers.sigmoid(cls_pred_reshape_list[i]) - pred_result = fluid.layers.retinanet_detection_output( - bboxes=bbox_pred_reshape_list, - scores=cls_pred_reshape_list, - anchors=anchor_reshape_list, - im_info=im_info, - score_threshold=self.output_decoder.score_threshold, - nms_threshold=self.output_decoder.nms_threshold, - nms_top_k=self.output_decoder.nms_top_k, - keep_top_k=self.output_decoder.keep_top_k, - nms_eta=self.output_decoder.nms_eta) - return pred_result - - def get_loss(self, body_feats, spatial_scale, im_info, gt_box, gt_label, is_crowd): - """ - Calculate the loss of retinanet. - Args: - fpn_dict(dict): A dictionary represents the output of FPN with - their name. - spatial_scale(list): A list of multiplicative spatial scale factor. - im_info(Variable): A 2-D LoDTensor with shape [B, 3]. B is the - number of input images, each element consists of im_height, - im_width, im_scale. - gt_box(Variable): The ground-truth bounding boxes with shape [M, 4]. - M is the number of groundtruth. - gt_label(Variable): The ground-truth labels with shape [M, 1]. - M is the number of groundtruth. - is_crowd(Variable): Indicates groud-truth is crowd or not with - shape [M, 1]. M is the number of groundtruth. 
- - Returns: - Type: dict - loss_cls(Variable): focal loss. - loss_bbox(Variable): smooth l1 loss. - """ - output = self._get_output(body_feats, spatial_scale) - cls_pred_reshape_list = output['cls_pred'] - bbox_pred_reshape_list = output['bbox_pred'] - anchor_reshape_list = output['anchor'] - anchor_var_reshape_list = output['anchor_var'] - - cls_pred_input = fluid.layers.concat(cls_pred_reshape_list, axis=1) - bbox_pred_input = fluid.layers.concat(bbox_pred_reshape_list, axis=1) - anchor_input = fluid.layers.concat(anchor_reshape_list, axis=0) - anchor_var_input = fluid.layers.concat(anchor_var_reshape_list, axis=0) - score_pred, loc_pred, score_tgt, loc_tgt, bbox_weight, fg_num = \ - fluid.layers.rpn_target_assign( - bbox_pred=bbox_pred_input, - cls_logits=cls_pred_input, - anchor_box=anchor_input, - anchor_var=anchor_var_input, - gt_boxes=gt_box, - gt_labels=gt_label, - is_crowd=is_crowd, - im_info=im_info, - num_classes=self.num_classes - 1, - rpn_batch_size_per_im=self.target_assign.rpn_batch_size_per_im, - rpn_straddle_thresh=self.target_assign.rpn_straddle_thresh, - rpn_fg_fraction=self.target_assign.rpn_fg_fraction, - rpn_positive_overlap=self.target_assign.rpn_positive_overlap, - rpn_negative_overlap=self.target_assign.rpn_negative_overlap, - use_random=self.target_assign.use_random) - fg_num = fluid.layers.reduce_sum(fg_num, name='fg_num') - score_tgt = fluid.layers.cast(score_tgt, 'int32') - loss_cls = fluid.layers.sigmoid_focal_loss( - x=score_pred, label=score_tgt, fg_num=fg_num, gamma=self.gamma, alpha=self.alpha) - loss_cls = fluid.layers.reduce_sum(loss_cls, name='loss_cls') - loss_bbox = fluid.layers.smooth_l1( - x=loc_pred, y=loc_tgt, sigma=self.sigma, inside_weight=bbox_weight, outside_weight=bbox_weight) - loss_bbox = fluid.layers.reduce_sum(loss_bbox, name='loss_bbox') - loss_bbox = loss_bbox / fg_num - return {'loss_cls': loss_cls, 'loss_bbox': loss_bbox} diff --git a/modules/image/object_detection/yolov3_darknet53_pascalvoc/module.py b/modules/image/object_detection/yolov3_darknet53_pascalvoc/module.py deleted file mode 100644 index 2ec816e51989327ad8006b02e878fb86ab235c31..0000000000000000000000000000000000000000 --- a/modules/image/object_detection/yolov3_darknet53_pascalvoc/module.py +++ /dev/null @@ -1,325 +0,0 @@ -import os - -import paddle -import paddle.nn as nn -import paddle.nn.functional as F -from paddle.nn.initializer import Normal, Constant -from paddle.regularizer import L2Decay -from paddlehub.module.cv_module import Yolov3Module -import paddlehub.process.detect_transforms as T -from paddlehub.module.module import moduleinfo - - -class ConvBNLayer(nn.Layer): - """Basic block for Darknet""" - - def __init__(self, - ch_in: int, - ch_out: int, - filter_size: int = 3, - stride: int = 1, - groups: int = 1, - padding: int = 0, - act: str = 'leakly', - is_test: bool = False): - super(ConvBNLayer, self).__init__() - - self.conv = nn.Conv2d( - ch_in, - ch_out, - filter_size, - padding=padding, - stride=stride, - groups=groups, - weight_attr=paddle.ParamAttr(initializer=Normal(0., 0.02)), - bias_attr=False) - - self.batch_norm = nn.BatchNorm( - num_channels=ch_out, - is_test=is_test, - param_attr=paddle.ParamAttr(initializer=Normal(0., 0.02), regularizer=L2Decay(0.))) - self.act = act - - def forward(self, inputs: paddle.Tensor) -> paddle.Tensor: - out = self.conv(inputs) - out = self.batch_norm(out) - if self.act == "leakly": - out = F.leaky_relu(x=out, negative_slope=0.1) - return out - - -class DownSample(nn.Layer): - """Downsample block for Darknet""" - - 
def __init__(self, - ch_in: int, - ch_out: int, - filter_size: int = 3, - stride: int = 2, - padding: int = 1, - is_test: bool = False): - super(DownSample, self).__init__() - - self.conv_bn_layer = ConvBNLayer( - ch_in=ch_in, ch_out=ch_out, filter_size=filter_size, stride=stride, padding=padding, is_test=is_test) - self.ch_out = ch_out - - def forward(self, inputs: paddle.Tensor) -> paddle.Tensor: - out = self.conv_bn_layer(inputs) - return out - - -class BasicBlock(nn.Layer): - """Basic residual block for Darknet""" - - def __init__(self, ch_in: int, ch_out: int, is_test: bool = False): - super(BasicBlock, self).__init__() - - self.conv1 = ConvBNLayer(ch_in=ch_in, ch_out=ch_out, filter_size=1, stride=1, padding=0, is_test=is_test) - self.conv2 = ConvBNLayer(ch_in=ch_out, ch_out=ch_out * 2, filter_size=3, stride=1, padding=1, is_test=is_test) - - def forward(self, inputs: paddle.Tensor) -> paddle.Tensor: - conv1 = self.conv1(inputs) - conv2 = self.conv2(conv1) - out = paddle.elementwise_add(x=inputs, y=conv2, act=None) - return out - - -class LayerWarp(nn.Layer): - """Warp layer composed by basic residual blocks""" - - def __init__(self, ch_in: int, ch_out: int, count: int, is_test: bool = False): - super(LayerWarp, self).__init__() - self.basicblock0 = BasicBlock(ch_in, ch_out, is_test=is_test) - self.res_out_list = [] - for i in range(1, count): - res_out = self.add_sublayer("basic_block_%d" % (i), BasicBlock(ch_out * 2, ch_out, is_test=is_test)) - self.res_out_list.append(res_out) - self.ch_out = ch_out - - def forward(self, inputs: paddle.Tensor) -> paddle.Tensor: - y = self.basicblock0(inputs) - for basic_block_i in self.res_out_list: - y = basic_block_i(y) - return y - - -class DarkNet53_conv_body(nn.Layer): - """Darknet53 - Args: - ch_in(int): Input channels, default is 3. - is_test (bool): Set the test mode, default is True. 
- """ - - def __init__(self, ch_in: int = 3, is_test: bool = False): - super(DarkNet53_conv_body, self).__init__() - self.stages = [1, 2, 8, 8, 4] - self.stages = self.stages[0:5] - - self.conv0 = ConvBNLayer(ch_in=ch_in, ch_out=32, filter_size=3, stride=1, padding=1, is_test=is_test) - - self.downsample0 = DownSample(ch_in=32, ch_out=32 * 2, is_test=is_test) - self.darknet53_conv_block_list = [] - self.downsample_list = [] - ch_in = [64, 128, 256, 512, 1024] - - for i, stage in enumerate(self.stages): - conv_block = self.add_sublayer("stage_%d" % (i), - LayerWarp(int(ch_in[i]), 32 * (2**i), stage, is_test=is_test)) - self.darknet53_conv_block_list.append(conv_block) - - for i in range(len(self.stages) - 1): - downsample = self.add_sublayer( - "stage_%d_downsample" % i, DownSample( - ch_in=32 * (2**(i + 1)), ch_out=32 * (2**(i + 2)), is_test=is_test)) - self.downsample_list.append(downsample) - - def forward(self, inputs: paddle.Tensor) -> paddle.Tensor: - out = self.conv0(inputs) - out = self.downsample0(out) - blocks = [] - for i, conv_block_i in enumerate(self.darknet53_conv_block_list): - out = conv_block_i(out) - blocks.append(out) - if i < len(self.stages) - 1: - out = self.downsample_list[i](out) - return blocks[-1:-4:-1] - - -class YoloDetectionBlock(nn.Layer): - """Basic block for Yolov3""" - - def __init__(self, ch_in: int, channel: int, is_test: bool = True): - super(YoloDetectionBlock, self).__init__() - - assert channel % 2 == 0, \ - "channel {} cannot be divided by 2".format(channel) - - self.conv0 = ConvBNLayer(ch_in=ch_in, ch_out=channel, filter_size=1, stride=1, padding=0, is_test=is_test) - self.conv1 = ConvBNLayer(ch_in=channel, ch_out=channel * 2, filter_size=3, stride=1, padding=1, is_test=is_test) - self.conv2 = ConvBNLayer(ch_in=channel * 2, ch_out=channel, filter_size=1, stride=1, padding=0, is_test=is_test) - self.conv3 = ConvBNLayer(ch_in=channel, ch_out=channel * 2, filter_size=3, stride=1, padding=1, is_test=is_test) - self.route = ConvBNLayer(ch_in=channel * 2, ch_out=channel, filter_size=1, stride=1, padding=0, is_test=is_test) - self.tip = ConvBNLayer(ch_in=channel, ch_out=channel * 2, filter_size=3, stride=1, padding=1, is_test=is_test) - - def forward(self, inputs): - out = self.conv0(inputs) - out = self.conv1(out) - out = self.conv2(out) - out = self.conv3(out) - route = self.route(out) - tip = self.tip(route) - return route, tip - - -class Upsample(nn.Layer): - """Upsample block for Yolov3""" - - def __init__(self, scale: int = 2): - super(Upsample, self).__init__() - self.scale = scale - - def forward(self, inputs: paddle.Tensor): - shape_nchw = paddle.to_tensor(inputs.shape) - shape_hw = paddle.slice(shape_nchw, axes=[0], starts=[2], ends=[4]) - shape_hw.stop_gradient = True - in_shape = paddle.cast(shape_hw, dtype='int32') - out_shape = in_shape * self.scale - out_shape.stop_gradient = True - out = F.resize_nearest(input=inputs, scale=self.scale, actual_shape=out_shape) - return out - - -@moduleinfo( - name="yolov3_darknet53_pascalvoc", - type="CV/image_editing", - author="paddlepaddle", - author_email="", - summary="Yolov3 is a detection model, this module is trained with VOC dataset.", - version="1.0.0", - meta=Yolov3Module) -class YOLOv3(nn.Layer): - """YOLOV3 for detection - - Args: - ch_in(int): Input channels, default is 3. - class_num(int): Categories for detection,if dataset is voc, class_num is 20. - ignore_thresh(float): The ignore threshold to ignore confidence loss. 
- valid_thresh(float): Threshold to filter out bounding boxes with low confidence score. - nms_topk(int): Maximum number of detections to be kept according to the confidences after the filtering - detections based on score_threshold. - nms_posk(int): Number of total bboxes to be kept per image after NMS step. -1 means keeping all bboxes after NMS - step. - nms_thresh (float): The threshold to be used in NMS. Default: 0.3. - is_train (bool): Set the train mode, default is True. - load_checkpoint(str): Whether to load checkpoint. - """ - - def __init__(self, - ch_in: int = 3, - class_num: int = 20, - ignore_thresh: float = 0.7, - valid_thresh: float = 0.005, - nms_topk: int = 400, - nms_posk: int = 100, - nms_thresh: float = 0.45, - is_train: bool = True, - load_checkpoint: str = None): - super(YOLOv3, self).__init__() - - self.is_train = is_train - self.block = DarkNet53_conv_body(ch_in=ch_in, is_test=not self.is_train) - self.block_outputs = [] - self.yolo_blocks = [] - self.route_blocks_2 = [] - self.anchor_masks = [[6, 7, 8], [3, 4, 5], [0, 1, 2]] - self.anchors = [10, 13, 16, 30, 33, 23, 30, 61, 62, 45, 59, 119, 116, 90, 156, 198, 373, 326] - self.class_num = class_num - self.ignore_thresh = ignore_thresh - self.valid_thresh = valid_thresh - self.nms_topk = nms_topk - self.nms_posk = nms_posk - self.nms_thresh = nms_thresh - ch_in_list = [1024, 768, 384] - - for i in range(3): - yolo_block = self.add_sublayer( - "yolo_detecton_block_%d" % (i), - YoloDetectionBlock(ch_in_list[i], channel=512 // (2**i), is_test=not self.is_train)) - self.yolo_blocks.append(yolo_block) - - num_filters = len(self.anchor_masks[i]) * (self.class_num + 5) - block_out = self.add_sublayer( - "block_out_%d" % (i), - nn.Conv2d( - 1024 // (2**i), - num_filters, - 1, - stride=1, - padding=0, - weight_attr=paddle.ParamAttr(initializer=Normal(0., 0.02)), - bias_attr=paddle.ParamAttr(initializer=Constant(0.0), regularizer=L2Decay(0.)))) - self.block_outputs.append(block_out) - - if i < 2: - route = self.add_sublayer( - "route2_%d" % i, - ConvBNLayer( - ch_in=512 // (2**i), - ch_out=256 // (2**i), - filter_size=1, - stride=1, - padding=0, - is_test=(not self.is_train))) - self.route_blocks_2.append(route) - self.upsample = Upsample() - - if load_checkpoint is not None: - model_dict = paddle.load(load_checkpoint)[0] - self.set_dict(model_dict) - print("load custom checkpoint success") - - else: - checkpoint = os.path.join(self.directory, 'yolov3_darknet53_voc.pdparams') - if not os.path.exists(checkpoint): - os.system( - 'wget https://paddlehub.bj.bcebos.com/dygraph/detection/yolov3_darknet53_voc.pdparams -O ' \ - + checkpoint) - model_dict = paddle.load(checkpoint)[0] - self.set_dict(model_dict) - print("load pretrained checkpoint success") - - def transform(self, img): - if self.is_train: - transform = T.Compose([ - T.RandomDistort(), - T.RandomExpand(fill=[0.485, 0.456, 0.406]), - T.RandomCrop(), - T.Resize(target_size=416), - T.RandomFlip(), - T.ShuffleBox(), - T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) - ]) - else: - transform = T.Compose([ - T.Resize(target_size=416, interp='CUBIC'), - T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) - ]) - - return transform(img) - - def forward(self, inputs: paddle.Tensor): - outputs = [] - blocks = self.block(inputs) - route = None - for i, block in enumerate(blocks): - if i > 0: - block = paddle.concat([route, block], axis=1) - route, tip = self.yolo_blocks[i](block) - block_out = self.block_outputs[i](tip) - outputs.append(block_out) - 
if i < 2: - route = self.route_blocks_2[i](route) - route = self.upsample(route) - - return outputs diff --git a/modules/image/object_detection/yolov3_darknet53_vehicles/module.py b/modules/image/object_detection/yolov3_darknet53_vehicles/module.py index 1c4d22f05753c6657fe5a0fceb4646062ea7d483..090223cc4009c410c698ae37ef89033ebb90d065 100644 --- a/modules/image/object_detection/yolov3_darknet53_vehicles/module.py +++ b/modules/image/object_detection/yolov3_darknet53_vehicles/module.py @@ -9,7 +9,8 @@ from functools import partial import numpy as np import paddle.fluid as fluid import paddlehub as hub -from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor +from paddle.inference import Config +from paddle.inference import create_predictor from paddlehub.module.module import moduleinfo, runnable, serving from paddlehub.common.paddle_helper import add_vars_prefix @@ -23,39 +24,62 @@ from yolov3_darknet53_vehicles.yolo_head import MultiClassNMS, YOLOv3Head name="yolov3_darknet53_vehicles", version="1.0.2", type="CV/object_detection", - summary= - "Baidu's YOLOv3 model for vehicles detection, with backbone DarkNet53.", + summary="Baidu's YOLOv3 model for vehicles detection, with backbone DarkNet53.", author="paddlepaddle", author_email="paddle-dev@baidu.com") class YOLOv3DarkNet53Vehicles(hub.Module): def _initialize(self): - self.default_pretrained_model_path = os.path.join( - self.directory, "yolov3_darknet53_vehicles_model") - self.label_names = load_label_info( - os.path.join(self.directory, "label_file.txt")) + self.default_pretrained_model_path = os.path.join(self.directory, "yolov3_darknet53_vehicles_model") + self.label_names = load_label_info(os.path.join(self.directory, "label_file.txt")) self._set_config() + def _get_device_id(self, places): + try: + places = os.environ[places] + id = int(places) + except: + id = -1 + return id + def _set_config(self): """ predictor config setting. 
""" - cpu_config = AnalysisConfig(self.default_pretrained_model_path) + + # create default cpu predictor + cpu_config = Config(self.default_pretrained_model_path) cpu_config.disable_glog_info() cpu_config.disable_gpu() - cpu_config.switch_ir_optim(False) - self.cpu_predictor = create_paddle_predictor(cpu_config) + self.cpu_predictor = create_predictor(cpu_config) - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - use_gpu = True - except: - use_gpu = False - if use_gpu: - gpu_config = AnalysisConfig(self.default_pretrained_model_path) + # create predictors using various types of devices + + # npu + npu_id = self._get_device_id("FLAGS_selected_npus") + if npu_id != -1: + # use npu + npu_config = Config(self.default_pretrained_model_path) + npu_config.disable_glog_info() + npu_config.enable_npu(device_id=npu_id) + self.npu_predictor = create_predictor(npu_config) + + # gpu + gpu_id = self._get_device_id("CUDA_VISIBLE_DEVICES") + if gpu_id != -1: + # use gpu + gpu_config = Config(self.default_pretrained_model_path) gpu_config.disable_glog_info() - gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=0) - self.gpu_predictor = create_paddle_predictor(gpu_config) + gpu_config.enable_use_gpu(memory_pool_init_size_mb=1000, device_id=gpu_id) + self.gpu_predictor = create_predictor(gpu_config) + + # xpu + xpu_id = self._get_device_id("XPU_VISIBLE_DEVICES") + if xpu_id != -1: + # use xpu + xpu_config = Config(self.default_pretrained_model_path) + xpu_config.disable_glog_info() + xpu_config.enable_xpu(100) + self.xpu_predictor = create_predictor(xpu_config) def context(self, trainable=True, pretrained=True, get_prediction=False): """ @@ -76,20 +100,18 @@ class YOLOv3DarkNet53Vehicles(hub.Module): with fluid.program_guard(context_prog, startup_program): with fluid.unique_name.guard(): # image - image = fluid.layers.data( - name='image', shape=[3, 608, 608], dtype='float32') + image = fluid.layers.data(name='image', shape=[3, 608, 608], dtype='float32') # backbone backbone = DarkNet(norm_type='sync_bn', norm_decay=0., depth=53) # body_feats body_feats = backbone(image) # im_size - im_size = fluid.layers.data( - name='im_size', shape=[2], dtype='int32') + im_size = fluid.layers.data(name='im_size', shape=[2], dtype='int32') # yolo_head yolo_head = YOLOv3Head( anchor_masks=[[6, 7, 8], [3, 4, 5], [0, 1, 2]], - anchors=[[8, 9], [10, 23], [19, 15], [23, 33], [40, 25], - [54, 50], [101, 80], [139, 145], [253, 224]], + anchors=[[8, 9], [10, 23], [19, 15], [23, 33], [40, 25], [54, 50], [101, 80], [139, 145], + [253, 224]], norm_decay=0., num_classes=6, ignore_thresh=0.7, @@ -102,8 +124,7 @@ class YOLOv3DarkNet53Vehicles(hub.Module): normalized=False, score_threshold=0.005)) # head_features - head_features, body_features = yolo_head._get_outputs( - body_feats, is_train=trainable) + head_features, body_features = yolo_head._get_outputs(body_feats, is_train=trainable) place = fluid.CPUPlace() exe = fluid.Executor(place) @@ -112,35 +133,24 @@ class YOLOv3DarkNet53Vehicles(hub.Module): # var_prefix var_prefix = '@HUB_{}@'.format(self.name) # name of inputs - inputs = { - 'image': var_prefix + image.name, - 'im_size': var_prefix + im_size.name - } + inputs = {'image': var_prefix + image.name, 'im_size': var_prefix + im_size.name} # name of outputs if get_prediction: bbox_out = yolo_head.get_prediction(head_features, im_size) outputs = {'bbox_out': [var_prefix + bbox_out.name]} else: outputs = { - 'head_features': - [var_prefix + var.name for var in head_features], - 'body_features': 
- [var_prefix + var.name for var in body_features] + 'head_features': [var_prefix + var.name for var in head_features], + 'body_features': [var_prefix + var.name for var in body_features] } # add_vars_prefix add_vars_prefix(context_prog, var_prefix) add_vars_prefix(fluid.default_startup_program(), var_prefix) # inputs - inputs = { - key: context_prog.global_block().vars[value] - for key, value in inputs.items() - } + inputs = {key: context_prog.global_block().vars[value] for key, value in inputs.items()} # outputs outputs = { - key: [ - context_prog.global_block().vars[varname] - for varname in value - ] + key: [context_prog.global_block().vars[varname] for varname in value] for key, value in outputs.items() } # trainable @@ -150,14 +160,9 @@ class YOLOv3DarkNet53Vehicles(hub.Module): if pretrained: def _if_exist(var): - return os.path.exists( - os.path.join(self.default_pretrained_model_path, - var.name)) - - fluid.io.load_vars( - exe, - self.default_pretrained_model_path, - predicate=_if_exist) + return os.path.exists(os.path.join(self.default_pretrained_model_path, var.name)) + + fluid.io.load_vars(exe, self.default_pretrained_model_path, predicate=_if_exist) else: exe.run(startup_program) @@ -170,7 +175,8 @@ class YOLOv3DarkNet53Vehicles(hub.Module): use_gpu=False, output_dir='yolov3_vehicles_detect_output', score_thresh=0.2, - visualization=True): + visualization=True, + use_device=None): """API of Object Detection. Args: @@ -181,6 +187,7 @@ class YOLOv3DarkNet53Vehicles(hub.Module): output_dir (str): The path to store output images. visualization (bool): Whether to save image or not. score_thresh (float): threshold for object detecion. + use_device (str): use cpu, gpu, xpu or npu, overwrites use_gpu flag. Returns: res (list[dict]): The result of vehicles detecion. keys include 'data', 'save_path', the corresponding value is: @@ -193,14 +200,25 @@ class YOLOv3DarkNet53Vehicles(hub.Module): confidence (float): The confidence of detection result. save_path (str, optional): The path to save output images. """ - if use_gpu: - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - except: - raise RuntimeError( - "Attempt to use GPU for prediction, but environment variable CUDA_VISIBLE_DEVICES was not set correctly." 
- ) + + # real predictor to use + if use_device is not None: + if use_device == "cpu": + predictor = self.cpu_predictor + elif use_device == "xpu": + predictor = self.xpu_predictor + elif use_device == "npu": + predictor = self.npu_predictor + elif use_device == "gpu": + predictor = self.gpu_predictor + else: + raise Exception("Unsupported device: " + use_device) + else: + # use_device is not set, therefore follow use_gpu + if use_gpu: + predictor = self.gpu_predictor + else: + predictor = self.cpu_predictor paths = paths if paths else list() data_reader = partial(reader, paths, images) @@ -208,19 +226,27 @@ class YOLOv3DarkNet53Vehicles(hub.Module): res = [] for iter_id, feed_data in enumerate(batch_reader()): feed_data = np.array(feed_data) - image_tensor = PaddleTensor(np.array(list(feed_data[:, 0]))) - im_size_tensor = PaddleTensor(np.array(list(feed_data[:, 1]))) - if use_gpu: - data_out = self.gpu_predictor.run( - [image_tensor, im_size_tensor]) - else: - data_out = self.cpu_predictor.run( - [image_tensor, im_size_tensor]) + + input_names = predictor.get_input_names() + image_data = np.array(list(feed_data[:, 0])) + image_size_data = np.array(list(feed_data[:, 1])) + + image_tensor = predictor.get_input_handle(input_names[0]) + image_tensor.reshape(image_data.shape) + image_tensor.copy_from_cpu(image_data.copy()) + + image_size_tensor = predictor.get_input_handle(input_names[1]) + image_size_tensor.reshape(image_size_data.shape) + image_size_tensor.copy_from_cpu(image_size_data.copy()) + + predictor.run() + output_names = predictor.get_output_names() + output_handle = predictor.get_output_handle(output_names[0]) output = postprocess( paths=paths, images=images, - data_out=data_out, + data_out=output_handle, score_thresh=score_thresh, label_names=self.label_names, output_dir=output_dir, @@ -229,11 +255,7 @@ class YOLOv3DarkNet53Vehicles(hub.Module): res.extend(output) return res - def save_inference_model(self, - dirname, - model_filename=None, - params_filename=None, - combined=True): + def save_inference_model(self, dirname, model_filename=None, params_filename=None, combined=True): if combined: model_filename = "__model__" if not model_filename else model_filename params_filename = "__params__" if not params_filename else params_filename @@ -271,12 +293,9 @@ class YOLOv3DarkNet53Vehicles(hub.Module): prog='hub run {}'.format(self.name), usage='%(prog)s', add_help=True) - self.arg_input_group = self.parser.add_argument_group( - title="Input options", description="Input data. Required") + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") self.arg_config_group = self.parser.add_argument_group( - title="Config options", - description= - "Run configuration for controlling module behavior, not required.") + title="Config options", description="Run configuration for controlling module behavior, not required.") self.add_module_config_arg() self.add_module_input_arg() args = self.parser.parse_args(argvs) @@ -286,7 +305,8 @@ class YOLOv3DarkNet53Vehicles(hub.Module): use_gpu=args.use_gpu, output_dir=args.output_dir, visualization=args.visualization, - score_thresh=args.score_thresh) + score_thresh=args.score_thresh, + use_device=args.use_device) return results def add_module_config_arg(self): @@ -294,34 +314,24 @@ class YOLOv3DarkNet53Vehicles(hub.Module): Add the command config options. 
""" self.arg_config_group.add_argument( - '--use_gpu', - type=ast.literal_eval, - default=False, - help="whether use GPU or not") + '--use_gpu', type=ast.literal_eval, default=False, help="whether use GPU or not") self.arg_config_group.add_argument( '--output_dir', type=str, default='yolov3_vehicles_detect_output', help="The directory to save output images.") self.arg_config_group.add_argument( - '--visualization', - type=ast.literal_eval, - default=False, - help="whether to save output as images.") + '--visualization', type=ast.literal_eval, default=False, help="whether to save output as images.") + self.arg_config_group.add_argument( + '--use_device', + choices=["cpu", "gpu", "xpu", "npu"], + help="use cpu, gpu, xpu or npu. overwrites use_gpu flag.") def add_module_input_arg(self): """ Add the command input options. """ + self.arg_input_group.add_argument('--input_path', type=str, help="path to image.") + self.arg_input_group.add_argument('--batch_size', type=ast.literal_eval, default=1, help="batch size.") self.arg_input_group.add_argument( - '--input_path', type=str, help="path to image.") - self.arg_input_group.add_argument( - '--batch_size', - type=ast.literal_eval, - default=1, - help="batch size.") - self.arg_input_group.add_argument( - '--score_thresh', - type=ast.literal_eval, - default=0.2, - help="threshold for object detecion.") + '--score_thresh', type=ast.literal_eval, default=0.2, help="threshold for object detecion.") diff --git a/modules/image/object_detection/yolov3_darknet53_vehicles/processor.py b/modules/image/object_detection/yolov3_darknet53_vehicles/processor.py index 2f9a42d9c0ce6fc2d819349580d850b908ccfb51..5aa464e6bf950b7c64b271e2673663d689ad1f24 100644 --- a/modules/image/object_detection/yolov3_darknet53_vehicles/processor.py +++ b/modules/image/object_detection/yolov3_darknet53_vehicles/processor.py @@ -50,21 +50,15 @@ def draw_bounding_box_on_image(image_path, data_list, save_dir): image = Image.open(image_path) draw = ImageDraw.Draw(image) for data in data_list: - left, right, top, bottom = data['left'], data['right'], data[ - 'top'], data['bottom'] + left, right, top, bottom = data['left'], data['right'], data['top'], data['bottom'] # draw bbox - draw.line([(left, top), (left, bottom), (right, bottom), (right, top), - (left, top)], - width=2, - fill='red') + draw.line([(left, top), (left, bottom), (right, bottom), (right, top), (left, top)], width=2, fill='red') # draw label if image.mode == 'RGB': text = data['label'] + ": %.2f%%" % (100 * data['confidence']) textsize_width, textsize_height = draw.textsize(text=text) draw.rectangle( - xy=(left, top - (textsize_height + 5), - left + textsize_width + 10, top), - fill=(255, 255, 255)) + xy=(left, top - (textsize_height + 5), left + textsize_width + 10, top), fill=(255, 255, 255)) draw.text(xy=(left, top - 15), text=text, fill=(0, 0, 0)) save_name = get_save_image_name(image, save_dir, image_path) @@ -92,14 +86,7 @@ def load_label_info(file_path): return label_names -def postprocess(paths, - images, - data_out, - score_thresh, - label_names, - output_dir, - handle_id, - visualization=True): +def postprocess(paths, images, data_out, score_thresh, label_names, output_dir, handle_id, visualization=True): """ postprocess the lod_tensor produced by fluid.Executor.run @@ -107,8 +94,6 @@ def postprocess(paths, paths (list[str]): The paths of images. images (list(numpy.ndarray)): images data, shape of each is [H, W, C] data_out (lod_tensor): data output of predictor. - batch_size (int): batch size. 
- use_gpu (bool): Whether to use gpu. output_dir (str): The path to store output images. visualization (bool): Whether to save image or not. score_thresh (float): the low limit of bounding box. @@ -126,9 +111,8 @@ def postprocess(paths, confidence (float): The confidence of detection result. save_path (str): The path to save output images. """ - lod_tensor = data_out[0] - lod = lod_tensor.lod[0] - results = lod_tensor.as_ndarray() + results = data_out.copy_to_cpu() + lod = data_out.lod()[0] check_dir(output_dir) @@ -157,9 +141,7 @@ def postprocess(paths, org_img = org_img.astype(np.uint8) org_img = Image.fromarray(org_img[:, :, ::-1]) if visualization: - org_img_path = get_save_image_name( - org_img, output_dir, 'image_numpy_{}'.format( - (handle_id + index))) + org_img_path = get_save_image_name(org_img, output_dir, 'image_numpy_{}'.format((handle_id + index))) org_img.save(org_img_path) org_img_height = org_img.height org_img_width = org_img.width @@ -175,13 +157,11 @@ def postprocess(paths, dt = {} dt['label'] = label_names[category_id] dt['confidence'] = float(confidence) - dt['left'], dt['top'], dt['right'], dt['bottom'] = clip_bbox( - bbox, org_img_width, org_img_height) + dt['left'], dt['top'], dt['right'], dt['bottom'] = clip_bbox(bbox, org_img_width, org_img_height) output_i['data'].append(dt) output.append(output_i) if visualization: - output_i['save_path'] = draw_bounding_box_on_image( - org_img_path, output_i['data'], output_dir) + output_i['save_path'] = draw_bounding_box_on_image(org_img_path, output_i['data'], output_dir) return output diff --git a/modules/thirdparty/image/semantic_segmentation/Extract_Line_Draft/Readme.md b/modules/image/semantic_segmentation/Extract_Line_Draft/Readme.md similarity index 100% rename from modules/thirdparty/image/semantic_segmentation/Extract_Line_Draft/Readme.md rename to modules/image/semantic_segmentation/Extract_Line_Draft/Readme.md diff --git a/modules/image/semantic_segmentation/Extract_Line_Draft/__init__.py b/modules/image/semantic_segmentation/Extract_Line_Draft/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/modules/thirdparty/image/semantic_segmentation/Extract_Line_Draft/function.py b/modules/image/semantic_segmentation/Extract_Line_Draft/function.py similarity index 100% rename from modules/thirdparty/image/semantic_segmentation/Extract_Line_Draft/function.py rename to modules/image/semantic_segmentation/Extract_Line_Draft/function.py diff --git a/modules/thirdparty/image/semantic_segmentation/Extract_Line_Draft/module.py b/modules/image/semantic_segmentation/Extract_Line_Draft/module.py similarity index 100% rename from modules/thirdparty/image/semantic_segmentation/Extract_Line_Draft/module.py rename to modules/image/semantic_segmentation/Extract_Line_Draft/module.py diff --git a/modules/image/semantic_segmentation/ExtremeC3_Portrait_Segmentation/README.md b/modules/image/semantic_segmentation/ExtremeC3_Portrait_Segmentation/README.md index 4e91fcfe5278a73b6b93938f649f1b88154b5f6c..17d2979a19b9df5963b341e42347921e40c94c40 100644 --- a/modules/image/semantic_segmentation/ExtremeC3_Portrait_Segmentation/README.md +++ b/modules/image/semantic_segmentation/ExtremeC3_Portrait_Segmentation/README.md @@ -1,56 +1,89 @@ -## 概述 -* 基于 ExtremeC3 模型实现的轻量化人像分割模型 -* 模型具体规格如下: - |model|ExtremeC3| - |----|----| - |Param|0.038 M| - |Flop|0.128 G| - -* 模型参数转换至 [ext_portrait_segmentation](https://github.com/clovaai/ext_portrait_segmentation) 项目 -* 感谢 
[ext_portrait_segmentation](https://github.com/clovaai/ext_portrait_segmentation) 项目提供的开源代码和模型 - -## 效果展示 -![](https://ai-studio-static-online.cdn.bcebos.com/1261398a98e24184852bdaff5a4e1dbd7739430f59fb47e8b84e3a2cfb976107) - -## API -```python -def Segmentation( - images=None, - paths=None, - batch_size=1, - output_dir='output', - visualization=False): -``` -人像分割 API - -**参数** -* images (list[np.ndarray]) : 输入图像数据列表(BGR) -* paths (list[str]) : 输入图像路径列表 -* batch_size (int) : 数据批大小 -* output_dir (str) : 可视化图像输出目录 -* visualization (bool) : 是否可视化 - -**返回** -* results (list[dict{"mask":np.ndarray,"result":np.ndarray}]): 输出图像数据列表 - -**代码示例** -```python -import cv2 -import paddlehub as hub - -model = hub.Module(name='ExtremeC3_Portrait_Segmentation') - -result = model.Segmentation( - images=[cv2.imread('/PATH/TO/IMAGE')], - paths=None, - batch_size=1, - output_dir='output', - visualization=False) -``` - -## 查看代码 -https://github.com/clovaai/ext_portrait_segmentation - -## 依赖 -paddlepaddle >= 2.0.0rc0 -paddlehub >= 2.0.0b1 +# ExtremeC3_Portrait_Segmentation + +|模型名称|ExtremeC3_Portrait_Segmentation| +| :--- | :---: | +|类别|图像-图像分割| +|网络|ExtremeC3| +|数据集|EG1800, Baidu fashion dataset| +|是否支持Fine-tuning|否| +|模型大小|0.038MB| +|指标|-| +|最新更新日期|2021-02-26| + +## 一、模型基本信息 + +- ### 应用效果展示 + + - 样例结果示例: +

+ + +- ### 模型介绍 + * 基于 ExtremeC3 模型实现的轻量化人像分割模型 + + * 更多详情请参考: [ExtremeC3_Portrait_Segmentation](https://github.com/clovaai/ext_portrait_segmentation) 项目 + +## 二、安装 + +- ### 1、环境依赖 + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 + +- ### 2、安装 + + - ```shell + $ hub install ExtremeC3_Portrait_Segmentation + ``` + + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + + +## 三、模型API预测 + +- ### 1、代码示例 + + ```python + import cv2 + import paddlehub as hub + + model = hub.Module(name='ExtremeC3_Portrait_Segmentation') + + result = model.Segmentation( + images=[cv2.imread('/PATH/TO/IMAGE')], + paths=None, + batch_size=1, + output_dir='output', + visualization=False) + ``` + + - ### 2、API + + ```python + def Segmentation( + images=None, + paths=None, + batch_size=1, + output_dir='output', + visualization=False): + ``` + - 人像分割 API + + - **参数** + * images (list[np.ndarray]) : 输入图像数据列表(BGR) + * paths (list[str]) : 输入图像路径列表 + * batch_size (int) : 数据批大小 + * output_dir (str) : 可视化图像输出目录 + * visualization (bool) : 是否可视化 + + - **返回** + * results (list[dict{"mask":np.ndarray,"result":np.ndarray}]): 输出图像数据列表 + +## 四、更新历史 + +* 1.0.0 + + 初始发布 diff --git a/modules/thirdparty/image/semantic_segmentation/FCN_HRNet_W18_Face_Seg/README.md b/modules/image/semantic_segmentation/FCN_HRNet_W18_Face_Seg/README.md similarity index 100% rename from modules/thirdparty/image/semantic_segmentation/FCN_HRNet_W18_Face_Seg/README.md rename to modules/image/semantic_segmentation/FCN_HRNet_W18_Face_Seg/README.md diff --git a/modules/thirdparty/image/semantic_segmentation/FCN_HRNet_W18_Face_Seg/model/__init__.py b/modules/image/semantic_segmentation/FCN_HRNet_W18_Face_Seg/model/__init__.py similarity index 100% rename from modules/thirdparty/image/semantic_segmentation/FCN_HRNet_W18_Face_Seg/model/__init__.py rename to modules/image/semantic_segmentation/FCN_HRNet_W18_Face_Seg/model/__init__.py diff --git a/modules/thirdparty/image/semantic_segmentation/FCN_HRNet_W18_Face_Seg/model/fcn.py b/modules/image/semantic_segmentation/FCN_HRNet_W18_Face_Seg/model/fcn.py similarity index 100% rename from modules/thirdparty/image/semantic_segmentation/FCN_HRNet_W18_Face_Seg/model/fcn.py rename to modules/image/semantic_segmentation/FCN_HRNet_W18_Face_Seg/model/fcn.py diff --git a/modules/thirdparty/image/semantic_segmentation/FCN_HRNet_W18_Face_Seg/model/hrnet.py b/modules/image/semantic_segmentation/FCN_HRNet_W18_Face_Seg/model/hrnet.py similarity index 100% rename from modules/thirdparty/image/semantic_segmentation/FCN_HRNet_W18_Face_Seg/model/hrnet.py rename to modules/image/semantic_segmentation/FCN_HRNet_W18_Face_Seg/model/hrnet.py diff --git a/modules/thirdparty/image/semantic_segmentation/FCN_HRNet_W18_Face_Seg/model/layers.py b/modules/image/semantic_segmentation/FCN_HRNet_W18_Face_Seg/model/layers.py similarity index 100% rename from modules/thirdparty/image/semantic_segmentation/FCN_HRNet_W18_Face_Seg/model/layers.py rename to modules/image/semantic_segmentation/FCN_HRNet_W18_Face_Seg/model/layers.py diff --git a/modules/thirdparty/image/semantic_segmentation/FCN_HRNet_W18_Face_Seg/module.py b/modules/image/semantic_segmentation/FCN_HRNet_W18_Face_Seg/module.py similarity index 100% rename from modules/thirdparty/image/semantic_segmentation/FCN_HRNet_W18_Face_Seg/module.py rename to modules/image/semantic_segmentation/FCN_HRNet_W18_Face_Seg/module.py 
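The ExtremeC3_Portrait_Segmentation README above documents that `Segmentation()` returns a list of dicts holding `"mask"` and `"result"` numpy arrays, but its sample code never uses the return value. A minimal, hypothetical follow-up sketch; it assumes only the documented dict keys and that the returned arrays can be written directly with `cv2.imwrite`, and the output file names are illustrative:

```python
import cv2
import paddlehub as hub

model = hub.Module(name='ExtremeC3_Portrait_Segmentation')
results = model.Segmentation(images=[cv2.imread('/PATH/TO/IMAGE')])

for i, res in enumerate(results):
    # Per the README above, "mask" is the portrait mask and "result" is the
    # segmented output image; both are numpy arrays. File names are illustrative.
    cv2.imwrite('mask_%d.png' % i, res['mask'])
    cv2.imwrite('result_%d.png' % i, res['result'])
```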
diff --git a/modules/image/semantic_segmentation/Pneumonia_CT_LKM_PP/README.md b/modules/image/semantic_segmentation/Pneumonia_CT_LKM_PP/README.md new file mode 100644 index 0000000000000000000000000000000000000000..002bfeebc0201e7a825f24bfb850a3fb564a1c25 --- /dev/null +++ b/modules/image/semantic_segmentation/Pneumonia_CT_LKM_PP/README.md @@ -0,0 +1,92 @@ + +# Pneumonia_CT_LKM_PP + +|模型名称|Pneumonia_CT_LKM_PP| +| :--- | :---: | +|类别|图像-图像分割| +|网络|-| +|数据集|-| +|是否支持Fine-tuning|否| +|模型大小|35M| +|指标|-| +|最新更新日期|2021-02-26| + + +## 一、模型基本信息 + + +- ### 模型介绍 + + - 肺炎CT影像分析模型(Pneumonia-CT-LKM-PP)可以高效地完成对患者CT影像的病灶检测识别、病灶轮廓勾画,通过一定的后处理代码,可以分析输出肺部病灶的数量、体积、病灶占比等全套定量指标。值得强调的是,该系统采用的深度学习算法模型充分训练了所收集到的高分辨率和低分辨率的CT影像数据,能极好地适应不同等级CT影像设备采集的检查数据,有望为医疗资源受限和医疗水平偏低的基层医院提供有效的肺炎辅助诊断工具。 + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 + +- ### 2、安装 + + - ```shell + $ hub install Pneumonia_CT_LKM_PP==1.0.0 + ``` + + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、预测代码示例 + + ```python + import paddlehub as hub + + pneumonia = hub.Module(name="Pneumonia_CT_LKM_PP") + + input_only_lesion_np_path = "/PATH/TO/ONLY_LESION_NP" + input_both_lesion_np_path = "/PATH/TO/LESION_NP" + input_both_lung_np_path = "/PATH/TO/LUNG_NP" + + # set input dict + input_dict = {"image_np_path": [ + [input_only_lesion_np_path], + [input_both_lesion_np_path, input_both_lung_np_path], + ]} + + # execute predict and print the result + results = pneumonia.segmentation(data=input_dict) + for result in results: + print(result) + + ``` + + +- ### 2、API + + ```python + def segmentation(data) + ``` + + - 预测API,用于肺炎CT影像分析。 + + - **参数** + + * data (dict): key,str类型,"image_np_path";value,list类型,每个元素为list类型,[用于病灶分析的影像numpy数组(文件后缀名.npy)路径, 用于肺部分割的影像numpy数组路径],如果仅进行病灶分析不进行肺部分割,可以省略用于肺部分割的影像numpy数组路径 + + + - **返回** + + * result (list\[dict\]): 每个元素为对应输入的预测结果。每个预测结果为dict类型:预测结果有以下字段: + * input_lesion_np_path: 存放用于病灶分析的numpy数组路径; + * output_lesion_np: 存放病灶分析结果,numpy数组; + * input_lesion_np_path:存放用于肺部分割的numpy数组路径(仅当对应输入包含肺部影像numpy时存在该字段) + * output_lung_np:存放肺部分割结果,numpy数组(仅当对应输入包含肺部影像numpy时存在该字段) + + +## 四、更新历史 + +* 1.0.0 + + 初始发布 diff --git a/modules/image/semantic_segmentation/Pneumonia_CT_LKM_PP_lung/README.md b/modules/image/semantic_segmentation/Pneumonia_CT_LKM_PP_lung/README.md new file mode 100644 index 0000000000000000000000000000000000000000..24a6df13d15294b3c0f859aff5a7ff20befd9b9a --- /dev/null +++ b/modules/image/semantic_segmentation/Pneumonia_CT_LKM_PP_lung/README.md @@ -0,0 +1,92 @@ + +# Pneumonia_CT_LKM_PP_lung + +|模型名称|Pneumonia_CT_LKM_PP_lung| +| :--- | :---: | +|类别|图像-图像分割| +|网络|-| +|数据集|-| +|是否支持Fine-tuning|否| +|模型大小|35M| +|指标|-| +|最新更新日期|2021-02-26| + + +## 一、模型基本信息 + + +- ### 模型介绍 + + - 肺炎CT影像分析模型(Pneumonia-CT-LKM-PP)可以高效地完成对患者CT影像的病灶检测识别、病灶轮廓勾画,通过一定的后处理代码,可以分析输出肺部病灶的数量、体积、病灶占比等全套定量指标。值得强调的是,该系统采用的深度学习算法模型充分训练了所收集到的高分辨率和低分辨率的CT影像数据,能极好地适应不同等级CT影像设备采集的检查数据,有望为医疗资源受限和医疗水平偏低的基层医院提供有效的肺炎辅助诊断工具。(此module为Pneumonia_CT_LKM_PP的子module。) + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 + +- ### 2、安装 + + - ```shell + $ hub install Pneumonia_CT_LKM_PP_lung==1.0.0 + ``` + + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | 
[零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、预测代码示例 + + ```python + import paddlehub as hub + + pneumonia = hub.Module(name="Pneumonia_CT_LKM_PP_lung") + + input_only_lesion_np_path = "/PATH/TO/ONLY_LESION_NP" + input_both_lesion_np_path = "/PATH/TO/LESION_NP" + input_both_lung_np_path = "/PATH/TO/LUNG_NP" + + # set input dict + input_dict = {"image_np_path": [ + [input_only_lesion_np_path], + [input_both_lesion_np_path, input_both_lung_np_path], + ]} + + # execute predict and print the result + results = pneumonia.segmentation(data=input_dict) + for result in results: + print(result) + + ``` + + +- ### 2、API + + ```python + def segmentation(data) + ``` + + - 预测API,用于肺炎CT影像分析。 + + - **参数** + + * data (dict): key,str类型,"image_np_path";value,list类型,每个元素为list类型,[用于病灶分析的影像numpy数组(文件后缀名.npy)路径, 用于肺部分割的影像numpy数组路径],如果仅进行病灶分析不进行肺部分割,可以省略用于肺部分割的影像numpy数组路径 + + + - **返回** + + * result (list\[dict\]): 每个元素为对应输入的预测结果。每个预测结果为dict类型:预测结果有以下字段: + * input_lesion_np_path: 存放用于病灶分析的numpy数组路径; + * output_lesion_np: 存放病灶分析结果,numpy数组; + * input_lesion_np_path:存放用于肺部分割的numpy数组路径(仅当对应输入包含肺部影像numpy时存在该字段) + * output_lung_np:存放肺部分割结果,numpy数组(仅当对应输入包含肺部影像numpy时存在该字段) + + +## 四、更新历史 + +* 1.0.0 + + 初始发布 diff --git a/modules/image/semantic_segmentation/U2Net/README.md b/modules/image/semantic_segmentation/U2Net/README.md new file mode 100644 index 0000000000000000000000000000000000000000..bedd1cc65feebb68d754814eecbbbb03d35397bf --- /dev/null +++ b/modules/image/semantic_segmentation/U2Net/README.md @@ -0,0 +1,90 @@ +# U2Net + +|模型名称|U2Net| +| :--- | :---: | +|类别|图像-图像分割| +|网络|U^2Net| +|数据集|-| +|是否支持Fine-tuning|否| +|模型大小|254MB| +|指标|-| +|最新更新日期|2021-02-26| + + + +## 一、模型基本信息 + +- ### 应用效果展示 + + - 效果展示 + +

+ +- ### 模型介绍 + * ![](http://latex.codecogs.com/svg.latex?U^2Net)的网络结构如下图,其类似于编码-解码(Encoder-Decoder)结构的 U-Net,每个 stage 由新提出的 RSU模块(residual U-block) 组成. 例如,En_1 即为基于 RSU 构建的 + * - 更多详情请参考:[U2Net](https://github.com/xuebinqin/U-2-Net) + + ![](https://ai-studio-static-online.cdn.bcebos.com/999d37b4ffdd49dc9e3315b7cec7b2c6918fdd57c8594ced9dded758a497913d) + +## 二、安装 + +- ### 1、环境依赖 + - paddlepaddle >= 2.0.0 + - paddlehub >= 2.0.0 + +- ### 2、安装 + - ```shell + $ hub install U2Net + ``` + + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 +- ### 1、代码示例 + + ```python + import cv2 + import paddlehub as hub + + model = hub.Module(name='U2Net') + + result = model.Segmentation( + images=[cv2.imread('/PATH/TO/IMAGE')], + paths=None, + batch_size=1, + input_size=320, + output_dir='output', + visualization=True) + ``` + - ### 2、API + + ```python + def Segmentation( + images=None, + paths=None, + batch_size=1, + input_size=320, + output_dir='output', + visualization=False): + ``` + - 图像前景背景分割 API + + - **参数** + * images (list[np.ndarray]) : 输入图像数据列表(BGR) + * paths (list[str]) : 输入图像路径列表 + * batch_size (int) : 数据批大小 + * input_size (int) : 输入图像大小 + * output_dir (str) : 可视化图像输出目录 + * visualization (bool) : 是否可视化 + + - **返回** + * results (list[np.ndarray]): 输出图像数据列表 + +## 四、更新历史 + +* 1.0.0 + + 初始发布 diff --git a/modules/thirdparty/image/semantic_segmentation/U2Net/module.py b/modules/image/semantic_segmentation/U2Net/module.py similarity index 100% rename from modules/thirdparty/image/semantic_segmentation/U2Net/module.py rename to modules/image/semantic_segmentation/U2Net/module.py diff --git a/modules/thirdparty/image/semantic_segmentation/U2Net/processor.py b/modules/image/semantic_segmentation/U2Net/processor.py similarity index 100% rename from modules/thirdparty/image/semantic_segmentation/U2Net/processor.py rename to modules/image/semantic_segmentation/U2Net/processor.py diff --git a/modules/thirdparty/image/semantic_segmentation/U2Net/u2net.py b/modules/image/semantic_segmentation/U2Net/u2net.py similarity index 100% rename from modules/thirdparty/image/semantic_segmentation/U2Net/u2net.py rename to modules/image/semantic_segmentation/U2Net/u2net.py diff --git a/modules/image/semantic_segmentation/U2Netp/README.md b/modules/image/semantic_segmentation/U2Netp/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b476a9f35007e4a74398d95998c4998f6d2c2c13 --- /dev/null +++ b/modules/image/semantic_segmentation/U2Netp/README.md @@ -0,0 +1,101 @@ +# U2Netp + +|模型名称|U2Netp| +| :--- | :---: | +|类别|图像-图像分割| +|网络|U^2Net| +|数据集|-| +|是否支持Fine-tuning|否| +|模型大小|6.7MB| +|指标|-| +|最新更新日期|2021-02-26| + + + +## 一、模型基本信息 + +- ### 应用效果展示 + + - 样例结果示例: +

+ + +- ### 模型介绍 + + * U2Netp的网络结构如下图,其类似于编码-解码(Encoder-Decoder)结构的 U-Net, 每个 stage 由新提出的 RSU模块(residual U-block) 组成. 例如,En_1 即为基于 RSU 构建的, 它是一个小型化的模型 + + ![](https://ai-studio-static-online.cdn.bcebos.com/999d37b4ffdd49dc9e3315b7cec7b2c6918fdd57c8594ced9dded758a497913d) + + * - 更多详情请参考:[U2Net](https://github.com/xuebinqin/U-2-Net) + + +## 二、安装 + +- ### 1、环境依赖 + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 + +- ### 2、安装 + - ```shell + $ hub install U2Netp + ``` + + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 +- ### 1、代码示例 + + ```python + import cv2 + import paddlehub as hub + + model = hub.Module(name='U2Netp') + + result = model.Segmentation( + images=[cv2.imread('/PATH/TO/IMAGE')], + paths=None, + batch_size=1, + input_size=320, + output_dir='output', + visualization=True) + ``` + - ### 2、API + + ```python + def Segmentation( + images=None, + paths=None, + batch_size=1, + input_size=320, + output_dir='output', + visualization=False): + ``` + - 图像前景背景分割 API + + - **参数** + * images (list[np.ndarray]) : 输入图像数据列表(BGR) + * paths (list[str]) : 输入图像路径列表 + * batch_size (int) : 数据批大小 + * input_size (int) : 输入图像大小 + * output_dir (str) : 可视化图像输出目录 + * visualization (bool) : 是否可视化 + + - **返回** + * results (list[np.ndarray]): 输出图像数据列表 + +## 四、更新历史 + +* 1.0.0 + + 初始发布 + + + + + + + diff --git a/modules/thirdparty/image/semantic_segmentation/U2Netp/module.py b/modules/image/semantic_segmentation/U2Netp/module.py similarity index 100% rename from modules/thirdparty/image/semantic_segmentation/U2Netp/module.py rename to modules/image/semantic_segmentation/U2Netp/module.py diff --git a/modules/thirdparty/image/semantic_segmentation/U2Netp/processor.py b/modules/image/semantic_segmentation/U2Netp/processor.py similarity index 100% rename from modules/thirdparty/image/semantic_segmentation/U2Netp/processor.py rename to modules/image/semantic_segmentation/U2Netp/processor.py diff --git a/modules/thirdparty/image/semantic_segmentation/U2Netp/u2net.py b/modules/image/semantic_segmentation/U2Netp/u2net.py similarity index 100% rename from modules/thirdparty/image/semantic_segmentation/U2Netp/u2net.py rename to modules/image/semantic_segmentation/U2Netp/u2net.py diff --git a/modules/thirdparty/image/semantic_segmentation/WatermeterSegmentation/README.md b/modules/image/semantic_segmentation/WatermeterSegmentation/README.md similarity index 100% rename from modules/thirdparty/image/semantic_segmentation/WatermeterSegmentation/README.md rename to modules/image/semantic_segmentation/WatermeterSegmentation/README.md diff --git a/modules/image/semantic_segmentation/WatermeterSegmentation/__init__.py b/modules/image/semantic_segmentation/WatermeterSegmentation/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/modules/thirdparty/image/semantic_segmentation/WatermeterSegmentation/assets/model.yml b/modules/image/semantic_segmentation/WatermeterSegmentation/assets/model.yml similarity index 100% rename from modules/thirdparty/image/semantic_segmentation/WatermeterSegmentation/assets/model.yml rename to modules/image/semantic_segmentation/WatermeterSegmentation/assets/model.yml diff --git a/modules/thirdparty/image/semantic_segmentation/WatermeterSegmentation/module.py 
b/modules/image/semantic_segmentation/WatermeterSegmentation/module.py similarity index 100% rename from modules/thirdparty/image/semantic_segmentation/WatermeterSegmentation/module.py rename to modules/image/semantic_segmentation/WatermeterSegmentation/module.py diff --git a/modules/thirdparty/image/semantic_segmentation/WatermeterSegmentation/serving_client_demo.py b/modules/image/semantic_segmentation/WatermeterSegmentation/serving_client_demo.py similarity index 100% rename from modules/thirdparty/image/semantic_segmentation/WatermeterSegmentation/serving_client_demo.py rename to modules/image/semantic_segmentation/WatermeterSegmentation/serving_client_demo.py diff --git a/modules/image/semantic_segmentation/ace2p/README.md b/modules/image/semantic_segmentation/ace2p/README.md index c122ae728e00f4eb27535ce99fff5d9dc677e626..710c2424a45298d86b1486afbf751eb874ae4764 100644 --- a/modules/image/semantic_segmentation/ace2p/README.md +++ b/modules/image/semantic_segmentation/ace2p/README.md @@ -1,131 +1,178 @@ -```shell -$ hub install ace2p==1.1.0 -``` +# ace2p -

+|模型名称|ace2p| +| :--- | :---: | +|类别|图像-图像分割| +|网络|ACE2P| +|数据集|LIP| +|是否支持Fine-tuning|否| +|模型大小|259MB| +|指标|-| +|最新更新日期|2021-02-26| -## 命令行预测 -``` -hub run ace2p --input_path "/PATH/TO/IMAGE" -``` +## 一、模型基本信息 -## API +- ### 应用效果展示 -```python -def segmentation(images=None, - paths=None, - batch_size=1, - use_gpu=False, - output_dir='ace2p_output', - visualization=False): -``` + - 网络结构: +

-预测API,用于图像分割得到人体解析。 + - 调色板 -**参数** +

-* images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\],BGR格式; -* paths (list\[str\]): 图片的路径; -* batch\_size (int): batch 的大小; -* use\_gpu (bool): 是否使用 GPU; -* output\_dir (str): 保存处理结果的文件目录; -* visualization (bool): 是否将识别结果保存为图片文件。 + - 样例结果示例: +

-**返回** +- ### 模型介绍 -* res (list\[dict\]): 识别结果的列表,列表中每一个元素为 dict,关键字有'path', 'data',相应的取值为: - * path (str): 原输入图片的路径; - * data (numpy.ndarray): 图像分割得到的结果,shape 为`H * W`,元素的取值为0-19,表示每个像素的分类结果,映射顺序与下面的调色板相同。 + - 人体解析(Human Parsing)是细粒度的语义分割任务,其旨在识别像素级别的人类图像的组成部分(例如,身体部位和服装)。ACE2P通过融合底层特征,全局上下文信息和边缘细节,端到端地训练学习人体解析任务。该结构针对Intersection over Union指标进行针对性的优化学习,提升准确率。以ACE2P单人人体解析网络为基础的解决方案在CVPR2019第三届LIP挑战赛中赢得了全部三个人体解析任务的第一名。该PaddleHub Module采用ResNet101作为骨干网络,接受输入图片大小为473x473x3。 -```python -def save_inference_model(dirname, - model_filename=None, - params_filename=None, - combined=True) -``` -将模型保存到指定路径。 -**参数** +## 二、安装 -* dirname: 存在模型的目录名称 -* model\_filename: 模型文件名称,默认为\_\_model\_\_ -* params\_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效) -* combined: 是否将参数保存到统一的一个文件中。 +- ### 1、环境依赖 -## 代码示例 + - paddlepaddle >= 2.0.0 -```python -import paddlehub as hub -import cv2 + - paddlehub >= 2.0.0 -human_parser = hub.Module(name="ace2p") -result = human_parser.segmentation(images=[cv2.imread('/PATH/TO/IMAGE')]) -# or -# result = human_parser.segmentation((paths=['/PATH/TO/IMAGE']) -``` +- ### 2.安装 -## 服务部署 + - ```shell + $ hub install ace2p + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) -PaddleHub Serving可以部署一个人体解析的在线服务。 +## 三、模型API预测 + - ### 1、命令行预测 -### 第一步:启动PaddleHub Serving + ```shell + $ hub install ace2p==1.1.0 + ``` -运行启动命令: -```shell -$ hub serving start -m ace2p -``` + - ### 2、代码示例 -这样就完成了一个人体解析服务化API的部署,默认端口号为8866。 + ```python + import paddlehub as hub + import cv2 -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + human_parser = hub.Module(name="ace2p") + result = human_parser.segmentation(images=[cv2.imread('/PATH/TO/IMAGE')]) + ``` + + - ### 3、API -### 第二步:发送预测请求 + ```python + def segmentation(images=None, + paths=None, + batch_size=1, + use_gpu=False, + output_dir='ace2p_output', + visualization=False): + ``` -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + - 预测API,用于图像分割得到人体解析。 -```python -import requests -import json -import cv2 -import base64 + - **参数** -import numpy as np + * images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\],BGR格式; + * paths (list\[str\]): 图片的路径; + * batch\_size (int): batch 的大小; + * use\_gpu (bool): 是否使用 GPU; + * output\_dir (str): 保存处理结果的文件目录; + * visualization (bool): 是否将识别结果保存为图片文件。 + - **返回** -def cv2_to_base64(image): - data = cv2.imencode('.jpg', image)[1] - return base64.b64encode(data.tostring()).decode('utf8') + * res (list\[dict\]): 识别结果的列表,列表中每一个元素为 dict,关键字有'path', 'data',相应的取值为: + * path (str): 原输入图片的路径; + * data (numpy.ndarray): 图像分割得到的结果,shape 为`H * W`,元素的取值为0-19,表示每个像素的分类结果,映射顺序与下面的调色板相同。 + ```python + def save_inference_model(dirname, + model_filename=None, + params_filename=None, + combined=True) + ``` -def base64_to_cv2(b64str): - data = base64.b64decode(b64str.encode('utf8')) - data = np.fromstring(data, np.uint8) - data = cv2.imdecode(data, cv2.IMREAD_COLOR) - return data + - 将模型保存到指定路径。 + - **参数** -# 发送HTTP请求 -data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} -headers = {"Content-type": "application/json"} -url = "http://127.0.0.1:8866/predict/ace2p" -r = requests.post(url=url, headers=headers, data=json.dumps(data)) + * dirname: 存在模型的目录名称 + * model\_filename: 模型文件名称,默认为\_\_model\_\_ + * params\_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效) + * combined: 是否将参数保存到统一的一个文件中。 -# 打印预测结果 
-print(base64_to_cv2(r.json()["results"][0]['data'])) -``` -## 调色板 +## 四、服务部署 -

+- PaddleHub Serving可以部署一个人体解析的在线服务。 -## 依赖 +- ### 第一步:启动PaddleHub Serving -paddlepaddle >= 1.6.2 + - 运行启动命令: + + ```shell + $ hub serving start -m ace2p + ``` -paddlehub >= 1.6.0 + - 这样就完成了一个人体解析服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + ```python + import requests + import json + import cv2 + import base64 + + import numpy as np + + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + + # 发送HTTP请求 + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/ace2p" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # 打印预测结果 + print(base64_to_cv2(r.json()["results"][0]['data'])) + ``` + + +## 五、更新历史 + +* 1.0.0 + + 初始发布 + +* 1.1.0 + + 适配paddlehub2.0版本 diff --git a/modules/image/semantic_segmentation/bisenetv2_cityscapes/README.md b/modules/image/semantic_segmentation/bisenetv2_cityscapes/README.md new file mode 100644 index 0000000000000000000000000000000000000000..a6cf08ca95e2d0fa8786eb0b6bbb4dcf64920ffe --- /dev/null +++ b/modules/image/semantic_segmentation/bisenetv2_cityscapes/README.md @@ -0,0 +1,176 @@ +# PaddleHub 图像分割 + +## 模型预测 + +若想使用我们提供的预训练模型进行预测,可使用如下脚本: + +```python +import paddle +import cv2 +import paddlehub as hub + +if __name__ == '__main__': + model = hub.Module(name='bisenetv2_cityscapes') + img = cv2.imread("/PATH/TO/IMAGE") + model.predict(images=[img], visualization=True) +``` + + + +## 如何开始Fine-tune + +本示例将展示如何使用PaddleHub对预训练模型进行finetune并完成预测任务。 + +在完成安装PaddlePaddle与PaddleHub后,通过执行`python train.py`即可开始使用bisenetv2_cityscapes模型对OpticDiscSeg等数据集进行Fine-tune。 + +## 代码步骤 + +使用PaddleHub Fine-tune API进行Fine-tune可以分为4个步骤。 + +### Step1: 定义数据预处理方式 +```python +from paddlehub.vision.segmentation_transforms import Compose, Resize, Normalize + +transform = Compose([Resize(target_size=(512, 512)), Normalize()]) +``` + +`segmentation_transforms` 数据增强模块定义了丰富的针对图像分割数据的预处理方式,用户可按照需求替换自己需要的数据预处理方式。 + +### Step2: 下载数据集并使用 +```python +from paddlehub.datasets import OpticDiscSeg + +train_reader = OpticDiscSeg(transform, mode='train') + +``` +* `transform`: 数据预处理方式。 +* `mode`: 选择数据模式,可选项有 `train`, `test`, `val`, 默认为`train`。 + +数据集的准备代码可以参考 [opticdiscseg.py](../../paddlehub/datasets/opticdiscseg.py)。`hub.datasets.OpticDiscSeg()`会自动从网络下载数据集并解压到用户目录下`$HOME/.paddlehub/dataset`目录。 + +### Step3: 加载预训练模型 + +```python +model = hub.Module(name='bisenetv2_cityscapes', num_classes=2, pretrained=None) +``` +* `name`: 选择预训练模型的名字。 +* `num_classes`: 分割模型的类别数目。 +* `pretrained`: 是否加载自己训练的模型,若为None,则加载提供的模型默认参数。 + +### Step4: 选择优化策略和运行配置 + +```python +scheduler = paddle.optimizer.lr.PolynomialDecay(learning_rate=0.01, decay_steps=1000, power=0.9, end_lr=0.0001) +optimizer = paddle.optimizer.Adam(learning_rate=scheduler, parameters=model.parameters()) +trainer = Trainer(model, optimizer, checkpoint_dir='test_ckpt_img_ocr', use_gpu=True) +``` + +#### 优化策略 + +Paddle2.0提供了多种优化器选择,如`SGD`, `Adam`, `Adamax`等,其中`Adam`: + +* `learning_rate`: 全局学习率。 +* `parameters`: 待优化模型参数。 + +#### 运行配置 +`Trainer` 主要控制Fine-tune的训练,包含以下可控制的参数: + +* `model`: 被优化模型; +* `optimizer`: 优化器选择; +* `use_gpu`: 是否使用gpu,默认为False; +* `use_vdl`: 是否使用vdl可视化训练过程; +* 
`checkpoint_dir`: 保存模型参数的地址; +* `compare_metrics`: 保存最优模型的衡量指标; + +`trainer.train` 主要控制具体的训练过程,包含以下可控制的参数: + +* `train_dataset`: 训练时所用的数据集; +* `epochs`: 训练轮数; +* `batch_size`: 训练的批大小,如果使用GPU,请根据实际情况调整batch_size; +* `num_workers`: works的数量,默认为0; +* `eval_dataset`: 验证集; +* `log_interval`: 打印日志的间隔, 单位为执行批训练的次数。 +* `save_interval`: 保存模型的间隔频次,单位为执行训练的轮数。 + +## 模型预测 + +当完成Fine-tune后,Fine-tune过程在验证集上表现最优的模型会被保存在`${CHECKPOINT_DIR}/best_model`目录下,其中`${CHECKPOINT_DIR}`目录为Fine-tune时所选择的保存checkpoint的目录。 + +我们使用该模型来进行预测。predict.py脚本如下: + +```python +import paddle +import cv2 +import paddlehub as hub + +if __name__ == '__main__': + model = hub.Module(name='bisenetv2_cityscapes', pretrained='/PATH/TO/CHECKPOINT') + img = cv2.imread("/PATH/TO/IMAGE") + model.predict(images=[img], visualization=True) +``` + +参数配置正确后,请执行脚本`python predict.py`。 +**Args** +* `images`:原始图像路径或BGR格式图片; +* `visualization`: 是否可视化,默认为True; +* `save_path`: 保存结果的路径,默认保存路径为'seg_result'。 + +**NOTE:** 进行预测时,所选择的module,checkpoint_dir,dataset必须和Fine-tune所用的一样。 + +## 服务部署 + +PaddleHub Serving可以部署一个在线图像分割服务。 + +### Step1: 启动PaddleHub Serving + +运行启动命令: + +```shell +$ hub serving start -m bisenetv2_cityscapes +``` + +这样就完成了一个图像分割服务化API的部署,默认端口号为8866。 + +**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +### Step2: 发送预测请求 + +配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + +```python +import requests +import json +import cv2 +import base64 + +import numpy as np + + +def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + +# 发送HTTP请求 +org_im = cv2.imread('/PATH/TO/IMAGE') +data = {'images':[cv2_to_base64(org_im)]} +headers = {"Content-type": "application/json"} +url = "http://127.0.0.1:8866/predict/bisenetv2_cityscapes" +r = requests.post(url=url, headers=headers, data=json.dumps(data)) +mask = base64_to_cv2(r.json()["results"][0]) +``` + +### 查看代码 + +https://github.com/PaddlePaddle/PaddleSeg + +### 依赖 + +paddlepaddle >= 2.0.0 + +paddlehub >= 2.0.0 diff --git a/modules/image/semantic_segmentation/bisenetv2_cityscapes/layers.py b/modules/image/semantic_segmentation/bisenetv2_cityscapes/layers.py new file mode 100644 index 0000000000000000000000000000000000000000..dcaaded9f5453655c24bbb85e0115b8bb2fb7008 --- /dev/null +++ b/modules/image/semantic_segmentation/bisenetv2_cityscapes/layers.py @@ -0,0 +1,186 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
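The bisenetv2_cityscapes fine-tuning guide above lists the arguments accepted by `trainer.train` but does not show the call itself. Below is a consolidated, hypothetical train.py sketch tying Steps 1-4 together; it assumes the `Trainer` import path used by the PaddleHub 2.x fine-tune demos, and the epoch and batch-size values are purely illustrative:

```python
import paddle
import paddlehub as hub
from paddlehub.finetune.trainer import Trainer  # assumed PaddleHub 2.x import path
from paddlehub.datasets import OpticDiscSeg
from paddlehub.vision.segmentation_transforms import Compose, Resize, Normalize

if __name__ == '__main__':
    # Step 1: data preprocessing, as defined in the guide above
    transform = Compose([Resize(target_size=(512, 512)), Normalize()])

    # Step 2: dataset (auto-downloaded to $HOME/.paddlehub/dataset)
    train_reader = OpticDiscSeg(transform, mode='train')
    eval_reader = OpticDiscSeg(transform, mode='val')

    # Step 3: pretrained model with 2 classes for OpticDiscSeg
    model = hub.Module(name='bisenetv2_cityscapes', num_classes=2, pretrained=None)

    # Step 4: optimization strategy and Trainer, matching the guide above
    scheduler = paddle.optimizer.lr.PolynomialDecay(learning_rate=0.01, decay_steps=1000, power=0.9, end_lr=0.0001)
    optimizer = paddle.optimizer.Adam(learning_rate=scheduler, parameters=model.parameters())
    trainer = Trainer(model, optimizer, checkpoint_dir='test_ckpt_img_ocr', use_gpu=True)

    # Launch training with the documented arguments; epochs/batch_size are illustrative.
    trainer.train(
        train_reader,
        epochs=10,
        batch_size=4,
        eval_dataset=eval_reader,
        log_interval=10,
        save_interval=1)
```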
+ +import os + +import paddle +import paddle.nn as nn +import paddle.nn.functional as F + + +def SyncBatchNorm(*args, **kwargs): + """In cpu environment nn.SyncBatchNorm does not have kernel so use nn.BatchNorm2D instead""" + if paddle.get_device() == 'cpu' or os.environ.get('PADDLESEG_EXPORT_STAGE'): + return nn.BatchNorm2D(*args, **kwargs) + else: + return nn.SyncBatchNorm(*args, **kwargs) + + +class ConvBNReLU(nn.Layer): + """Basic conv bn relu layer.""" + + def __init__(self, in_channels: int, out_channels: int, kernel_size: int, padding: str = 'same', **kwargs): + super().__init__() + + self._conv = nn.Conv2D(in_channels, out_channels, kernel_size, padding=padding, **kwargs) + + self._batch_norm = SyncBatchNorm(out_channels) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self._conv(x) + x = self._batch_norm(x) + x = F.relu(x) + return x + + +class ConvBN(nn.Layer): + """Basic conv bn layer.""" + + def __init__(self, in_channels: int, out_channels: int, kernel_size: int, padding: str = 'same', **kwargs): + super().__init__() + self._conv = nn.Conv2D(in_channels, out_channels, kernel_size, padding=padding, **kwargs) + self._batch_norm = SyncBatchNorm(out_channels) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self._conv(x) + x = self._batch_norm(x) + return x + + +class ConvReLUPool(nn.Layer): + """Basic conv bn pool layer.""" + + def __init__(self, in_channels: int, out_channels: int): + super().__init__() + self.conv = nn.Conv2D(in_channels, out_channels, kernel_size=3, stride=1, padding=1, dilation=1) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self.conv(x) + x = F.relu(x) + x = F.pool2d(x, pool_size=2, pool_type="max", pool_stride=2) + return x + + +class SeparableConvBNReLU(nn.Layer): + """Basic separable conv bn relu layer.""" + + def __init__(self, in_channels: int, out_channels: int, kernel_size: int, padding: str = 'same', **kwargs): + super().__init__() + self.depthwise_conv = ConvBN( + in_channels, + out_channels=in_channels, + kernel_size=kernel_size, + padding=padding, + groups=in_channels, + **kwargs) + self.piontwise_conv = ConvBNReLU(in_channels, out_channels, kernel_size=1, groups=1) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self.depthwise_conv(x) + x = self.piontwise_conv(x) + return x + + +class DepthwiseConvBN(nn.Layer): + """Basic depthwise conv bn relu layer.""" + + def __init__(self, in_channels: int, out_channels: int, kernel_size: int, padding: str = 'same', **kwargs): + super().__init__() + + self.depthwise_conv = ConvBN( + in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + padding=padding, + groups=in_channels, + **kwargs) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self.depthwise_conv(x) + return x + + +class AuxLayer(nn.Layer): + """ + The auxiliary layer implementation for auxiliary loss. + + Args: + in_channels (int): The number of input channels. + inter_channels (int): The intermediate channels. + out_channels (int): The number of output channels, and usually it is num_classes. + dropout_prob (float, optional): The drop rate. Default: 0.1. 
+ """ + + def __init__(self, in_channels: int, inter_channels: int, out_channels: int, dropout_prob: float = 0.1): + super().__init__() + + self.conv_bn_relu = ConvBNReLU(in_channels=in_channels, out_channels=inter_channels, kernel_size=3, padding=1) + + self.dropout = nn.Dropout(p=dropout_prob) + + self.conv = nn.Conv2D(in_channels=inter_channels, out_channels=out_channels, kernel_size=1) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self.conv_bn_relu(x) + x = self.dropout(x) + x = self.conv(x) + return x + + +class Activation(nn.Layer): + """ + The wrapper of activations. + Args: + act (str, optional): The activation name in lowercase. It must be one of ['elu', 'gelu', + 'hardshrink', 'tanh', 'hardtanh', 'prelu', 'relu', 'relu6', 'selu', 'leakyrelu', 'sigmoid', + 'softmax', 'softplus', 'softshrink', 'softsign', 'tanhshrink', 'logsigmoid', 'logsoftmax', + 'hsigmoid']. Default: None, means identical transformation. + Returns: + A callable object of Activation. + Raises: + KeyError: When parameter `act` is not in the optional range. + Examples: + from paddleseg.models.common.activation import Activation + relu = Activation("relu") + print(relu) + # + sigmoid = Activation("sigmoid") + print(sigmoid) + # + not_exit_one = Activation("not_exit_one") + # KeyError: "not_exit_one does not exist in the current dict_keys(['elu', 'gelu', 'hardshrink', + # 'tanh', 'hardtanh', 'prelu', 'relu', 'relu6', 'selu', 'leakyrelu', 'sigmoid', 'softmax', + # 'softplus', 'softshrink', 'softsign', 'tanhshrink', 'logsigmoid', 'logsoftmax', 'hsigmoid'])" + """ + + def __init__(self, act: str = None): + super(Activation, self).__init__() + + self._act = act + upper_act_names = nn.layer.activation.__dict__.keys() + lower_act_names = [act.lower() for act in upper_act_names] + act_dict = dict(zip(lower_act_names, upper_act_names)) + + if act is not None: + if act in act_dict.keys(): + act_name = act_dict[act] + self.act_func = eval("nn.layer.activation.{}()".format(act_name)) + else: + raise KeyError("{} does not exist in the current {}".format(act, act_dict.keys())) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + if self._act is not None: + return self.act_func(x) + else: + return x diff --git a/modules/image/semantic_segmentation/bisenetv2_cityscapes/module.py b/modules/image/semantic_segmentation/bisenetv2_cityscapes/module.py new file mode 100644 index 0000000000000000000000000000000000000000..7745be3c6ec0ae3b598b6598503449c670a54a50 --- /dev/null +++ b/modules/image/semantic_segmentation/bisenetv2_cityscapes/module.py @@ -0,0 +1,288 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+import os
+from typing import Union, List, Tuple
+
+import numpy as np
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+from paddlehub.module.module import moduleinfo
+import paddlehub.vision.segmentation_transforms as T
+from paddlehub.module.cv_module import ImageSegmentationModule
+
+import bisenetv2_cityscapes.layers as layers
+
+
+@moduleinfo(
+    name="bisenetv2_cityscapes",
+    type="CV/semantic_segmentation",
+    author="paddlepaddle",
+    author_email="",
+    summary="Bisenet is a segmentation model trained by Cityscapes.",
+    version="1.0.0",
+    meta=ImageSegmentationModule)
+class BiSeNetV2(nn.Layer):
+    """
+    The BiSeNet V2 implementation based on PaddlePaddle.
+
+    The original article refers to
+    Yu, Changqian, et al. "BiSeNet V2: Bilateral Network with Guided Aggregation for Real-time Semantic Segmentation"
+    (https://arxiv.org/abs/2004.02147)
+
+    Args:
+        num_classes (int): The unique number of target classes, default is 19.
+        lambd (float, optional): A factor for controlling the size of semantic branch channels. Default: 0.25.
+        align_corners (bool, optional): An argument of F.interpolate. It should be set to False when the feature size is even,
+            e.g. 1024x512, otherwise it is True, e.g. 769x769. Default: False.
+        pretrained (str, optional): The path or url of pretrained model. Default: None.
+    """
+
+    def __init__(self, num_classes: int = 19, lambd: float = 0.25, align_corners: bool = False, pretrained: str = None):
+        super(BiSeNetV2, self).__init__()
+
+        C1, C2, C3 = 64, 64, 128
+        db_channels = (C1, C2, C3)
+        C1, C3, C4, C5 = int(C1 * lambd), int(C3 * lambd), 64, 128
+        sb_channels = (C1, C3, C4, C5)
+        mid_channels = 128
+
+        self.db = DetailBranch(db_channels)
+        self.sb = SemanticBranch(sb_channels)
+
+        self.bga = BGA(mid_channels, align_corners)
+        self.aux_head1 = SegHead(C1, C1, num_classes)
+        self.aux_head2 = SegHead(C3, C3, num_classes)
+        self.aux_head3 = SegHead(C4, C4, num_classes)
+        self.aux_head4 = SegHead(C5, C5, num_classes)
+        self.head = SegHead(mid_channels, mid_channels, num_classes)
+
+        self.align_corners = align_corners
+        self.transforms = T.Compose([T.Normalize()])
+
+        if pretrained is not None:
+            model_dict = paddle.load(pretrained)
+            self.set_dict(model_dict)
+            print("load custom parameters success")
+
+        else:
+            checkpoint = os.path.join(self.directory, 'bisenet_model.pdparams')
+            model_dict = paddle.load(checkpoint)
+            self.set_dict(model_dict)
+            print("load pretrained parameters success")
+
+    def transform(self, img: Union[np.ndarray, str]) -> Union[np.ndarray, str]:
+        return self.transforms(img)
+
+    def forward(self, x: paddle.Tensor) -> List[paddle.Tensor]:
+        dfm = self.db(x)
+        feat1, feat2, feat3, feat4, sfm = self.sb(x)
+        logit = self.head(self.bga(dfm, sfm))
+
+        if not self.training:
+            logit_list = [logit]
+        else:
+            logit1 = self.aux_head1(feat1)
+            logit2 = self.aux_head2(feat2)
+            logit3 = self.aux_head3(feat3)
+            logit4 = self.aux_head4(feat4)
+            logit_list = [logit, logit1, logit2, logit3, logit4]
+
+        logit_list = [
+            F.interpolate(logit, paddle.shape(x)[2:], mode='bilinear', align_corners=self.align_corners)
+            for logit in logit_list
+        ]
+
+        return logit_list
+
+
+class StemBlock(nn.Layer):
+    def __init__(self, in_dim: int, out_dim: int):
+        super(StemBlock, self).__init__()
+
+        self.conv = layers.ConvBNReLU(in_dim, out_dim, 3, stride=2)
+
+        self.left = nn.Sequential(
+            layers.ConvBNReLU(out_dim, out_dim // 2, 1), layers.ConvBNReLU(out_dim // 2, out_dim, 3, stride=2))
+
+        self.right = nn.MaxPool2D(kernel_size=3, stride=2, padding=1)
+
+
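+        # Both the conv path (left) and the max-pool path (right) downsample by 2x on top of the
+        # stride-2 stem conv, so the stem output is 1/4 of the input resolution; the two branches
+        # are concatenated (2 * out_dim channels) and fused back to out_dim by the 3x3 conv below.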
self.fuse = layers.ConvBNReLU(out_dim * 2, out_dim, 3) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self.conv(x) + left = self.left(x) + right = self.right(x) + concat = paddle.concat([left, right], axis=1) + return self.fuse(concat) + + +class ContextEmbeddingBlock(nn.Layer): + def __init__(self, in_dim: int, out_dim: int): + super(ContextEmbeddingBlock, self).__init__() + + self.gap = nn.AdaptiveAvgPool2D(1) + self.bn = layers.SyncBatchNorm(in_dim) + + self.conv_1x1 = layers.ConvBNReLU(in_dim, out_dim, 1) + self.conv_3x3 = nn.Conv2D(out_dim, out_dim, 3, 1, 1) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + gap = self.gap(x) + bn = self.bn(gap) + conv1 = self.conv_1x1(bn) + x + return self.conv_3x3(conv1) + + +class GatherAndExpansionLayer1(nn.Layer): + """Gather And Expansion Layer with stride 1""" + + def __init__(self, in_dim: int, out_dim: int, expand: int): + super().__init__() + + expand_dim = expand * in_dim + + self.conv = nn.Sequential( + layers.ConvBNReLU(in_dim, in_dim, 3), layers.DepthwiseConvBN(in_dim, expand_dim, 3), + layers.ConvBN(expand_dim, out_dim, 1)) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + return F.relu(self.conv(x) + x) + + +class GatherAndExpansionLayer2(nn.Layer): + """Gather And Expansion Layer with stride 2""" + + def __init__(self, in_dim: int, out_dim: int, expand: int): + super().__init__() + + expand_dim = expand * in_dim + + self.branch_1 = nn.Sequential( + layers.ConvBNReLU(in_dim, in_dim, 3), layers.DepthwiseConvBN(in_dim, expand_dim, 3, stride=2), + layers.DepthwiseConvBN(expand_dim, expand_dim, 3), layers.ConvBN(expand_dim, out_dim, 1)) + + self.branch_2 = nn.Sequential( + layers.DepthwiseConvBN(in_dim, in_dim, 3, stride=2), layers.ConvBN(in_dim, out_dim, 1)) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + return F.relu(self.branch_1(x) + self.branch_2(x)) + + +class DetailBranch(nn.Layer): + """The detail branch of BiSeNet, which has wide channels but shallow layers.""" + + def __init__(self, in_channels: int): + super().__init__() + + C1, C2, C3 = in_channels + + self.convs = nn.Sequential( + # stage 1 + layers.ConvBNReLU(3, C1, 3, stride=2), + layers.ConvBNReLU(C1, C1, 3), + # stage 2 + layers.ConvBNReLU(C1, C2, 3, stride=2), + layers.ConvBNReLU(C2, C2, 3), + layers.ConvBNReLU(C2, C2, 3), + # stage 3 + layers.ConvBNReLU(C2, C3, 3, stride=2), + layers.ConvBNReLU(C3, C3, 3), + layers.ConvBNReLU(C3, C3, 3), + ) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + return self.convs(x) + + +class SemanticBranch(nn.Layer): + """The semantic branch of BiSeNet, which has narrow channels but deep layers.""" + + def __init__(self, in_channels: int): + super().__init__() + C1, C3, C4, C5 = in_channels + + self.stem = StemBlock(3, C1) + + self.stage3 = nn.Sequential(GatherAndExpansionLayer2(C1, C3, 6), GatherAndExpansionLayer1(C3, C3, 6)) + + self.stage4 = nn.Sequential(GatherAndExpansionLayer2(C3, C4, 6), GatherAndExpansionLayer1(C4, C4, 6)) + + self.stage5_4 = nn.Sequential( + GatherAndExpansionLayer2(C4, C5, 6), GatherAndExpansionLayer1(C5, C5, 6), GatherAndExpansionLayer1( + C5, C5, 6), GatherAndExpansionLayer1(C5, C5, 6)) + + self.ce = ContextEmbeddingBlock(C5, C5) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + stage2 = self.stem(x) + stage3 = self.stage3(stage2) + stage4 = self.stage4(stage3) + stage5_4 = self.stage5_4(stage4) + fm = self.ce(stage5_4) + return stage2, stage3, stage4, stage5_4, fm + + +class BGA(nn.Layer): + """The Bilateral Guided Aggregation Layer, used to 
fuse the semantic features and spatial features.""" + + def __init__(self, out_dim: int, align_corners: bool): + super().__init__() + + self.align_corners = align_corners + + self.db_branch_keep = nn.Sequential(layers.DepthwiseConvBN(out_dim, out_dim, 3), nn.Conv2D(out_dim, out_dim, 1)) + + self.db_branch_down = nn.Sequential( + layers.ConvBN(out_dim, out_dim, 3, stride=2), nn.AvgPool2D(kernel_size=3, stride=2, padding=1)) + + self.sb_branch_keep = nn.Sequential( + layers.DepthwiseConvBN(out_dim, out_dim, 3), nn.Conv2D(out_dim, out_dim, 1), + layers.Activation(act='sigmoid')) + + self.sb_branch_up = layers.ConvBN(out_dim, out_dim, 3) + + self.conv = layers.ConvBN(out_dim, out_dim, 3) + + def forward(self, dfm: int, sfm: int) -> paddle.Tensor: + db_feat_keep = self.db_branch_keep(dfm) + db_feat_down = self.db_branch_down(dfm) + sb_feat_keep = self.sb_branch_keep(sfm) + + sb_feat_up = self.sb_branch_up(sfm) + sb_feat_up = F.interpolate( + sb_feat_up, paddle.shape(db_feat_keep)[2:], mode='bilinear', align_corners=self.align_corners) + + sb_feat_up = F.sigmoid(sb_feat_up) + db_feat = db_feat_keep * sb_feat_up + + sb_feat = db_feat_down * sb_feat_keep + sb_feat = F.interpolate(sb_feat, paddle.shape(db_feat)[2:], mode='bilinear', align_corners=self.align_corners) + + return self.conv(db_feat + sb_feat) + + +class SegHead(nn.Layer): + def __init__(self, in_dim: int, mid_dim: int, num_classes: int): + super().__init__() + + self.conv_3x3 = nn.Sequential(layers.ConvBNReLU(in_dim, mid_dim, 3), nn.Dropout(0.1)) + + self.conv_1x1 = nn.Conv2D(mid_dim, num_classes, 1, 1) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + conv1 = self.conv_3x3(x) + conv2 = self.conv_1x1(conv1) + return conv2 diff --git a/modules/image/semantic_segmentation/deeplabv3p_resnet50_cityscapes/README.md b/modules/image/semantic_segmentation/deeplabv3p_resnet50_cityscapes/README.md new file mode 100644 index 0000000000000000000000000000000000000000..9629ccdf433eb1c1970571a01f2663a2af8457f6 --- /dev/null +++ b/modules/image/semantic_segmentation/deeplabv3p_resnet50_cityscapes/README.md @@ -0,0 +1,173 @@ +# PaddleHub 图像分割 + +## 模型预测 + +若想使用我们提供的预训练模型进行预测,可使用如下脚本: + +```python +import paddle +import cv2 +import paddlehub as hub + +if __name__ == '__main__': + model = hub.Module(name='deeplabv3p_resnet50_cityscapes') + img = cv2.imread("/PATH/TO/IMAGE") + model.predict(images=[img], visualization=True) +``` + + +## 如何开始Fine-tune + +在完成安装PaddlePaddle与PaddleHub后,通过执行`python train.py`即可开始使用deeplabv3p_resnet50_cityscapes模型对OpticDiscSeg等数据集进行Fine-tune。 + +## 代码步骤 + +使用PaddleHub Fine-tune API进行Fine-tune可以分为4个步骤。 + +### Step1: 定义数据预处理方式 +```python +from paddlehub.vision.segmentation_transforms import Compose, Resize, Normalize + +transform = Compose([Resize(target_size=(512, 512)), Normalize()]) +``` + +`segmentation_transforms` 数据增强模块定义了丰富的针对图像分割数据的预处理方式,用户可按照需求替换自己需要的数据预处理方式。 + +### Step2: 下载数据集并使用 +```python +from paddlehub.datasets import OpticDiscSeg + +train_reader = OpticDiscSeg(transform, mode='train') + +``` +* `transform`: 数据预处理方式。 +* `mode`: 选择数据模式,可选项有 `train`, `test`, `val`, 默认为`train`。 + +数据集的准备代码可以参考 [opticdiscseg.py](../../paddlehub/datasets/opticdiscseg.py)。`hub.datasets.OpticDiscSeg()`会自动从网络下载数据集并解压到用户目录下`$HOME/.paddlehub/dataset`目录。 + +### Step3: 加载预训练模型 + +```python +model = hub.Module(name='deeplabv3p_resnet50_cityscapes', num_classes=2, pretrained=None) +``` +* `name`: 选择预训练模型的名字。 +* `num_classes`: 分割模型的类别数目。 +* `pretrained`: 是否加载自己训练的模型,若为None,则加载提供的模型默认参数。 + +### Step4: 选择优化策略和运行配置 + +```python +scheduler = 
paddle.optimizer.lr.PolynomialDecay(learning_rate=0.01, decay_steps=1000, power=0.9, end_lr=0.0001) +optimizer = paddle.optimizer.Adam(learning_rate=scheduler, parameters=model.parameters()) +trainer = Trainer(model, optimizer, checkpoint_dir='test_ckpt_img_ocr', use_gpu=True) +``` + +#### 优化策略 + +Paddle2.0rc提供了多种优化器选择,如`SGD`, `Adam`, `Adamax`等,其中`Adam`: + +* `learning_rate`: 全局学习率。 +* `parameters`: 待优化模型参数。 + +#### 运行配置 +`Trainer` 主要控制Fine-tune的训练,包含以下可控制的参数: + +* `model`: 被优化模型; +* `optimizer`: 优化器选择; +* `use_gpu`: 是否使用gpu,默认为False; +* `use_vdl`: 是否使用vdl可视化训练过程; +* `checkpoint_dir`: 保存模型参数的地址; +* `compare_metrics`: 保存最优模型的衡量指标; + +`trainer.train` 主要控制具体的训练过程,包含以下可控制的参数: + +* `train_dataset`: 训练时所用的数据集; +* `epochs`: 训练轮数; +* `batch_size`: 训练的批大小,如果使用GPU,请根据实际情况调整batch_size; +* `num_workers`: works的数量,默认为0; +* `eval_dataset`: 验证集; +* `log_interval`: 打印日志的间隔, 单位为执行批训练的次数。 +* `save_interval`: 保存模型的间隔频次,单位为执行训练的轮数。 + +## 模型预测 + +当完成Fine-tune后,Fine-tune过程在验证集上表现最优的模型会被保存在`${CHECKPOINT_DIR}/best_model`目录下,其中`${CHECKPOINT_DIR}`目录为Fine-tune时所选择的保存checkpoint的目录。 + +我们使用该模型来进行预测。predict.py脚本如下: + +```python +import paddle +import cv2 +import paddlehub as hub + +if __name__ == '__main__': + model = hub.Module(name='deeplabv3p_resnet50_cityscapes', pretrained='/PATH/TO/CHECKPOINT') + img = cv2.imread("/PATH/TO/IMAGE") + model.predict(images=[img], visualization=True) +``` + +参数配置正确后,请执行脚本`python predict.py`。 +**Args** +* `images`:原始图像路径或BGR格式图片; +* `visualization`: 是否可视化,默认为True; +* `save_path`: 保存结果的路径,默认保存路径为'seg_result'。 + +**NOTE:** 进行预测时,所选择的module,checkpoint_dir,dataset必须和Fine-tune所用的一样。 + +## 服务部署 + +PaddleHub Serving可以部署一个在线图像分割服务。 + +### Step1: 启动PaddleHub Serving + +运行启动命令: + +```shell +$ hub serving start -m deeplabv3p_resnet50_cityscapes +``` + +这样就完成了一个图像分割服务化API的部署,默认端口号为8866。 + +**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +### Step2: 发送预测请求 + +配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + +```python +import requests +import json +import cv2 +import base64 + +import numpy as np + + +def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + +# 发送HTTP请求 +org_im = cv2.imread('/PATH/TO/IMAGE') +data = {'images':[cv2_to_base64(org_im)]} +headers = {"Content-type": "application/json"} +url = "http://127.0.0.1:8866/predict/deeplabv3p_resnet50_cityscapes" +r = requests.post(url=url, headers=headers, data=json.dumps(data)) +mask = base64_to_cv2(r.json()["results"][0]) +``` + +### 查看代码 + +https://github.com/PaddlePaddle/PaddleSeg + +### 依赖 + +paddlepaddle >= 2.0.0 + +paddlehub >= 2.0.0 diff --git a/modules/image/semantic_segmentation/deeplabv3p_resnet50_cityscapes/layers.py b/modules/image/semantic_segmentation/deeplabv3p_resnet50_cityscapes/layers.py new file mode 100644 index 0000000000000000000000000000000000000000..ee62265b585c80189c32846c0037b2b002244d6d --- /dev/null +++ b/modules/image/semantic_segmentation/deeplabv3p_resnet50_cityscapes/layers.py @@ -0,0 +1,295 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +from paddle.nn import Conv2D, AvgPool2D + + +def SyncBatchNorm(*args, **kwargs): + """In cpu environment nn.SyncBatchNorm does not have kernel so use nn.BatchNorm2D instead""" + if paddle.get_device() == 'cpu': + return nn.BatchNorm2D(*args, **kwargs) + else: + return nn.SyncBatchNorm(*args, **kwargs) + + +class ConvBNLayer(nn.Layer): + """Basic conv bn relu layer.""" + + def __init__(self, + in_channels: int, + out_channels: int, + kernel_size: int, + stride: int = 1, + dilation: int = 1, + groups: int = 1, + is_vd_mode: bool = False, + act: str = None, + name: str = None): + super(ConvBNLayer, self).__init__() + + self.is_vd_mode = is_vd_mode + self._pool2d_avg = AvgPool2D(kernel_size=2, stride=2, padding=0, ceil_mode=True) + self._conv = Conv2D( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + padding=(kernel_size - 1) // 2 if dilation == 1 else 0, + dilation=dilation, + groups=groups, + bias_attr=False) + + self._batch_norm = SyncBatchNorm(out_channels) + self._act_op = Activation(act=act) + + def forward(self, inputs: paddle.Tensor) -> paddle.Tensor: + if self.is_vd_mode: + inputs = self._pool2d_avg(inputs) + y = self._conv(inputs) + y = self._batch_norm(y) + y = self._act_op(y) + + return y + + +class BottleneckBlock(nn.Layer): + """Residual bottleneck block""" + + def __init__(self, + in_channels: int, + out_channels: int, + stride: int, + shortcut: bool = True, + if_first: bool = False, + dilation: int = 1, + name: str = None): + super(BottleneckBlock, self).__init__() + + self.conv0 = ConvBNLayer( + in_channels=in_channels, out_channels=out_channels, kernel_size=1, act='relu', name=name + "_branch2a") + + self.dilation = dilation + + self.conv1 = ConvBNLayer( + in_channels=out_channels, + out_channels=out_channels, + kernel_size=3, + stride=stride, + act='relu', + dilation=dilation, + name=name + "_branch2b") + self.conv2 = ConvBNLayer( + in_channels=out_channels, out_channels=out_channels * 4, kernel_size=1, act=None, name=name + "_branch2c") + + if not shortcut: + self.short = ConvBNLayer( + in_channels=in_channels, + out_channels=out_channels * 4, + kernel_size=1, + stride=1, + is_vd_mode=False if if_first or stride == 1 else True, + name=name + "_branch1") + + self.shortcut = shortcut + + def forward(self, inputs: paddle.Tensor) -> paddle.Tensor: + y = self.conv0(inputs) + if self.dilation > 1: + padding = self.dilation + y = F.pad(y, [padding, padding, padding, padding]) + + conv1 = self.conv1(y) + conv2 = self.conv2(conv1) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + + y = paddle.add(x=short, y=conv2) + y = F.relu(y) + return y + + +class SeparableConvBNReLU(nn.Layer): + """Depthwise Separable Convolution.""" + + def __init__(self, in_channels: int, out_channels: int, kernel_size: int, padding: str = 'same', **kwargs: dict): + super(SeparableConvBNReLU, self).__init__() + self.depthwise_conv = ConvBN( + in_channels, + out_channels=in_channels, + kernel_size=kernel_size, + padding=padding, + 
groups=in_channels, + **kwargs) + self.piontwise_conv = ConvBNReLU(in_channels, out_channels, kernel_size=1, groups=1) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self.depthwise_conv(x) + x = self.piontwise_conv(x) + return x + + +class ConvBN(nn.Layer): + """Basic conv bn layer""" + + def __init__(self, in_channels: int, out_channels: int, kernel_size: int, padding: str = 'same', **kwargs: dict): + super(ConvBN, self).__init__() + self._conv = Conv2D(in_channels, out_channels, kernel_size, padding=padding, **kwargs) + self._batch_norm = SyncBatchNorm(out_channels) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self._conv(x) + x = self._batch_norm(x) + return x + + +class ConvBNReLU(nn.Layer): + """Basic conv bn relu layer.""" + + def __init__(self, in_channels: int, out_channels: int, kernel_size: int, padding: str = 'same', **kwargs: dict): + super(ConvBNReLU, self).__init__() + + self._conv = Conv2D(in_channels, out_channels, kernel_size, padding=padding, **kwargs) + self._batch_norm = SyncBatchNorm(out_channels) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self._conv(x) + x = self._batch_norm(x) + x = F.relu(x) + return x + + +class Activation(nn.Layer): + """ + The wrapper of activations. + Args: + act (str, optional): The activation name in lowercase. It must be one of ['elu', 'gelu', + 'hardshrink', 'tanh', 'hardtanh', 'prelu', 'relu', 'relu6', 'selu', 'leakyrelu', 'sigmoid', + 'softmax', 'softplus', 'softshrink', 'softsign', 'tanhshrink', 'logsigmoid', 'logsoftmax', + 'hsigmoid']. Default: None, means identical transformation. + Returns: + A callable object of Activation. + Raises: + KeyError: When parameter `act` is not in the optional range. + Examples: + from paddleseg.models.common.activation import Activation + relu = Activation("relu") + print(relu) + # + sigmoid = Activation("sigmoid") + print(sigmoid) + # + not_exit_one = Activation("not_exit_one") + # KeyError: "not_exit_one does not exist in the current dict_keys(['elu', 'gelu', 'hardshrink', + # 'tanh', 'hardtanh', 'prelu', 'relu', 'relu6', 'selu', 'leakyrelu', 'sigmoid', 'softmax', + # 'softplus', 'softshrink', 'softsign', 'tanhshrink', 'logsigmoid', 'logsoftmax', 'hsigmoid'])" + """ + + def __init__(self, act: str = None): + super(Activation, self).__init__() + + self._act = act + upper_act_names = nn.layer.activation.__dict__.keys() + lower_act_names = [act.lower() for act in upper_act_names] + act_dict = dict(zip(lower_act_names, upper_act_names)) + + if act is not None: + if act in act_dict.keys(): + act_name = act_dict[act] + self.act_func = eval("nn.layer.activation.{}()".format(act_name)) + else: + raise KeyError("{} does not exist in the current {}".format(act, act_dict.keys())) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + if self._act is not None: + return self.act_func(x) + else: + return x + + +class ASPPModule(nn.Layer): + """ + Atrous Spatial Pyramid Pooling. + + Args: + aspp_ratios (tuple): The dilation rate using in ASSP module. + in_channels (int): The number of input channels. + out_channels (int): The number of output channels. + align_corners (bool): An argument of F.interpolate. It should be set to False when the output size of feature + is even, e.g. 1024x512, otherwise it is True, e.g. 769x769. + use_sep_conv (bool, optional): If using separable conv in ASPP module. Default: False. + image_pooling (bool, optional): If augmented with image-level features. 
Default: False + """ + + def __init__(self, + aspp_ratios: tuple, + in_channels: int, + out_channels: int, + align_corners: bool, + use_sep_conv: bool = False, + image_pooling: bool = False): + super().__init__() + + self.align_corners = align_corners + self.aspp_blocks = nn.LayerList() + + for ratio in aspp_ratios: + if use_sep_conv and ratio > 1: + conv_func = SeparableConvBNReLU + else: + conv_func = ConvBNReLU + + block = conv_func( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=1 if ratio == 1 else 3, + dilation=ratio, + padding=0 if ratio == 1 else ratio) + self.aspp_blocks.append(block) + + out_size = len(self.aspp_blocks) + + if image_pooling: + self.global_avg_pool = nn.Sequential( + nn.AdaptiveAvgPool2D(output_size=(1, 1)), + ConvBNReLU(in_channels, out_channels, kernel_size=1, bias_attr=False)) + out_size += 1 + self.image_pooling = image_pooling + + self.conv_bn_relu = ConvBNReLU(in_channels=out_channels * out_size, out_channels=out_channels, kernel_size=1) + + self.dropout = nn.Dropout(p=0.1) # drop rate + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + outputs = [] + for block in self.aspp_blocks: + y = block(x) + y = F.interpolate(y, x.shape[2:], mode='bilinear', align_corners=self.align_corners) + outputs.append(y) + + if self.image_pooling: + img_avg = self.global_avg_pool(x) + img_avg = F.interpolate(img_avg, x.shape[2:], mode='bilinear', align_corners=self.align_corners) + outputs.append(img_avg) + + x = paddle.concat(outputs, axis=1) + x = self.conv_bn_relu(x) + x = self.dropout(x) + + return x diff --git a/modules/image/semantic_segmentation/deeplabv3p_resnet50_cityscapes/module.py b/modules/image/semantic_segmentation/deeplabv3p_resnet50_cityscapes/module.py new file mode 100644 index 0000000000000000000000000000000000000000..149bc4e04a7b52f8fce71bef4c3dbcdc8e4b74ec --- /dev/null +++ b/modules/image/semantic_segmentation/deeplabv3p_resnet50_cityscapes/module.py @@ -0,0 +1,169 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +from typing import Union, List, Tuple + +import paddle +from paddle import nn +import paddle.nn.functional as F +import numpy as np +from paddlehub.module.module import moduleinfo +import paddlehub.vision.segmentation_transforms as T +from paddlehub.module.cv_module import ImageSegmentationModule + +from deeplabv3p_resnet50_cityscapes.resnet import ResNet50_vd +import deeplabv3p_resnet50_cityscapes.layers as L + + +@moduleinfo( + name="deeplabv3p_resnet50_cityscapes", + type="CV/semantic_segmentation", + author="paddlepaddle", + author_email="", + summary="DeepLabV3PResnet50 is a segmentation model.", + version="1.0.0", + meta=ImageSegmentationModule) +class DeepLabV3PResnet50(nn.Layer): + """ + The DeepLabV3PResnet50 implementation based on PaddlePaddle. + + The original article refers to + Liang-Chieh Chen, et, al. 
"Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation" + (https://arxiv.org/abs/1802.02611) + + Args: + num_classes (int): the unique number of target classes. + backbone_indices (tuple): two values in the tuple indicate the indices of output of backbone. + the first index will be taken as a low-level feature in Decoder component; + the second one will be taken as input of ASPP component. + Usually backbone consists of four downsampling stage, and return an output of + each stage, so we set default (0, 3), which means taking feature map of the first + stage in backbone as low-level feature used in Decoder, and feature map of the fourth + stage as input of ASPP. + aspp_ratios (tuple): the dilation rate using in ASSP module. + if output_stride=16, aspp_ratios should be set as (1, 6, 12, 18). + if output_stride=8, aspp_ratios is (1, 12, 24, 36). + aspp_out_channels (int): the output channels of ASPP module. + align_corners (bool, optional): An argument of F.interpolate. It should be set to False when the feature size is even, + e.g. 1024x512, otherwise it is True, e.g. 769x769. Default: False. + pretrained (str): the path of pretrained model. Default to None. + """ + + def __init__(self, + num_classes: int = 19, + backbone_indices: Tuple[int] = (0, 3), + aspp_ratios: Tuple[int] = (1, 12, 24, 36), + aspp_out_channels: int = 256, + align_corners=False, + pretrained: str = None): + super(DeepLabV3PResnet50, self).__init__() + self.backbone = ResNet50_vd() + backbone_channels = [self.backbone.feat_channels[i] for i in backbone_indices] + self.head = DeepLabV3PHead(num_classes, backbone_indices, backbone_channels, aspp_ratios, aspp_out_channels, + align_corners) + self.align_corners = align_corners + self.transforms = T.Compose([T.Normalize()]) + + if pretrained is not None: + model_dict = paddle.load(pretrained) + self.set_dict(model_dict) + print("load custom parameters success") + + else: + checkpoint = os.path.join(self.directory, 'model.pdparams') + model_dict = paddle.load(checkpoint) + self.set_dict(model_dict) + print("load pretrained parameters success") + + def transform(self, img: Union[np.ndarray, str]) -> Union[np.ndarray, str]: + return self.transforms(img) + + def forward(self, x: paddle.Tensor) -> List[paddle.Tensor]: + feat_list = self.backbone(x) + logit_list = self.head(feat_list) + return [ + F.interpolate(logit, x.shape[2:], mode='bilinear', align_corners=self.align_corners) for logit in logit_list + ] + + +class DeepLabV3PHead(nn.Layer): + """ + The DeepLabV3PHead implementation based on PaddlePaddle. + + Args: + num_classes (int): The unique number of target classes. + backbone_indices (tuple): Two values in the tuple indicate the indices of output of backbone. + the first index will be taken as a low-level feature in Decoder component; + the second one will be taken as input of ASPP component. + Usually backbone consists of four downsampling stage, and return an output of + each stage. If we set it as (0, 3), it means taking feature map of the first + stage in backbone as low-level feature used in Decoder, and feature map of the fourth + stage as input of ASPP. + backbone_channels (tuple): The same length with "backbone_indices". It indicates the channels of corresponding index. + aspp_ratios (tuple): The dilation rates using in ASSP module. + aspp_out_channels (int): The output channels of ASPP module. + align_corners (bool): An argument of F.interpolate. It should be set to False when the output size of feature + is even, e.g. 
1024x512, otherwise it is True, e.g. 769x769. + """ + + def __init__(self, num_classes: int, backbone_indices: Tuple[paddle.Tensor], + backbone_channels: Tuple[paddle.Tensor], aspp_ratios: Tuple[float], aspp_out_channels: int, + align_corners: bool): + super().__init__() + + self.aspp = L.ASPPModule( + aspp_ratios, backbone_channels[1], aspp_out_channels, align_corners, use_sep_conv=True, image_pooling=True) + self.decoder = Decoder(num_classes, backbone_channels[0], align_corners) + self.backbone_indices = backbone_indices + + def forward(self, feat_list: List[paddle.Tensor]) -> List[paddle.Tensor]: + logit_list = [] + low_level_feat = feat_list[self.backbone_indices[0]] + x = feat_list[self.backbone_indices[1]] + x = self.aspp(x) + logit = self.decoder(x, low_level_feat) + logit_list.append(logit) + return logit_list + + +class Decoder(nn.Layer): + """ + Decoder module of DeepLabV3P model + + Args: + num_classes (int): The number of classes. + in_channels (int): The number of input channels in decoder module. + align_corners (bool): An argument of F.interpolate. It should be set to False when the output size of feature is even, e.g. 1024x512, otherwise it is True, e.g. 769x769. + """ + + def __init__(self, num_classes: int, in_channels: int, align_corners: bool): + super(Decoder, self).__init__() + + self.conv_bn_relu1 = L.ConvBNReLU(in_channels=in_channels, out_channels=48, kernel_size=1) + + self.conv_bn_relu2 = L.SeparableConvBNReLU(in_channels=304, out_channels=256, kernel_size=3, padding=1) + self.conv_bn_relu3 = L.SeparableConvBNReLU(in_channels=256, out_channels=256, kernel_size=3, padding=1) + self.conv = nn.Conv2D(in_channels=256, out_channels=num_classes, kernel_size=1) + + self.align_corners = align_corners + + def forward(self, x: paddle.Tensor, low_level_feat: paddle.Tensor) -> paddle.Tensor: + low_level_feat = self.conv_bn_relu1(low_level_feat) + x = F.interpolate(x, low_level_feat.shape[2:], mode='bilinear', align_corners=self.align_corners) + x = paddle.concat([x, low_level_feat], axis=1) + x = self.conv_bn_relu2(x) + x = self.conv_bn_relu3(x) + x = self.conv(x) + return x diff --git a/modules/image/semantic_segmentation/deeplabv3p_resnet50_cityscapes/resnet.py b/modules/image/semantic_segmentation/deeplabv3p_resnet50_cityscapes/resnet.py new file mode 100644 index 0000000000000000000000000000000000000000..6c7fdfeb66c84d1595954bac4fcd65863649f7c8 --- /dev/null +++ b/modules/image/semantic_segmentation/deeplabv3p_resnet50_cityscapes/resnet.py @@ -0,0 +1,115 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
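To make the `backbone_indices` convention described above concrete, the sketch below inspects the per-stage feature maps returned by the `ResNet50_vd` backbone defined in this file. Loading through `hub.Module` and the 512x512 dummy input are illustrative; the commented shapes assume the default output-stride-8 configuration (dilation in the last two stages).

```python
# Hedged sketch: per-stage backbone features consumed by DeepLabV3PHead.
import paddle
import paddlehub as hub

model = hub.Module(name='deeplabv3p_resnet50_cityscapes')
x = paddle.randn([1, 3, 512, 512])
feat_list = model.backbone(x)
for i, feat in enumerate(feat_list):
    print(i, feat.shape)
# Expected (roughly):
#   0 [1, 256, 128, 128]   stride 4, low-level feature -> Decoder
#   1 [1, 512, 64, 64]     stride 8
#   2 [1, 1024, 64, 64]    stride kept at 8 via dilation
#   3 [1, 2048, 64, 64]    stride 8, input to the ASPP module
```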
+from typing import Union, List, Tuple + +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +import deeplabv3p_resnet50_cityscapes.layers as L + + +class BasicBlock(nn.Layer): + def __init__(self, + in_channels: int, + out_channels: int, + stride: int, + shortcut: bool = True, + if_first: bool = False, + name: str = None): + super(BasicBlock, self).__init__() + self.stride = stride + self.conv0 = L.ConvBNLayer( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=3, + stride=stride, + act='relu', + name=name + "_branch2a") + self.conv1 = L.ConvBNLayer( + in_channels=out_channels, out_channels=out_channels, kernel_size=3, act=None, name=name + "_branch2b") + + if not shortcut: + self.short = L.ConvBNLayer( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=1, + stride=1, + is_vd_mode=False if if_first else True, + name=name + "_branch1") + + self.shortcut = shortcut + + def forward(self, inputs: paddle.Tensor) -> paddle.Tensor: + y = self.conv0(inputs) + conv1 = self.conv1(y) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + y = paddle.elementwise_add(x=short, y=conv1, act='relu') + + return y + + +class ResNet50_vd(nn.Layer): + def __init__(self, multi_grid: Tuple[int] = (1, 2, 4)): + super(ResNet50_vd, self).__init__() + depth = [3, 4, 6, 3] + num_channels = [64, 256, 512, 1024] + num_filters = [64, 128, 256, 512] + self.feat_channels = [c * 4 for c in num_filters] + dilation_dict = {2: 2, 3: 4} + self.conv1_1 = L.ConvBNLayer( + in_channels=3, out_channels=32, kernel_size=3, stride=2, act='relu', name="conv1_1") + self.conv1_2 = L.ConvBNLayer( + in_channels=32, out_channels=32, kernel_size=3, stride=1, act='relu', name="conv1_2") + self.conv1_3 = L.ConvBNLayer( + in_channels=32, out_channels=64, kernel_size=3, stride=1, act='relu', name="conv1_3") + self.pool2d_max = nn.MaxPool2D(kernel_size=3, stride=2, padding=1) + self.stage_list = [] + + for block in range(len(depth)): + shortcut = False + block_list = [] + for i in range(depth[block]): + conv_name = "res" + str(block + 2) + chr(97 + i) + dilation_rate = dilation_dict[block] if dilation_dict and block in dilation_dict else 1 + if block == 3: + dilation_rate = dilation_rate * multi_grid[i] + bottleneck_block = self.add_sublayer( + 'bb_%d_%d' % (block, i), + L.BottleneckBlock( + in_channels=num_channels[block] if i == 0 else num_filters[block] * 4, + out_channels=num_filters[block], + stride=2 if i == 0 and block != 0 and dilation_rate == 1 else 1, + shortcut=shortcut, + if_first=block == i == 0, + name=conv_name, + dilation=dilation_rate)) + block_list.append(bottleneck_block) + shortcut = True + self.stage_list.append(block_list) + + def forward(self, inputs: paddle.Tensor) -> paddle.Tensor: + y = self.conv1_1(inputs) + y = self.conv1_2(y) + y = self.conv1_3(y) + y = self.pool2d_max(y) + feat_list = [] + for stage in self.stage_list: + for block in stage: + y = block(y) + feat_list.append(y) + return feat_list diff --git a/modules/image/semantic_segmentation/deeplabv3p_resnet50_voc/README.md b/modules/image/semantic_segmentation/deeplabv3p_resnet50_voc/README.md new file mode 100644 index 0000000000000000000000000000000000000000..954a01fa4da94b3f1d20e4fcb948ec1a6136a16d --- /dev/null +++ b/modules/image/semantic_segmentation/deeplabv3p_resnet50_voc/README.md @@ -0,0 +1,176 @@ +# PaddleHub 图像分割 + +## 模型预测 + +若想使用我们提供的预训练模型进行预测,可使用如下脚本: + +```python +import paddle +import cv2 +import paddlehub as hub + +if __name__ == '__main__': + model = 
hub.Module(name='deeplabv3p_resnet50_voc') + img = cv2.imread("/PATH/TO/IMAGE") + model.predict(images=[img], visualization=True) +``` + + + +## 如何开始Fine-tune + +本示例将展示如何使用PaddleHub对预训练模型进行finetune并完成预测任务。 + +在完成安装PaddlePaddle与PaddleHub后,通过执行`python train.py`即可开始使用deeplabv3p_resnet50_voc模型对OpticDiscSeg等数据集进行Fine-tune。 + +## 代码步骤 + +使用PaddleHub Fine-tune API进行Fine-tune可以分为4个步骤。 + +### Step1: 定义数据预处理方式 +```python +from paddlehub.vision.segmentation_transforms import Compose, Resize, Normalize + +transform = Compose([Resize(target_size=(512, 512)), Normalize()]) +``` + +`segmentation_transforms` 数据增强模块定义了丰富的针对图像分割数据的预处理方式,用户可按照需求替换自己需要的数据预处理方式。 + +### Step2: 下载数据集并使用 +```python +from paddlehub.datasets import OpticDiscSeg + +train_reader = OpticDiscSeg(transform, mode='train') + +``` +* `transform`: 数据预处理方式。 +* `mode`: 选择数据模式,可选项有 `train`, `test`, `val`, 默认为`train`。 + +数据集的准备代码可以参考 [opticdiscseg.py](../../paddlehub/datasets/opticdiscseg.py)。`hub.datasets.OpticDiscSeg()`会自动从网络下载数据集并解压到用户目录下`$HOME/.paddlehub/dataset`目录。 + +### Step3: 加载预训练模型 + +```python +model = hub.Module(name='deeplabv3p_resnet50_voc', num_classes=2, pretrained=None) +``` +* `name`: 选择预训练模型的名字。 +* `num_classes`: 分割模型的类别数目。 +* `pretrained`: 是否加载自己训练的模型,若为None,则加载提供的模型默认参数。 + +### Step4: 选择优化策略和运行配置 + +```python +scheduler = paddle.optimizer.lr.PolynomialDecay(learning_rate=0.01, decay_steps=1000, power=0.9, end_lr=0.0001) +optimizer = paddle.optimizer.Adam(learning_rate=scheduler, parameters=model.parameters()) +trainer = Trainer(model, optimizer, checkpoint_dir='test_ckpt_img_ocr', use_gpu=True) +``` + +#### 优化策略 + +Paddle2.0提供了多种优化器选择,如`SGD`, `Adam`, `Adamax`等,其中`Adam`: + +* `learning_rate`: 全局学习率。 +* `parameters`: 待优化模型参数。 + +#### 运行配置 +`Trainer` 主要控制Fine-tune的训练,包含以下可控制的参数: + +* `model`: 被优化模型; +* `optimizer`: 优化器选择; +* `use_gpu`: 是否使用gpu,默认为False; +* `use_vdl`: 是否使用vdl可视化训练过程; +* `checkpoint_dir`: 保存模型参数的地址; +* `compare_metrics`: 保存最优模型的衡量指标; + +`trainer.train` 主要控制具体的训练过程,包含以下可控制的参数: + +* `train_dataset`: 训练时所用的数据集; +* `epochs`: 训练轮数; +* `batch_size`: 训练的批大小,如果使用GPU,请根据实际情况调整batch_size; +* `num_workers`: works的数量,默认为0; +* `eval_dataset`: 验证集; +* `log_interval`: 打印日志的间隔, 单位为执行批训练的次数。 +* `save_interval`: 保存模型的间隔频次,单位为执行训练的轮数。 + +## 模型预测 + +当完成Fine-tune后,Fine-tune过程在验证集上表现最优的模型会被保存在`${CHECKPOINT_DIR}/best_model`目录下,其中`${CHECKPOINT_DIR}`目录为Fine-tune时所选择的保存checkpoint的目录。 + +我们使用该模型来进行预测。predict.py脚本如下: + +```python +import paddle +import cv2 +import paddlehub as hub + +if __name__ == '__main__': + model = hub.Module(name='deeplabv3p_resnet50_voc', pretrained='/PATH/TO/CHECKPOINT') + img = cv2.imread("/PATH/TO/IMAGE") + model.predict(images=[img], visualization=True) +``` + +参数配置正确后,请执行脚本`python predict.py`。 +**Args** +* `images`:原始图像路径或BGR格式图片; +* `visualization`: 是否可视化,默认为True; +* `save_path`: 保存结果的路径,默认保存路径为'seg_result'。 + +**NOTE:** 进行预测时,所选择的module,checkpoint_dir,dataset必须和Fine-tune所用的一样。 + +## 服务部署 + +PaddleHub Serving可以部署一个在线图像分割服务。 + +### Step1: 启动PaddleHub Serving + +运行启动命令: + +```shell +$ hub serving start -m deeplabv3p_resnet50_voc +``` + +这样就完成了一个图像分割服务化API的部署,默认端口号为8866。 + +**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +### Step2: 发送预测请求 + +配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + +```python +import requests +import json +import cv2 +import base64 + +import numpy as np + + +def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = 
np.fromstring(data, np.uint8)
+    data = cv2.imdecode(data, cv2.IMREAD_COLOR)
+    return data
+
+# 发送HTTP请求
+org_im = cv2.imread('/PATH/TO/IMAGE')
+data = {'images':[cv2_to_base64(org_im)]}
+headers = {"Content-type": "application/json"}
+url = "http://127.0.0.1:8866/predict/deeplabv3p_resnet50_voc"
+r = requests.post(url=url, headers=headers, data=json.dumps(data))
+mask = base64_to_cv2(r.json()["results"][0])
+```
+
+### 查看代码
+
+https://github.com/PaddlePaddle/PaddleSeg
+
+### 依赖
+
+paddlepaddle >= 2.0.0
+
+paddlehub >= 2.0.0
diff --git a/modules/image/semantic_segmentation/deeplabv3p_xception65_humanseg/README.md b/modules/image/semantic_segmentation/deeplabv3p_xception65_humanseg/README.md
index 922273c363b8a76c0cfaa577584a5d55b4eb7465..f84c3578a7f72c94d925df157fa027ed551ed12d 100644
--- a/modules/image/semantic_segmentation/deeplabv3p_xception65_humanseg/README.md
+++ b/modules/image/semantic_segmentation/deeplabv3p_xception65_humanseg/README.md
@@ -1,125 +1,179 @@
-## 命令行预测
-
-```
-hub run deeplabv3p_xception65_humanseg --input_path "/PATH/TO/IMAGE"
-```
-
+# deeplabv3p_xception65_humanseg
+
+|模型名称|deeplabv3p_xception65_humanseg|
+| :--- | :---: |
+|类别|图像-图像分割|
+|网络|deeplabv3p|
+|数据集|百度自建数据集|
+|是否支持Fine-tuning|否|
+|模型大小|162MB|
+|指标|-|
+|最新更新日期|2021-02-26|
+
+## 一、模型基本信息
+
+- ### 应用效果展示
+
+  - 样例结果示例:
+

+ +

+ +- ### 模型介绍 + + - DeepLabv3+使用百度自建数据集进行训练,可用于人像分割,支持任意大小的图片输入。


-## API +- 更多详情请参考:[deeplabv3p](https://github.com/PaddlePaddle/PaddleSeg) + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 + +- ### 2、安装 + + - ```shell + $ hub install deeplabv3p_xception65_humanseg + ``` + + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1.命令行预测 + + ```shell + hub run deeplabv3p_xception65_humanseg --input_path "/PATH/TO/IMAGE" + ``` + + + +- ### 2.预测代码示例 + + ```python + import paddlehub as hub + import cv2 + + human_seg = hub.Module(name="deeplabv3p_xception65_humanseg") + result = human_seg.segmentation(images=[cv2.imread('/PATH/TO/IMAGE')]) + + ``` + +- ### 3.API -```python -def segmentation(images=None, - paths=None, - batch_size=1, - use_gpu=False, - visualization=False, - output_dir='humanseg_output') -``` + ```python + def segmentation(images=None, + paths=None, + batch_size=1, + use_gpu=False, + visualization=False, + output_dir='humanseg_output') + ``` -预测API,用于人像分割。 + - 预测API,用于人像分割。 -**参数** + - **参数** -* images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\],BGR格式; -* paths (list\[str\]): 图片的路径; -* batch\_size (int): batch 的大小; -* use\_gpu (bool): 是否使用 GPU; -* visualization (bool): 是否将识别结果保存为图片文件; -* output\_dir (str): 图片的保存路径。 + * images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\],BGR格式; + * paths (list\[str\]): 图片的路径; + * batch\_size (int): batch 的大小; + * use\_gpu (bool): 是否使用 GPU; + * visualization (bool): 是否将识别结果保存为图片文件; + * output\_dir (str): 图片的保存路径。 -**返回** + - **返回** -* res (list\[dict\]): 识别结果的列表,列表中每一个元素为 dict,关键字有 'save\_path', 'data',对应的取值为: - * save\_path (str, optional): 可视化图片的保存路径(仅当visualization=True时存在); - * data (numpy.ndarray): 人像分割结果,仅包含Alpha通道,取值为0-255 (0为全透明,255为不透明),也即取值越大的像素点越可能为人体,取值越小的像素点越可能为背景。 + * res (list\[dict\]): 识别结果的列表,列表中每一个元素为 dict,关键字有 'save\_path', 'data',对应的取值为: + * save\_path (str, optional): 可视化图片的保存路径(仅当visualization=True时存在); + * data (numpy.ndarray): 人像分割结果,仅包含Alpha通道,取值为0-255 (0为全透明,255为不透明),也即取值越大的像素点越可能为人体,取值越小的像素点越可能为背景。 -```python -def save_inference_model(dirname, - model_filename=None, - params_filename=None, - combined=True) -``` + ```python + def save_inference_model(dirname, + model_filename=None, + params_filename=None, + combined=True) + ``` -将模型保存到指定路径。 + - 将模型保存到指定路径。 -**参数** + - **参数** -* dirname: 存在模型的目录名称 -* model\_filename: 模型文件名称,默认为\_\_model\_\_ -* params\_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效) -* combined: 是否将参数保存到统一的一个文件中 + * dirname: 存在模型的目录名称 + * model\_filename: 模型文件名称,默认为\_\_model\_\_ + * params\_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效) + * combined: 是否将参数保存到统一的一个文件中 -## 代码示例 -```python -import paddlehub as hub -import cv2 +## 四、服务部署 -human_seg = hub.Module(name="deeplabv3p_xception65_humanseg") -result = human_seg.segmentation(images=[cv2.imread('/PATH/TO/IMAGE')]) -# or -# result = human_seg.segmentation(paths=['/PATH/TO/IMAGE']) -``` +- PaddleHub Serving可以部署一个人像分割的在线服务。 -## 服务部署 +- ### 第一步:启动PaddleHub Serving -PaddleHub Serving可以部署一个人像分割的在线服务。 + - 运行启动命令: -## 第一步:启动PaddleHub Serving + ```shell + $ hub serving start -m deeplabv3p_xception65_humanseg + ``` -运行启动命令: -```shell -$ hub serving start -m deeplabv3p_xception65_humanseg -``` + - 这样就完成了一个人像分割的服务化API的部署,默认端口号为8866。 -这样就完成了一个人像分割的服务化API的部署,默认端口号为8866。 + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 -**NOTE:** 
如使用GPU预测,则需要在启动服务之前,设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 +- ### 第二步:发送预测请求 -## 第二步:发送预测请求 + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + ```python + import requests + import json + import cv2 + import base64 + import numpy as np -```python -import requests -import json -import cv2 -import base64 -import numpy as np + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') -def cv2_to_base64(image): - data = cv2.imencode('.jpg', image)[1] - return base64.b64encode(data.tostring()).decode('utf8') + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + # 发送HTTP请求 + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/deeplabv3p_xception65_humanseg" + r = requests.post(url=url, headers=headers, # 保存图片 + mask =cv2.cvtColor(base64_to_cv2(r.json()["results"][0]['data']), cv2.COLOR_BGR2GRAY) + rgba = np.concatenate((org_im, np.expand_dims(mask, axis=2)), axis=2) + cv2.imwrite("segment_human_server.png", rgba) + ``` -def base64_to_cv2(b64str): - data = base64.b64decode(b64str.encode('utf8')) - data = np.fromstring(data, np.uint8) - data = cv2.imdecode(data, cv2.IMREAD_COLOR) - return data +## 五、更新历史 +* 1.0.0 -# 发送HTTP请求 -data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} -headers = {"Content-type": "application/json"} -url = "http://127.0.0.1:8866/predict/deeplabv3p_xception65_humanseg" -r = requests.post(url=url, headers=headers, data=json.dumps(data)) + 初始发布 -# 打印预测结果 -print(base64_to_cv2(r.json()["results"][0]['data'])) -``` +* 1.1.0 -### 查看代码 + 提升预测性能 -[PaddleSeg 特色垂类模型 - 人像分割](https://github.com/PaddlePaddle/PaddleSeg/tree/release/v0.4.0/contrib) +* 1.1.1 -### 依赖 + 修复预测后处理图像数据超过[0,255]范围 -paddlepaddle >= 1.6.2 +* 1.1.2 -paddlehub >= 1.6.0 + 修复cudnn为8.0.4显存泄露问题 diff --git a/modules/image/semantic_segmentation/fastscnn_cityscapes/README.md b/modules/image/semantic_segmentation/fastscnn_cityscapes/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1c5db026d2bb5450aca1f2e4106f0e3abf0f212c --- /dev/null +++ b/modules/image/semantic_segmentation/fastscnn_cityscapes/README.md @@ -0,0 +1,173 @@ +# PaddleHub 图像分割 + +## 模型预测 + +若想使用我们提供的预训练模型进行预测,可使用如下脚本: + +```python +import paddle +import cv2 +import paddlehub as hub + +if __name__ == '__main__': + model = hub.Module(name='fastscnn_cityscapes') + img = cv2.imread("/PATH/TO/IMAGE") + model.predict(images=[img], visualization=True) +``` + + +## 如何开始Fine-tune + +在完成安装PaddlePaddle与PaddleHub后,通过执行`python train.py`即可开始使用fastscnn_cityscapes模型对OpticDiscSeg等数据集进行Fine-tune。 + +## 代码步骤 + +使用PaddleHub Fine-tune API进行Fine-tune可以分为4个步骤。 + +### Step1: 定义数据预处理方式 +```python +from paddlehub.vision.segmentation_transforms import Compose, Resize, Normalize + +transform = Compose([Resize(target_size=(512, 512)), Normalize()]) +``` + +`segmentation_transforms` 数据增强模块定义了丰富的针对图像分割数据的预处理方式,用户可按照需求替换自己需要的数据预处理方式。 + +### Step2: 下载数据集并使用 +```python +from paddlehub.datasets import OpticDiscSeg + +train_reader = OpticDiscSeg(transform, mode='train') + +``` +* `transform`: 数据预处理方式。 +* `mode`: 选择数据模式,可选项有 `train`, `test`, `val`, 默认为`train`。 + +数据集的准备代码可以参考 [opticdiscseg.py](../../paddlehub/datasets/opticdiscseg.py)。`hub.datasets.OpticDiscSeg()`会自动从网络下载数据集并解压到用户目录下`$HOME/.paddlehub/dataset`目录。 + +### Step3: 加载预训练模型 + +```python +model = 
hub.Module(name='fastscnn_cityscapes', num_classes=2, pretrained=None) +``` +* `name`: 选择预训练模型的名字。 +* `num_classes`: 分割模型的类别数目。 +* `pretrained`: 是否加载自己训练的模型,若为None,则加载提供的模型默认参数。 + +### Step4: 选择优化策略和运行配置 + +```python +scheduler = paddle.optimizer.lr.PolynomialDecay(learning_rate=0.01, decay_steps=1000, power=0.9, end_lr=0.0001) +optimizer = paddle.optimizer.Adam(learning_rate=scheduler, parameters=model.parameters()) +trainer = Trainer(model, optimizer, checkpoint_dir='test_ckpt_img_ocr', use_gpu=True) +``` + +#### 优化策略 + +Paddle2.0rc提供了多种优化器选择,如`SGD`, `Adam`, `Adamax`等,其中`Adam`: + +* `learning_rate`: 全局学习率。 +* `parameters`: 待优化模型参数。 + +#### 运行配置 +`Trainer` 主要控制Fine-tune的训练,包含以下可控制的参数: + +* `model`: 被优化模型; +* `optimizer`: 优化器选择; +* `use_gpu`: 是否使用gpu,默认为False; +* `use_vdl`: 是否使用vdl可视化训练过程; +* `checkpoint_dir`: 保存模型参数的地址; +* `compare_metrics`: 保存最优模型的衡量指标; + +`trainer.train` 主要控制具体的训练过程,包含以下可控制的参数: + +* `train_dataset`: 训练时所用的数据集; +* `epochs`: 训练轮数; +* `batch_size`: 训练的批大小,如果使用GPU,请根据实际情况调整batch_size; +* `num_workers`: works的数量,默认为0; +* `eval_dataset`: 验证集; +* `log_interval`: 打印日志的间隔, 单位为执行批训练的次数。 +* `save_interval`: 保存模型的间隔频次,单位为执行训练的轮数。 + +## 模型预测 + +当完成Fine-tune后,Fine-tune过程在验证集上表现最优的模型会被保存在`${CHECKPOINT_DIR}/best_model`目录下,其中`${CHECKPOINT_DIR}`目录为Fine-tune时所选择的保存checkpoint的目录。 + +我们使用该模型来进行预测。predict.py脚本如下: + +```python +import paddle +import cv2 +import paddlehub as hub + +if __name__ == '__main__': + model = hub.Module(name='fastscnn_cityscapes', pretrained='/PATH/TO/CHECKPOINT') + img = cv2.imread("/PATH/TO/IMAGE") + model.predict(images=[img], visualization=True) +``` + +参数配置正确后,请执行脚本`python predict.py`。 +**Args** +* `images`:原始图像路径或BGR格式图片; +* `visualization`: 是否可视化,默认为True; +* `save_path`: 保存结果的路径,默认保存路径为'seg_result'。 + +**NOTE:** 进行预测时,所选择的module,checkpoint_dir,dataset必须和Fine-tune所用的一样。 + +## 服务部署 + +PaddleHub Serving可以部署一个在线图像分割服务。 + +### Step1: 启动PaddleHub Serving + +运行启动命令: + +```shell +$ hub serving start -m fastscnn_cityscapes +``` + +这样就完成了一个图像分割服务化API的部署,默认端口号为8866。 + +**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +### Step2: 发送预测请求 + +配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + +```python +import requests +import json +import cv2 +import base64 + +import numpy as np + + +def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + +# 发送HTTP请求 +org_im = cv2.imread('/PATH/TO/IMAGE') +data = {'images':[cv2_to_base64(org_im)]} +headers = {"Content-type": "application/json"} +url = "http://127.0.0.1:8866/predict/fastscnn_cityscapes" +r = requests.post(url=url, headers=headers, data=json.dumps(data)) +mask = base64_to_cv2(r.json()["results"][0]) +``` + +### 查看代码 + +https://github.com/PaddlePaddle/PaddleSeg + +### 依赖 + +paddlepaddle >= 2.0.0 + +paddlehub >= 2.0.0 diff --git a/modules/image/semantic_segmentation/fastscnn_cityscapes/layers.py b/modules/image/semantic_segmentation/fastscnn_cityscapes/layers.py new file mode 100644 index 0000000000000000000000000000000000000000..5e36a1501126097f5021c0b5e2e53cd98b67976a --- /dev/null +++ b/modules/image/semantic_segmentation/fastscnn_cityscapes/layers.py @@ -0,0 +1,256 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +from typing import Tuple + +import paddle +import paddle.nn as nn +import paddle.nn.functional as F + + +def SyncBatchNorm(*args, **kwargs): + """In cpu environment nn.SyncBatchNorm does not have kernel so use nn.BatchNorm2D instead""" + if paddle.get_device() == 'cpu' or os.environ.get('PADDLESEG_EXPORT_STAGE'): + return nn.BatchNorm2D(*args, **kwargs) + else: + return nn.SyncBatchNorm(*args, **kwargs) + + +class ConvBNReLU(nn.Layer): + """Basic conv bn relu layer.""" + + def __init__(self, in_channels: int, out_channels: int, kernel_size: int, padding: str = 'same', **kwargs): + super().__init__() + + self._conv = nn.Conv2D(in_channels, out_channels, kernel_size, padding=padding, **kwargs) + + self._batch_norm = SyncBatchNorm(out_channels) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self._conv(x) + x = self._batch_norm(x) + x = F.relu(x) + return x + + +class ConvBN(nn.Layer): + """Basic conv bn layer.""" + + def __init__(self, in_channels: int, out_channels: int, kernel_size: int, padding: str = 'same', **kwargs): + super().__init__() + self._conv = nn.Conv2D(in_channels, out_channels, kernel_size, padding=padding, **kwargs) + self._batch_norm = SyncBatchNorm(out_channels) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self._conv(x) + x = self._batch_norm(x) + return x + + +class ConvReLUPool(nn.Layer): + """Basic conv bn pool layer.""" + + def __init__(self, in_channels: int, out_channels: int): + super().__init__() + self.conv = nn.Conv2D(in_channels, out_channels, kernel_size=3, stride=1, padding=1, dilation=1) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self.conv(x) + x = F.relu(x) + x = F.pool2d(x, pool_size=2, pool_type="max", pool_stride=2) + return x + + +class SeparableConvBNReLU(nn.Layer): + """Basic separable conv bn relu layer.""" + + def __init__(self, in_channels: int, out_channels: int, kernel_size: int, padding: str = 'same', **kwargs): + super().__init__() + self.depthwise_conv = ConvBN( + in_channels, + out_channels=in_channels, + kernel_size=kernel_size, + padding=padding, + groups=in_channels, + **kwargs) + self.piontwise_conv = ConvBNReLU(in_channels, out_channels, kernel_size=1, groups=1) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self.depthwise_conv(x) + x = self.piontwise_conv(x) + return x + + +class DepthwiseConvBN(nn.Layer): + """Basic depthwise conv bn relu layer.""" + + def __init__(self, in_channels: int, out_channels: int, kernel_size: int, padding: str = 'same', **kwargs): + super().__init__() + self.depthwise_conv = ConvBN( + in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + padding=padding, + groups=in_channels, + **kwargs) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self.depthwise_conv(x) + return x + + +class AuxLayer(nn.Layer): + """ + The auxiliary layer implementation for auxiliary loss. + + Args: + in_channels (int): The number of input channels. + inter_channels (int): The intermediate channels. + out_channels (int): The number of output channels, and usually it is num_classes. 
+ dropout_prob (float, optional): The drop rate. Default: 0.1. + """ + + def __init__(self, in_channels: int, inter_channels: int, out_channels: int, dropout_prob: float = 0.1): + super().__init__() + + self.conv_bn_relu = ConvBNReLU(in_channels=in_channels, out_channels=inter_channels, kernel_size=3, padding=1) + + self.dropout = nn.Dropout(p=dropout_prob) + + self.conv = nn.Conv2D(in_channels=inter_channels, out_channels=out_channels, kernel_size=1) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self.conv_bn_relu(x) + x = self.dropout(x) + x = self.conv(x) + return x + + +class Activation(nn.Layer): + """ + The wrapper of activations. + Args: + act (str, optional): The activation name in lowercase. It must be one of ['elu', 'gelu', + 'hardshrink', 'tanh', 'hardtanh', 'prelu', 'relu', 'relu6', 'selu', 'leakyrelu', 'sigmoid', + 'softmax', 'softplus', 'softshrink', 'softsign', 'tanhshrink', 'logsigmoid', 'logsoftmax', + 'hsigmoid']. Default: None, means identical transformation. + Returns: + A callable object of Activation. + Raises: + KeyError: When parameter `act` is not in the optional range. + Examples: + from paddleseg.models.common.activation import Activation + relu = Activation("relu") + print(relu) + # + sigmoid = Activation("sigmoid") + print(sigmoid) + # + not_exit_one = Activation("not_exit_one") + # KeyError: "not_exit_one does not exist in the current dict_keys(['elu', 'gelu', 'hardshrink', + # 'tanh', 'hardtanh', 'prelu', 'relu', 'relu6', 'selu', 'leakyrelu', 'sigmoid', 'softmax', + # 'softplus', 'softshrink', 'softsign', 'tanhshrink', 'logsigmoid', 'logsoftmax', 'hsigmoid'])" + """ + + def __init__(self, act: str = None): + super(Activation, self).__init__() + + self._act = act + upper_act_names = nn.layer.activation.__dict__.keys() + lower_act_names = [act.lower() for act in upper_act_names] + act_dict = dict(zip(lower_act_names, upper_act_names)) + + if act is not None: + if act in act_dict.keys(): + act_name = act_dict[act] + self.act_func = eval("nn.layer.activation.{}()".format(act_name)) + else: + raise KeyError("{} does not exist in the current {}".format(act, act_dict.keys())) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + if self._act is not None: + return self.act_func(x) + else: + return x + + +class PPModule(nn.Layer): + """ + Pyramid pooling module originally in PSPNet. + + Args: + in_channels (int): The number of intput channels to pyramid pooling module. + out_channels (int): The number of output channels after pyramid pooling module. + bin_sizes (tuple, optional): The out size of pooled feature maps. Default: (1, 2, 3, 6). + dim_reduction (bool, optional): A bool value represents if reducing dimension after pooling. Default: True. + align_corners (bool): An argument of F.interpolate. It should be set to False when the output size of feature + is even, e.g. 1024x512, otherwise it is True, e.g. 769x769. + """ + + def __init__(self, in_channels: int, out_channels: int, bin_sizes: Tuple, dim_reduction: bool, align_corners: bool): + super().__init__() + + self.bin_sizes = bin_sizes + + inter_channels = in_channels + if dim_reduction: + inter_channels = in_channels // len(bin_sizes) + + # we use dimension reduction after pooling mentioned in original implementation. 
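+        # For example, with the values this module receives from FastSCNN's global feature
+        # extractor (in_channels=128, bin_sizes=(1, 2, 3, 6), dim_reduction=True), each pooled
+        # branch is reduced to 128 // 4 = 32 channels, so conv_bn_relu2 below receives
+        # 128 + 4 * 32 = 256 channels after concatenation.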
+ self.stages = nn.LayerList([self._make_stage(in_channels, inter_channels, size) for size in bin_sizes]) + + self.conv_bn_relu2 = ConvBNReLU( + in_channels=in_channels + inter_channels * len(bin_sizes), + out_channels=out_channels, + kernel_size=3, + padding=1) + + self.align_corners = align_corners + + def _make_stage(self, in_channels: int, out_channels: int, size: int): + """ + Create one pooling layer. + + In our implementation, we adopt the same dimension reduction as the original paper that might be + slightly different with other implementations. + + After pooling, the channels are reduced to 1/len(bin_sizes) immediately, while some other implementations + keep the channels to be same. + + Args: + in_channels (int): The number of intput channels to pyramid pooling module. + out_channels (int): The number of output channels to pyramid pooling module. + size (int): The out size of the pooled layer. + + Returns: + conv (Tensor): A tensor after Pyramid Pooling Module. + """ + + prior = nn.AdaptiveAvgPool2D(output_size=(size, size)) + conv = ConvBNReLU(in_channels=in_channels, out_channels=out_channels, kernel_size=1) + + return nn.Sequential(prior, conv) + + def forward(self, input: paddle.Tensor) -> paddle.Tensor: + cat_layers = [] + for stage in self.stages: + x = stage(input) + x = F.interpolate(x, paddle.shape(input)[2:], mode='bilinear', align_corners=self.align_corners) + cat_layers.append(x) + cat_layers = [input] + cat_layers[::-1] + cat = paddle.concat(cat_layers, axis=1) + out = self.conv_bn_relu2(cat) + + return out diff --git a/modules/image/semantic_segmentation/fastscnn_cityscapes/module.py b/modules/image/semantic_segmentation/fastscnn_cityscapes/module.py new file mode 100644 index 0000000000000000000000000000000000000000..88e805fdcf405ea080cc37ba01e456a8bcba2acd --- /dev/null +++ b/modules/image/semantic_segmentation/fastscnn_cityscapes/module.py @@ -0,0 +1,275 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os +from typing import Callable, Union, Tuple + +import paddle.nn as nn +import paddle.nn.functional as F +import paddle +import numpy as np +from paddlehub.module.module import moduleinfo +import paddlehub.vision.segmentation_transforms as T +from paddlehub.module.cv_module import ImageSegmentationModule + +import fastscnn_cityscapes.layers as layers + + +@moduleinfo( + name="fastscnn_cityscapes", + type="CV/semantic_segmentation", + author="paddlepaddle", + author_email="", + summary="fastscnn_cityscapes is a segmentation model.", + version="1.0.0", + meta=ImageSegmentationModule) +class FastSCNN(nn.Layer): + """ + The FastSCNN implementation based on PaddlePaddle. + As mentioned in the original paper, FastSCNN is a real-time segmentation algorithm (123.5fps) + even for high resolution images (1024x2048). + The original article refers to + Poudel, Rudra PK, et al. "Fast-scnn: Fast semantic segmentation network" + (https://arxiv.org/pdf/1902.04502.pdf). 
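+    The network is composed of a learning-to-downsample module, a global feature extractor,
+    a feature fusion module and a classifier head; in forward() the resulting logits are
+    bilinearly upsampled back to the input resolution.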
+ Args: + num_classes (int): The unique number of target classes, default is 19. + align_corners (bool): An argument of F.interpolate. It should be set to False when the output size of feature + is even, e.g. 1024x512, otherwise it is True, e.g. 769x769.. Default: False. + pretrained (str, optional): The path or url of pretrained model. Default: None. + """ + + def __init__(self, num_classes: int = 19, align_corners: bool = False, pretrained: str = None): + + super(FastSCNN, self).__init__() + + self.learning_to_downsample = LearningToDownsample(32, 48, 64) + self.global_feature_extractor = GlobalFeatureExtractor( + in_channels=64, + block_channels=[64, 96, 128], + out_channels=128, + expansion=6, + num_blocks=[3, 3, 3], + align_corners=True) + self.feature_fusion = FeatureFusionModule(64, 128, 128, align_corners) + self.classifier = Classifier(128, num_classes) + self.align_corners = align_corners + self.transforms = T.Compose([T.Normalize()]) + + if pretrained is not None: + model_dict = paddle.load(pretrained) + self.set_dict(model_dict) + print("load custom parameters success") + + else: + checkpoint = os.path.join(self.directory, 'fastscnn_model.pdparams') + model_dict = paddle.load(checkpoint) + self.set_dict(model_dict) + print("load pretrained parameters success") + + def transform(self, img: Union[np.ndarray, str]) -> Union[np.ndarray, str]: + return self.transforms(img) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + logit_list = [] + input_size = paddle.shape(x)[2:] + higher_res_features = self.learning_to_downsample(x) + x = self.global_feature_extractor(higher_res_features) + x = self.feature_fusion(higher_res_features, x) + logit = self.classifier(x) + logit = F.interpolate(logit, input_size, mode='bilinear', align_corners=self.align_corners) + logit_list.append(logit) + + return logit_list + + +class LearningToDownsample(nn.Layer): + """ + Learning to downsample module. + This module consists of three downsampling blocks (one conv and two separable conv) + Args: + dw_channels1 (int, optional): The input channels of the first sep conv. Default: 32. + dw_channels2 (int, optional): The input channels of the second sep conv. Default: 48. + out_channels (int, optional): The output channels of LearningToDownsample module. Default: 64. + """ + + def __init__(self, dw_channels1: int = 32, dw_channels2: int = 48, out_channels: int = 64): + super(LearningToDownsample, self).__init__() + + self.conv_bn_relu = layers.ConvBNReLU(in_channels=3, out_channels=dw_channels1, kernel_size=3, stride=2) + self.dsconv_bn_relu1 = layers.SeparableConvBNReLU( + in_channels=dw_channels1, out_channels=dw_channels2, kernel_size=3, stride=2, padding=1) + self.dsconv_bn_relu2 = layers.SeparableConvBNReLU( + in_channels=dw_channels2, out_channels=out_channels, kernel_size=3, stride=2, padding=1) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self.conv_bn_relu(x) + x = self.dsconv_bn_relu1(x) + x = self.dsconv_bn_relu2(x) + return x + + +class GlobalFeatureExtractor(nn.Layer): + """ + Global feature extractor module. + This module consists of three InvertedBottleneck blocks (like inverted residual introduced by MobileNetV2) and + a PPModule (introduced by PSPNet). + Args: + in_channels (int): The number of input channels to the module. + block_channels (tuple): A tuple represents output channels of each bottleneck block. + out_channels (int): The number of output channels of the module. Default: + expansion (int): The expansion factor in bottleneck. 
+ num_blocks (tuple): It indicates the repeat time of each bottleneck. + align_corners (bool): An argument of F.interpolate. It should be set to False when the output size of feature + is even, e.g. 1024x512, otherwise it is True, e.g. 769x769. + """ + + def __init__(self, in_channels: int, block_channels: int, out_channels: int, expansion: int, num_blocks: Tuple[int], + align_corners: bool): + super(GlobalFeatureExtractor, self).__init__() + + self.bottleneck1 = self._make_layer(InvertedBottleneck, in_channels, block_channels[0], num_blocks[0], + expansion, 2) + self.bottleneck2 = self._make_layer(InvertedBottleneck, block_channels[0], block_channels[1], num_blocks[1], + expansion, 2) + self.bottleneck3 = self._make_layer(InvertedBottleneck, block_channels[1], block_channels[2], num_blocks[2], + expansion, 1) + + self.ppm = layers.PPModule( + block_channels[2], out_channels, bin_sizes=(1, 2, 3, 6), dim_reduction=True, align_corners=align_corners) + + def _make_layer(self, + block: Callable, + in_channels: int, + out_channels: int, + blocks: int, + expansion: int = 6, + stride: int = 1): + layers = [] + layers.append(block(in_channels, out_channels, expansion, stride)) + for _ in range(1, blocks): + layers.append(block(out_channels, out_channels, expansion, 1)) + return nn.Sequential(*layers) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self.bottleneck1(x) + x = self.bottleneck2(x) + x = self.bottleneck3(x) + x = self.ppm(x) + return x + + +class InvertedBottleneck(nn.Layer): + """ + Single Inverted bottleneck implementation. + Args: + in_channels (int): The number of input channels to bottleneck block. + out_channels (int): The number of output channels of bottleneck block. + expansion (int, optional). The expansion factor in bottleneck. Default: 6. + stride (int, optional). The stride used in depth-wise conv. Defalt: 2. + """ + + def __init__(self, in_channels: int, out_channels: int, expansion: int = 6, stride: int = 2): + super().__init__() + + self.use_shortcut = stride == 1 and in_channels == out_channels + + expand_channels = in_channels * expansion + self.block = nn.Sequential( + # pw + layers.ConvBNReLU(in_channels=in_channels, out_channels=expand_channels, kernel_size=1, bias_attr=False), + # dw + layers.ConvBNReLU( + in_channels=expand_channels, + out_channels=expand_channels, + kernel_size=3, + stride=stride, + padding=1, + groups=expand_channels, + bias_attr=False), + # pw-linear + layers.ConvBN(in_channels=expand_channels, out_channels=out_channels, kernel_size=1, bias_attr=False)) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + out = self.block(x) + if self.use_shortcut: + out = x + out + return out + + +class FeatureFusionModule(nn.Layer): + """ + Feature Fusion Module Implementation. + This module fuses high-resolution feature and low-resolution feature. + Args: + high_in_channels (int): The channels of high-resolution feature (output of LearningToDownsample). + low_in_channels (int): The channels of low-resolution feature (output of GlobalFeatureExtractor). + out_channels (int): The output channels of this module. + align_corners (bool): An argument of F.interpolate. It should be set to False when the output size of feature + is even, e.g. 1024x512, otherwise it is True, e.g. 769x769. 
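+
+    In forward(), the low-resolution feature is bilinearly upsampled to the spatial size of the
+    high-resolution feature, passed through a depth-wise conv and a 1x1 projection, then added to
+    the projected high-resolution feature and activated with ReLU.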
+ """ + + def __init__(self, high_in_channels: int, low_in_channels: int, out_channels: int, align_corners: bool): + super().__init__() + + # Only depth-wise conv + self.dwconv = layers.ConvBNReLU( + in_channels=low_in_channels, + out_channels=out_channels, + kernel_size=3, + padding=1, + groups=128, + bias_attr=False) + + self.conv_low_res = layers.ConvBN(out_channels, out_channels, 1) + self.conv_high_res = layers.ConvBN(high_in_channels, out_channels, 1) + self.align_corners = align_corners + + def forward(self, high_res_input: int, low_res_input: int) -> paddle.Tensor: + low_res_input = F.interpolate( + low_res_input, paddle.shape(high_res_input)[2:], mode='bilinear', align_corners=self.align_corners) + low_res_input = self.dwconv(low_res_input) + low_res_input = self.conv_low_res(low_res_input) + high_res_input = self.conv_high_res(high_res_input) + x = high_res_input + low_res_input + + return F.relu(x) + + +class Classifier(nn.Layer): + """ + The Classifier module implementation. + This module consists of two depth-wise conv and one conv. + Args: + input_channels (int): The input channels to this module. + num_classes (int): The unique number of target classes. + """ + + def __init__(self, input_channels: int, num_classes: int): + super().__init__() + + self.dsconv1 = layers.SeparableConvBNReLU( + in_channels=input_channels, out_channels=input_channels, kernel_size=3, padding=1) + + self.dsconv2 = layers.SeparableConvBNReLU( + in_channels=input_channels, out_channels=input_channels, kernel_size=3, padding=1) + + self.conv = nn.Conv2D(in_channels=input_channels, out_channels=num_classes, kernel_size=1) + + self.dropout = nn.Dropout(p=0.1) # dropout_prob + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self.dsconv1(x) + x = self.dsconv2(x) + x = self.dropout(x) + x = self.conv(x) + return x diff --git a/modules/image/semantic_segmentation/fcn_hrnetw18_cityscapes/README.md b/modules/image/semantic_segmentation/fcn_hrnetw18_cityscapes/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7cd0b8cc83f8024bf90f01dcb5f46d893ca18298 --- /dev/null +++ b/modules/image/semantic_segmentation/fcn_hrnetw18_cityscapes/README.md @@ -0,0 +1,174 @@ +# PaddleHub 图像分割 + +## 模型预测 + + +若想使用我们提供的预训练模型进行预测,可使用如下脚本: + +```python +import paddle +import cv2 +import paddlehub as hub + +if __name__ == '__main__': + model = hub.Module(name='fcn_hrnetw18_cityscapes') + img = cv2.imread("/PATH/TO/IMAGE") + model.predict(images=[img], visualization=True) +``` + + +## 如何开始Fine-tune + +在完成安装PaddlePaddle与PaddleHub后,通过执行`python train.py`即可开始使用fcn_hrnetw18_cityscapes模型对OpticDiscSeg等数据集进行Fine-tune。 + +## 代码步骤 + +使用PaddleHub Fine-tune API进行Fine-tune可以分为4个步骤。 + +### Step1: 定义数据预处理方式 +```python +from paddlehub.vision.segmentation_transforms import Compose, Resize, Normalize + +transform = Compose([Resize(target_size=(512, 512)), Normalize()]) +``` + +`segmentation_transforms` 数据增强模块定义了丰富的针对图像分割数据的预处理方式,用户可按照需求替换自己需要的数据预处理方式。 + +### Step2: 下载数据集并使用 +```python +from paddlehub.datasets import OpticDiscSeg + +train_reader = OpticDiscSeg(transform, mode='train') + +``` +* `transform`: 数据预处理方式。 +* `mode`: 选择数据模式,可选项有 `train`, `test`, `val`, 默认为`train`。 + +数据集的准备代码可以参考 [opticdiscseg.py](../../paddlehub/datasets/opticdiscseg.py)。`hub.datasets.OpticDiscSeg()`会自动从网络下载数据集并解压到用户目录下`$HOME/.paddlehub/dataset`目录。 + +### Step3: 加载预训练模型 + +```python +model = hub.Module(name='fcn_hrnetw18_cityscapes', num_classes=2, pretrained=None) +``` +* `name`: 选择预训练模型的名字。 +* `num_classes`: 分割模型的类别数目。 +* 
`pretrained`: 是否加载自己训练的模型,若为None,则加载提供的模型默认参数。
+
+### Step4: 选择优化策略和运行配置
+
+```python
+scheduler = paddle.optimizer.lr.PolynomialDecay(learning_rate=0.01, decay_steps=1000, power=0.9, end_lr=0.0001)
+optimizer = paddle.optimizer.Adam(learning_rate=scheduler, parameters=model.parameters())
+trainer = Trainer(model, optimizer, checkpoint_dir='test_ckpt_img_ocr', use_gpu=True)
+```
+
+#### 优化策略
+
+Paddle2.0rc提供了多种优化器选择,如`SGD`, `Adam`, `Adamax`等,其中`Adam`:
+
+* `learning_rate`: 全局学习率。
+* `parameters`: 待优化模型参数。
+
+#### 运行配置
+`Trainer` 主要控制Fine-tune的训练,包含以下可控制的参数:
+
+* `model`: 被优化模型;
+* `optimizer`: 优化器选择;
+* `use_gpu`: 是否使用gpu,默认为False;
+* `use_vdl`: 是否使用vdl可视化训练过程;
+* `checkpoint_dir`: 保存模型参数的地址;
+* `compare_metrics`: 保存最优模型的衡量指标;
+
+`trainer.train` 主要控制具体的训练过程,包含以下可控制的参数:
+
+* `train_dataset`: 训练时所用的数据集;
+* `epochs`: 训练轮数;
+* `batch_size`: 训练的批大小,如果使用GPU,请根据实际情况调整batch_size;
+* `num_workers`: 数据加载worker的数量,默认为0;
+* `eval_dataset`: 验证集;
+* `log_interval`: 打印日志的间隔,单位为执行批训练的次数。
+* `save_interval`: 保存模型的间隔频次,单位为执行训练的轮数。
+
+## 模型预测
+
+当完成Fine-tune后,Fine-tune过程在验证集上表现最优的模型会被保存在`${CHECKPOINT_DIR}/best_model`目录下,其中`${CHECKPOINT_DIR}`目录为Fine-tune时所选择的保存checkpoint的目录。
+
+我们使用该模型来进行预测。predict.py脚本如下:
+
+```python
+import paddle
+import cv2
+import paddlehub as hub
+
+if __name__ == '__main__':
+    model = hub.Module(name='fcn_hrnetw18_cityscapes', pretrained='/PATH/TO/CHECKPOINT')
+    img = cv2.imread("/PATH/TO/IMAGE")
+    model.predict(images=[img], visualization=True)
+```
+
+参数配置正确后,请执行脚本`python predict.py`。
+
+**Args**
+* `images`: 原始图像路径或BGR格式图片;
+* `visualization`: 是否可视化,默认为True;
+* `save_path`: 保存结果的路径,默认保存路径为'seg_result'。
+
+**NOTE:** 进行预测时,所选择的module、checkpoint_dir、dataset必须和Fine-tune所用的一样。
+
+## 服务部署
+
+PaddleHub Serving可以部署一个在线图像分割服务。
+
+### Step1: 启动PaddleHub Serving
+
+运行启动命令:
+
+```shell
+$ hub serving start -m fcn_hrnetw18_cityscapes
+```
+
+这样就完成了一个图像分割服务化API的部署,默认端口号为8866。
+
+**NOTE:** 如使用GPU预测,则需要在启动服务之前设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。
+
+### Step2: 发送预测请求
+
+配置好服务端后,使用以下几行代码即可发送预测请求并获取预测结果:
+
+```python
+import requests
+import json
+import cv2
+import base64
+
+import numpy as np
+
+
+def cv2_to_base64(image):
+    data = cv2.imencode('.jpg', image)[1]
+    return base64.b64encode(data.tobytes()).decode('utf8')
+
+def base64_to_cv2(b64str):
+    data = base64.b64decode(b64str.encode('utf8'))
+    data = np.frombuffer(data, np.uint8)
+    data = cv2.imdecode(data, cv2.IMREAD_COLOR)
+    return data
+
+# 发送HTTP请求
+org_im = cv2.imread('/PATH/TO/IMAGE')
+data = {'images':[cv2_to_base64(org_im)]}
+headers = {"Content-type": "application/json"}
+url = "http://127.0.0.1:8866/predict/fcn_hrnetw18_cityscapes"
+r = requests.post(url=url, headers=headers, data=json.dumps(data))
+mask = base64_to_cv2(r.json()["results"][0])
+```
+
+### 查看代码
+
+https://github.com/PaddlePaddle/PaddleSeg
+
+### 依赖
+
+paddlepaddle >= 2.0.0
+
+paddlehub >= 2.0.0
diff --git a/modules/image/semantic_segmentation/fcn_hrnetw18_cityscapes/hrnet.py b/modules/image/semantic_segmentation/fcn_hrnetw18_cityscapes/hrnet.py
new file mode 100644
index 0000000000000000000000000000000000000000..3e8422ad158de9b13d4eb4771f1a1736cc3b571e
--- /dev/null
+++ b/modules/image/semantic_segmentation/fcn_hrnetw18_cityscapes/hrnet.py
@@ -0,0 +1,531 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import math +from typing import Tuple + +import paddle +import paddle.nn as nn +import paddle.nn.functional as F + +import fcn_hrnetw18_cityscapes.layers as L + + +class HRNet_W18(nn.Layer): + """ + The HRNet implementation based on PaddlePaddle. + + The original article refers to + Jingdong Wang, et, al. "HRNet:Deep High-Resolution Representation Learning for Visual Recognition" + (https://arxiv.org/pdf/1908.07919.pdf). + + Args: + stage1_num_modules (int, optional): Number of modules for stage1. Default 1. + stage1_num_blocks (list, optional): Number of blocks per module for stage1. Default (4). + stage1_num_channels (list, optional): Number of channels per branch for stage1. Default (64). + stage2_num_modules (int, optional): Number of modules for stage2. Default 1. + stage2_num_blocks (list, optional): Number of blocks per module for stage2. Default (4, 4). + stage2_num_channels (list, optional): Number of channels per branch for stage2. Default (18, 36). + stage3_num_modules (int, optional): Number of modules for stage3. Default 4. + stage3_num_blocks (list, optional): Number of blocks per module for stage3. Default (4, 4, 4). + stage3_num_channels (list, optional): Number of channels per branch for stage3. Default [18, 36, 72). + stage4_num_modules (int, optional): Number of modules for stage4. Default 3. + stage4_num_blocks (list, optional): Number of blocks per module for stage4. Default (4, 4, 4, 4). + stage4_num_channels (list, optional): Number of channels per branch for stage4. Default (18, 36, 72. 144). + has_se (bool, optional): Whether to use Squeeze-and-Excitation module. Default False. + align_corners (bool, optional): An argument of F.interpolate. It should be set to False when the feature size is even, + e.g. 1024x512, otherwise it is True, e.g. 769x769. Default: False. 
+ """ + + def __init__(self, + stage1_num_modules: int = 1, + stage1_num_blocks: Tuple[int] = (4, ), + stage1_num_channels: Tuple[int] = (64, ), + stage2_num_modules: int = 1, + stage2_num_blocks: Tuple[int] = (4, 4), + stage2_num_channels: Tuple[int] = (18, 36), + stage3_num_modules: int = 4, + stage3_num_blocks: Tuple[int] = (4, 4, 4), + stage3_num_channels: Tuple[int] = (18, 36, 72), + stage4_num_modules: int = 3, + stage4_num_blocks: Tuple[int] = (4, 4, 4, 4), + stage4_num_channels: Tuple[int] = (18, 36, 72, 144), + has_se: bool = False, + align_corners: bool = False): + super(HRNet_W18, self).__init__() + + self.stage1_num_modules = stage1_num_modules + self.stage1_num_blocks = stage1_num_blocks + self.stage1_num_channels = stage1_num_channels + self.stage2_num_modules = stage2_num_modules + self.stage2_num_blocks = stage2_num_blocks + self.stage2_num_channels = stage2_num_channels + self.stage3_num_modules = stage3_num_modules + self.stage3_num_blocks = stage3_num_blocks + self.stage3_num_channels = stage3_num_channels + self.stage4_num_modules = stage4_num_modules + self.stage4_num_blocks = stage4_num_blocks + self.stage4_num_channels = stage4_num_channels + self.has_se = has_se + self.align_corners = align_corners + self.feat_channels = [sum(stage4_num_channels)] + + self.conv_layer1_1 = L.ConvBNReLU( + in_channels=3, out_channels=64, kernel_size=3, stride=2, padding='same', bias_attr=False) + + self.conv_layer1_2 = L.ConvBNReLU( + in_channels=64, out_channels=64, kernel_size=3, stride=2, padding='same', bias_attr=False) + + self.la1 = Layer1( + num_channels=64, + num_blocks=self.stage1_num_blocks[0], + num_filters=self.stage1_num_channels[0], + has_se=has_se, + name="layer2") + + self.tr1 = TransitionLayer( + in_channels=[self.stage1_num_channels[0] * 4], out_channels=self.stage2_num_channels, name="tr1") + + self.st2 = Stage( + num_channels=self.stage2_num_channels, + num_modules=self.stage2_num_modules, + num_blocks=self.stage2_num_blocks, + num_filters=self.stage2_num_channels, + has_se=self.has_se, + name="st2", + align_corners=align_corners) + + self.tr2 = TransitionLayer( + in_channels=self.stage2_num_channels, out_channels=self.stage3_num_channels, name="tr2") + self.st3 = Stage( + num_channels=self.stage3_num_channels, + num_modules=self.stage3_num_modules, + num_blocks=self.stage3_num_blocks, + num_filters=self.stage3_num_channels, + has_se=self.has_se, + name="st3", + align_corners=align_corners) + + self.tr3 = TransitionLayer( + in_channels=self.stage3_num_channels, out_channels=self.stage4_num_channels, name="tr3") + self.st4 = Stage( + num_channels=self.stage4_num_channels, + num_modules=self.stage4_num_modules, + num_blocks=self.stage4_num_blocks, + num_filters=self.stage4_num_channels, + has_se=self.has_se, + name="st4", + align_corners=align_corners) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + conv1 = self.conv_layer1_1(x) + conv2 = self.conv_layer1_2(conv1) + + la1 = self.la1(conv2) + + tr1 = self.tr1([la1]) + st2 = self.st2(tr1) + + tr2 = self.tr2(st2) + st3 = self.st3(tr2) + + tr3 = self.tr3(st3) + st4 = self.st4(tr3) + + x0_h, x0_w = st4[0].shape[2:] + x1 = F.interpolate(st4[1], (x0_h, x0_w), mode='bilinear', align_corners=self.align_corners) + x2 = F.interpolate(st4[2], (x0_h, x0_w), mode='bilinear', align_corners=self.align_corners) + x3 = F.interpolate(st4[3], (x0_h, x0_w), mode='bilinear', align_corners=self.align_corners) + x = paddle.concat([st4[0], x1, x2, x3], axis=1) + + return [x] + + +class Layer1(nn.Layer): + def __init__(self, 
num_channels: int, num_filters: int, num_blocks: int, has_se: bool = False, name: str = None): + super(Layer1, self).__init__() + + self.bottleneck_block_list = [] + + for i in range(num_blocks): + bottleneck_block = self.add_sublayer( + "bb_{}_{}".format(name, i + 1), + BottleneckBlock( + num_channels=num_channels if i == 0 else num_filters * 4, + num_filters=num_filters, + has_se=has_se, + stride=1, + downsample=True if i == 0 else False, + name=name + '_' + str(i + 1))) + self.bottleneck_block_list.append(bottleneck_block) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + conv = x + for block_func in self.bottleneck_block_list: + conv = block_func(conv) + return conv + + +class TransitionLayer(nn.Layer): + def __init__(self, in_channels: int, out_channels: int, name=None): + super(TransitionLayer, self).__init__() + + num_in = len(in_channels) + num_out = len(out_channels) + self.conv_bn_func_list = [] + for i in range(num_out): + residual = None + if i < num_in: + if in_channels[i] != out_channels[i]: + residual = self.add_sublayer( + "transition_{}_layer_{}".format(name, i + 1), + L.ConvBNReLU( + in_channels=in_channels[i], + out_channels=out_channels[i], + kernel_size=3, + padding='same', + bias_attr=False)) + else: + residual = self.add_sublayer( + "transition_{}_layer_{}".format(name, i + 1), + L.ConvBNReLU( + in_channels=in_channels[-1], + out_channels=out_channels[i], + kernel_size=3, + stride=2, + padding='same', + bias_attr=False)) + self.conv_bn_func_list.append(residual) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + outs = [] + for idx, conv_bn_func in enumerate(self.conv_bn_func_list): + if conv_bn_func is None: + outs.append(x[idx]) + else: + if idx < len(x): + outs.append(conv_bn_func(x[idx])) + else: + outs.append(conv_bn_func(x[-1])) + return outs + + +class Branches(nn.Layer): + def __init__(self, num_blocks: int, in_channels: int, out_channels: int, has_se: bool = False, name: str = None): + super(Branches, self).__init__() + + self.basic_block_list = [] + + for i in range(len(out_channels)): + self.basic_block_list.append([]) + for j in range(num_blocks[i]): + in_ch = in_channels[i] if j == 0 else out_channels[i] + basic_block_func = self.add_sublayer( + "bb_{}_branch_layer_{}_{}".format(name, i + 1, j + 1), + BasicBlock( + num_channels=in_ch, + num_filters=out_channels[i], + has_se=has_se, + name=name + '_branch_layer_' + str(i + 1) + '_' + str(j + 1))) + self.basic_block_list[i].append(basic_block_func) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + outs = [] + for idx, input in enumerate(x): + conv = input + for basic_block_func in self.basic_block_list[idx]: + conv = basic_block_func(conv) + outs.append(conv) + return outs + + +class BottleneckBlock(nn.Layer): + def __init__(self, + num_channels: int, + num_filters: int, + has_se: bool, + stride: int = 1, + downsample: bool = False, + name: str = None): + super(BottleneckBlock, self).__init__() + + self.has_se = has_se + self.downsample = downsample + + self.conv1 = L.ConvBNReLU( + in_channels=num_channels, out_channels=num_filters, kernel_size=1, padding='same', bias_attr=False) + + self.conv2 = L.ConvBNReLU( + in_channels=num_filters, + out_channels=num_filters, + kernel_size=3, + stride=stride, + padding='same', + bias_attr=False) + + self.conv3 = L.ConvBN( + in_channels=num_filters, out_channels=num_filters * 4, kernel_size=1, padding='same', bias_attr=False) + + if self.downsample: + self.conv_down = L.ConvBN( + in_channels=num_channels, out_channels=num_filters * 4, 
kernel_size=1, padding='same', bias_attr=False) + + if self.has_se: + self.se = SELayer( + num_channels=num_filters * 4, num_filters=num_filters * 4, reduction_ratio=16, name=name + '_fc') + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + residual = x + conv1 = self.conv1(x) + conv2 = self.conv2(conv1) + conv3 = self.conv3(conv2) + + if self.downsample: + residual = self.conv_down(x) + + if self.has_se: + conv3 = self.se(conv3) + + y = conv3 + residual + y = F.relu(y) + return y + + +class BasicBlock(nn.Layer): + def __init__(self, + num_channels: int, + num_filters: int, + stride: int = 1, + has_se: bool = False, + downsample: bool = False, + name: str = None): + super(BasicBlock, self).__init__() + + self.has_se = has_se + self.downsample = downsample + + self.conv1 = L.ConvBNReLU( + in_channels=num_channels, + out_channels=num_filters, + kernel_size=3, + stride=stride, + padding='same', + bias_attr=False) + self.conv2 = L.ConvBN( + in_channels=num_filters, out_channels=num_filters, kernel_size=3, padding='same', bias_attr=False) + + if self.downsample: + self.conv_down = L.ConvBNReLU( + in_channels=num_channels, out_channels=num_filters, kernel_size=1, padding='same', bias_attr=False) + + if self.has_se: + self.se = SELayer(num_channels=num_filters, num_filters=num_filters, reduction_ratio=16, name=name + '_fc') + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + residual = x + conv1 = self.conv1(x) + conv2 = self.conv2(conv1) + + if self.downsample: + residual = self.conv_down(x) + + if self.has_se: + conv2 = self.se(conv2) + + y = conv2 + residual + y = F.relu(y) + return y + + +class SELayer(nn.Layer): + def __init__(self, num_channels: int, num_filters: int, reduction_ratio: int, name: str = None): + super(SELayer, self).__init__() + + self.pool2d_gap = nn.AdaptiveAvgPool2D(1) + + self._num_channels = num_channels + + med_ch = int(num_channels / reduction_ratio) + stdv = 1.0 / math.sqrt(num_channels * 1.0) + self.squeeze = nn.Linear( + num_channels, med_ch, weight_attr=paddle.ParamAttr(initializer=nn.initializer.Uniform(-stdv, stdv))) + + stdv = 1.0 / math.sqrt(med_ch * 1.0) + self.excitation = nn.Linear( + med_ch, num_filters, weight_attr=paddle.ParamAttr(initializer=nn.initializer.Uniform(-stdv, stdv))) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + pool = self.pool2d_gap(x) + pool = paddle.reshape(pool, shape=[-1, self._num_channels]) + squeeze = self.squeeze(pool) + squeeze = F.relu(squeeze) + excitation = self.excitation(squeeze) + excitation = F.sigmoid(excitation) + excitation = paddle.reshape(excitation, shape=[-1, self._num_channels, 1, 1]) + out = x * excitation + return out + + +class Stage(nn.Layer): + def __init__(self, + num_channels: int, + num_modules: int, + num_blocks: int, + num_filters: int, + has_se: bool = False, + multi_scale_output: bool = True, + name: str = None, + align_corners: bool = False): + super(Stage, self).__init__() + + self._num_modules = num_modules + + self.stage_func_list = [] + for i in range(num_modules): + if i == num_modules - 1 and not multi_scale_output: + stage_func = self.add_sublayer( + "stage_{}_{}".format(name, i + 1), + HighResolutionModule( + num_channels=num_channels, + num_blocks=num_blocks, + num_filters=num_filters, + has_se=has_se, + multi_scale_output=False, + name=name + '_' + str(i + 1), + align_corners=align_corners)) + else: + stage_func = self.add_sublayer( + "stage_{}_{}".format(name, i + 1), + HighResolutionModule( + num_channels=num_channels, + num_blocks=num_blocks, + 
num_filters=num_filters, + has_se=has_se, + name=name + '_' + str(i + 1), + align_corners=align_corners)) + + self.stage_func_list.append(stage_func) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + out = x + for idx in range(self._num_modules): + out = self.stage_func_list[idx](out) + return out + + +class HighResolutionModule(nn.Layer): + def __init__(self, + num_channels: int, + num_blocks: int, + num_filters: int, + has_se: bool = False, + multi_scale_output: bool = True, + name: str = None, + align_corners: str = False): + super(HighResolutionModule, self).__init__() + + self.branches_func = Branches( + num_blocks=num_blocks, in_channels=num_channels, out_channels=num_filters, has_se=has_se, name=name) + + self.fuse_func = FuseLayers( + in_channels=num_filters, + out_channels=num_filters, + multi_scale_output=multi_scale_output, + name=name, + align_corners=align_corners) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + out = self.branches_func(x) + out = self.fuse_func(out) + return out + + +class FuseLayers(nn.Layer): + def __init__(self, + in_channels: int, + out_channels: int, + multi_scale_output: bool = True, + name: str = None, + align_corners: bool = False): + super(FuseLayers, self).__init__() + + self._actual_ch = len(in_channels) if multi_scale_output else 1 + self._in_channels = in_channels + self.align_corners = align_corners + + self.residual_func_list = [] + for i in range(self._actual_ch): + for j in range(len(in_channels)): + if j > i: + residual_func = self.add_sublayer( + "residual_{}_layer_{}_{}".format(name, i + 1, j + 1), + L.ConvBN( + in_channels=in_channels[j], + out_channels=out_channels[i], + kernel_size=1, + padding='same', + bias_attr=False)) + self.residual_func_list.append(residual_func) + elif j < i: + pre_num_filters = in_channels[j] + for k in range(i - j): + if k == i - j - 1: + residual_func = self.add_sublayer( + "residual_{}_layer_{}_{}_{}".format(name, i + 1, j + 1, k + 1), + L.ConvBN( + in_channels=pre_num_filters, + out_channels=out_channels[i], + kernel_size=3, + stride=2, + padding='same', + bias_attr=False)) + pre_num_filters = out_channels[i] + else: + residual_func = self.add_sublayer( + "residual_{}_layer_{}_{}_{}".format(name, i + 1, j + 1, k + 1), + L.ConvBNReLU( + in_channels=pre_num_filters, + out_channels=out_channels[j], + kernel_size=3, + stride=2, + padding='same', + bias_attr=False)) + pre_num_filters = out_channels[j] + self.residual_func_list.append(residual_func) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + outs = [] + residual_func_idx = 0 + for i in range(self._actual_ch): + residual = x[i] + residual_shape = residual.shape[-2:] + for j in range(len(self._in_channels)): + if j > i: + y = self.residual_func_list[residual_func_idx](x[j]) + residual_func_idx += 1 + + y = F.interpolate(y, residual_shape, mode='bilinear', align_corners=self.align_corners) + residual = residual + y + elif j < i: + y = x[j] + for k in range(i - j): + y = self.residual_func_list[residual_func_idx](y) + residual_func_idx += 1 + + residual = residual + y + + residual = F.relu(residual) + outs.append(residual) + + return outs diff --git a/modules/image/semantic_segmentation/fcn_hrnetw18_cityscapes/layers.py b/modules/image/semantic_segmentation/fcn_hrnetw18_cityscapes/layers.py new file mode 100644 index 0000000000000000000000000000000000000000..8758f54f9a840ae49fd6e424b98bfe1dd61e13ec --- /dev/null +++ b/modules/image/semantic_segmentation/fcn_hrnetw18_cityscapes/layers.py @@ -0,0 +1,296 @@ +# copyright (c) 2021 
PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Tuple + +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +from paddle.nn import Conv2D, AvgPool2D + + +def SyncBatchNorm(*args, **kwargs): + """In cpu environment nn.SyncBatchNorm does not have kernel so use nn.BatchNorm2D instead""" + if paddle.get_device() == 'cpu': + return nn.BatchNorm2D(*args, **kwargs) + else: + return nn.SyncBatchNorm(*args, **kwargs) + + +class ConvBNLayer(nn.Layer): + """Basic conv bn relu layer.""" + + def __init__(self, + in_channels: int, + out_channels: int, + kernel_size: int, + stride: int = 1, + dilation: int = 1, + groups: int = 1, + is_vd_mode: bool = False, + act: str = None, + name: str = None): + super(ConvBNLayer, self).__init__() + + self.is_vd_mode = is_vd_mode + self._pool2d_avg = AvgPool2D(kernel_size=2, stride=2, padding=0, ceil_mode=True) + self._conv = Conv2D( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + padding=(kernel_size - 1) // 2 if dilation == 1 else 0, + dilation=dilation, + groups=groups, + bias_attr=False) + + self._batch_norm = SyncBatchNorm(out_channels) + self._act_op = Activation(act=act) + + def forward(self, inputs: paddle.Tensor) -> paddle.Tensor: + if self.is_vd_mode: + inputs = self._pool2d_avg(inputs) + y = self._conv(inputs) + y = self._batch_norm(y) + y = self._act_op(y) + + return y + + +class BottleneckBlock(nn.Layer): + """Residual bottleneck block""" + + def __init__(self, + in_channels: int, + out_channels: int, + stride: int, + shortcut: bool = True, + if_first: bool = False, + dilation: int = 1, + name: str = None): + super(BottleneckBlock, self).__init__() + + self.conv0 = ConvBNLayer( + in_channels=in_channels, out_channels=out_channels, kernel_size=1, act='relu', name=name + "_branch2a") + + self.dilation = dilation + + self.conv1 = ConvBNLayer( + in_channels=out_channels, + out_channels=out_channels, + kernel_size=3, + stride=stride, + act='relu', + dilation=dilation, + name=name + "_branch2b") + self.conv2 = ConvBNLayer( + in_channels=out_channels, out_channels=out_channels * 4, kernel_size=1, act=None, name=name + "_branch2c") + + if not shortcut: + self.short = ConvBNLayer( + in_channels=in_channels, + out_channels=out_channels * 4, + kernel_size=1, + stride=1, + is_vd_mode=False if if_first or stride == 1 else True, + name=name + "_branch1") + + self.shortcut = shortcut + + def forward(self, inputs: paddle.Tensor) -> paddle.Tensor: + y = self.conv0(inputs) + if self.dilation > 1: + padding = self.dilation + y = F.pad(y, [padding, padding, padding, padding]) + + conv1 = self.conv1(y) + conv2 = self.conv2(conv1) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + + y = paddle.add(x=short, y=conv2) + y = F.relu(y) + return y + + +class SeparableConvBNReLU(nn.Layer): + """Depthwise Separable Convolution.""" + + def __init__(self, in_channels: int, out_channels: int, kernel_size: int, padding: str = 
'same', **kwargs: dict): + super(SeparableConvBNReLU, self).__init__() + self.depthwise_conv = ConvBN( + in_channels, + out_channels=in_channels, + kernel_size=kernel_size, + padding=padding, + groups=in_channels, + **kwargs) + self.piontwise_conv = ConvBNReLU(in_channels, out_channels, kernel_size=1, groups=1) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self.depthwise_conv(x) + x = self.piontwise_conv(x) + return x + + +class ConvBN(nn.Layer): + """Basic conv bn layer""" + + def __init__(self, in_channels: int, out_channels: int, kernel_size: int, padding: str = 'same', **kwargs: dict): + super(ConvBN, self).__init__() + self._conv = Conv2D(in_channels, out_channels, kernel_size, padding=padding, **kwargs) + self._batch_norm = SyncBatchNorm(out_channels) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self._conv(x) + x = self._batch_norm(x) + return x + + +class ConvBNReLU(nn.Layer): + """Basic conv bn relu layer.""" + + def __init__(self, in_channels: int, out_channels: int, kernel_size: int, padding: str = 'same', **kwargs: dict): + super(ConvBNReLU, self).__init__() + + self._conv = Conv2D(in_channels, out_channels, kernel_size, padding=padding, **kwargs) + self._batch_norm = SyncBatchNorm(out_channels) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self._conv(x) + x = self._batch_norm(x) + x = F.relu(x) + return x + + +class Activation(nn.Layer): + """ + The wrapper of activations. + Args: + act (str, optional): The activation name in lowercase. It must be one of ['elu', 'gelu', + 'hardshrink', 'tanh', 'hardtanh', 'prelu', 'relu', 'relu6', 'selu', 'leakyrelu', 'sigmoid', + 'softmax', 'softplus', 'softshrink', 'softsign', 'tanhshrink', 'logsigmoid', 'logsoftmax', + 'hsigmoid']. Default: None, means identical transformation. + Returns: + A callable object of Activation. + Raises: + KeyError: When parameter `act` is not in the optional range. + Examples: + from paddleseg.models.common.activation import Activation + relu = Activation("relu") + print(relu) + # + sigmoid = Activation("sigmoid") + print(sigmoid) + # + not_exit_one = Activation("not_exit_one") + # KeyError: "not_exit_one does not exist in the current dict_keys(['elu', 'gelu', 'hardshrink', + # 'tanh', 'hardtanh', 'prelu', 'relu', 'relu6', 'selu', 'leakyrelu', 'sigmoid', 'softmax', + # 'softplus', 'softshrink', 'softsign', 'tanhshrink', 'logsigmoid', 'logsoftmax', 'hsigmoid'])" + """ + + def __init__(self, act: str = None): + super(Activation, self).__init__() + + self._act = act + upper_act_names = nn.layer.activation.__dict__.keys() + lower_act_names = [act.lower() for act in upper_act_names] + act_dict = dict(zip(lower_act_names, upper_act_names)) + + if act is not None: + if act in act_dict.keys(): + act_name = act_dict[act] + self.act_func = eval("nn.layer.activation.{}()".format(act_name)) + else: + raise KeyError("{} does not exist in the current {}".format(act, act_dict.keys())) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + if self._act is not None: + return self.act_func(x) + else: + return x + + +class ASPPModule(nn.Layer): + """ + Atrous Spatial Pyramid Pooling. + + Args: + aspp_ratios (tuple): The dilation rate using in ASSP module. + in_channels (int): The number of input channels. + out_channels (int): The number of output channels. + align_corners (bool): An argument of F.interpolate. It should be set to False when the output size of feature + is even, e.g. 1024x512, otherwise it is True, e.g. 769x769. 
+ use_sep_conv (bool, optional): If using separable conv in ASPP module. Default: False. + image_pooling (bool, optional): If augmented with image-level features. Default: False + """ + + def __init__(self, + aspp_ratios: Tuple[int], + in_channels: int, + out_channels: int, + align_corners: bool, + use_sep_conv: bool = False, + image_pooling: bool = False): + super().__init__() + + self.align_corners = align_corners + self.aspp_blocks = nn.LayerList() + + for ratio in aspp_ratios: + if use_sep_conv and ratio > 1: + conv_func = SeparableConvBNReLU + else: + conv_func = ConvBNReLU + + block = conv_func( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=1 if ratio == 1 else 3, + dilation=ratio, + padding=0 if ratio == 1 else ratio) + self.aspp_blocks.append(block) + + out_size = len(self.aspp_blocks) + + if image_pooling: + self.global_avg_pool = nn.Sequential( + nn.AdaptiveAvgPool2D(output_size=(1, 1)), + ConvBNReLU(in_channels, out_channels, kernel_size=1, bias_attr=False)) + out_size += 1 + self.image_pooling = image_pooling + + self.conv_bn_relu = ConvBNReLU(in_channels=out_channels * out_size, out_channels=out_channels, kernel_size=1) + + self.dropout = nn.Dropout(p=0.1) # drop rate + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + outputs = [] + for block in self.aspp_blocks: + y = block(x) + y = F.interpolate(y, x.shape[2:], mode='bilinear', align_corners=self.align_corners) + outputs.append(y) + + if self.image_pooling: + img_avg = self.global_avg_pool(x) + img_avg = F.interpolate(img_avg, x.shape[2:], mode='bilinear', align_corners=self.align_corners) + outputs.append(img_avg) + + x = paddle.concat(outputs, axis=1) + x = self.conv_bn_relu(x) + x = self.dropout(x) + + return x diff --git a/modules/image/semantic_segmentation/fcn_hrnetw18_cityscapes/module.py b/modules/image/semantic_segmentation/fcn_hrnetw18_cityscapes/module.py new file mode 100644 index 0000000000000000000000000000000000000000..436207fc12954e43bbccf9a626a6cf9783a88db0 --- /dev/null +++ b/modules/image/semantic_segmentation/fcn_hrnetw18_cityscapes/module.py @@ -0,0 +1,133 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +from typing import Union, List, Tuple + +import paddle +from paddle import nn +import paddle.nn.functional as F +import numpy as np +from paddlehub.module.module import moduleinfo +import paddlehub.vision.segmentation_transforms as T +from paddlehub.module.cv_module import ImageSegmentationModule + +from fcn_hrnetw18_cityscapes.hrnet import HRNet_W18 +import fcn_hrnetw18_cityscapes.layers as layers + + +@moduleinfo( + name="fcn_hrnetw18_cityscapes", + type="CV/semantic_segmentation", + author="paddlepaddle", + author_email="", + summary="Fcn_hrnetw18 is a segmentation model.", + version="1.0.0", + meta=ImageSegmentationModule) +class FCN(nn.Layer): + """ + A simple implementation for FCN based on PaddlePaddle. + + The original article refers to + Evan Shelhamer, et, al. 
"Fully Convolutional Networks for Semantic Segmentation" + (https://arxiv.org/abs/1411.4038). + + Args: + num_classes (int): The unique number of target classes. + backbone_indices (tuple, optional): The values in the tuple indicate the indices of output of backbone. + Default: (-1, ). + channels (int, optional): The channels between conv layer and the last layer of FCNHead. + If None, it will be the number of channels of input features. Default: None. + align_corners (bool): An argument of F.interpolate. It should be set to False when the output size of feature + is even, e.g. 1024x512, otherwise it is True, e.g. 769x769. Default: False. + pretrained (str, optional): The path or url of pretrained model. Default: None + """ + + def __init__(self, + num_classes: int = 19, + backbone_indices: Tuple[int] = (-1, ), + channels: int = None, + align_corners: bool = False, + pretrained: str = None): + super(FCN, self).__init__() + + self.backbone = HRNet_W18() + backbone_channels = [self.backbone.feat_channels[i] for i in backbone_indices] + + self.head = FCNHead(num_classes, backbone_indices, backbone_channels, channels) + + self.align_corners = align_corners + self.transforms = T.Compose([T.Normalize()]) + + if pretrained is not None: + model_dict = paddle.load(pretrained) + self.set_dict(model_dict) + print("load custom parameters success") + + else: + checkpoint = os.path.join(self.directory, 'model.pdparams') + model_dict = paddle.load(checkpoint) + self.set_dict(model_dict) + print("load pretrained parameters success") + + def transform(self, img: Union[np.ndarray, str]) -> Union[np.ndarray, str]: + return self.transforms(img) + + def forward(self, x: paddle.Tensor) -> List[paddle.Tensor]: + feat_list = self.backbone(x) + logit_list = self.head(feat_list) + return [ + F.interpolate(logit, paddle.shape(x)[2:], mode='bilinear', align_corners=self.align_corners) + for logit in logit_list + ] + + +class FCNHead(nn.Layer): + """ + A simple implementation for FCNHead based on PaddlePaddle + + Args: + num_classes (int): The unique number of target classes. + backbone_indices (tuple, optional): The values in the tuple indicate the indices of output of backbone. + Default: (-1, ). + backbone_channels (tuple): The values of backbone channels. + Default: (270, ). + channels (int, optional): The channels between conv layer and the last layer of FCNHead. + If None, it will be the number of channels of input features. Default: None. + pretrained (str, optional): The path of pretrained model. 
Default: None + """ + + def __init__(self, + num_classes: int, + backbone_indices: Tuple[int] = (-1, ), + backbone_channels: Tuple[int] = (270, ), + channels: int = None): + super(FCNHead, self).__init__() + + self.num_classes = num_classes + self.backbone_indices = backbone_indices + if channels is None: + channels = backbone_channels[0] + + self.conv_1 = layers.ConvBNReLU( + in_channels=backbone_channels[0], out_channels=channels, kernel_size=1, padding='same', stride=1) + self.cls = nn.Conv2D(in_channels=channels, out_channels=self.num_classes, kernel_size=1, stride=1, padding=0) + + def forward(self, feat_list: nn.Layer) -> List[paddle.Tensor]: + logit_list = [] + x = feat_list[self.backbone_indices[0]] + x = self.conv_1(x) + logit = self.cls(x) + logit_list.append(logit) + return logit_list diff --git a/modules/image/semantic_segmentation/fcn_hrnetw18_voc/README.md b/modules/image/semantic_segmentation/fcn_hrnetw18_voc/README.md new file mode 100644 index 0000000000000000000000000000000000000000..251f9480dd49c5e6632e2b3814face5147c258bc --- /dev/null +++ b/modules/image/semantic_segmentation/fcn_hrnetw18_voc/README.md @@ -0,0 +1,175 @@ +# PaddleHub 图像分割 + + +## 模型预测 + + +若想使用我们提供的预训练模型进行预测,可使用如下脚本: + +```python +import paddle +import cv2 +import paddlehub as hub + +if __name__ == '__main__': + model = hub.Module(name='fcn_hrnetw18_voc') + img = cv2.imread("/PATH/TO/IMAGE") + model.predict(images=[img], visualization=True) +``` + + +## 如何开始Fine-tune + +在完成安装PaddlePaddle与PaddleHub后,通过执行`python train.py`即可开始使用fcn_hrnetw18_voc模型对OpticDiscSeg等数据集进行Fine-tune。 + +## 代码步骤 + +使用PaddleHub Fine-tune API进行Fine-tune可以分为4个步骤。 + +### Step1: 定义数据预处理方式 +```python +from paddlehub.vision.segmentation_transforms import Compose, Resize, Normalize + +transform = Compose([Resize(target_size=(512, 512)), Normalize()]) +``` + +`segmentation_transforms` 数据增强模块定义了丰富的针对图像分割数据的预处理方式,用户可按照需求替换自己需要的数据预处理方式。 + +### Step2: 下载数据集并使用 +```python +from paddlehub.datasets import OpticDiscSeg + +train_reader = OpticDiscSeg(transform, mode='train') + +``` +* `transform`: 数据预处理方式。 +* `mode`: 选择数据模式,可选项有 `train`, `test`, `val`, 默认为`train`。 + +数据集的准备代码可以参考 [opticdiscseg.py](../../paddlehub/datasets/opticdiscseg.py)。`hub.datasets.OpticDiscSeg()`会自动从网络下载数据集并解压到用户目录下`$HOME/.paddlehub/dataset`目录。 + +### Step3: 加载预训练模型 + +```python +model = hub.Module(name='fcn_hrnetw18_voc', num_classes=2, pretrained=None) +``` +* `name`: 选择预训练模型的名字。 +* `num_classes`: 分割模型的类别数目。 +* `pretrained`: 是否加载自己训练的模型,若为None,则加载提供的模型默认参数。 + +### Step4: 选择优化策略和运行配置 + +```python +scheduler = paddle.optimizer.lr.PolynomialDecay(learning_rate=0.01, decay_steps=1000, power=0.9, end_lr=0.0001) +optimizer = paddle.optimizer.Adam(learning_rate=scheduler, parameters=model.parameters()) +trainer = Trainer(model, optimizer, checkpoint_dir='test_ckpt_img_ocr', use_gpu=True) +``` + +#### 优化策略 + +Paddle2.0rc提供了多种优化器选择,如`SGD`, `Adam`, `Adamax`等,其中`Adam`: + +* `learning_rate`: 全局学习率。 +* `parameters`: 待优化模型参数。 + +#### 运行配置 +`Trainer` 主要控制Fine-tune的训练,包含以下可控制的参数: + +* `model`: 被优化模型; +* `optimizer`: 优化器选择; +* `use_gpu`: 是否使用gpu,默认为False; +* `use_vdl`: 是否使用vdl可视化训练过程; +* `checkpoint_dir`: 保存模型参数的地址; +* `compare_metrics`: 保存最优模型的衡量指标; + +`trainer.train` 主要控制具体的训练过程,包含以下可控制的参数: + +* `train_dataset`: 训练时所用的数据集; +* `epochs`: 训练轮数; +* `batch_size`: 训练的批大小,如果使用GPU,请根据实际情况调整batch_size; +* `num_workers`: works的数量,默认为0; +* `eval_dataset`: 验证集; +* `log_interval`: 打印日志的间隔, 单位为执行批训练的次数。 +* `save_interval`: 保存模型的间隔频次,单位为执行训练的轮数。 + +## 模型预测 + 
+当完成Fine-tune后,Fine-tune过程在验证集上表现最优的模型会被保存在`${CHECKPOINT_DIR}/best_model`目录下,其中`${CHECKPOINT_DIR}`目录为Fine-tune时所选择的保存checkpoint的目录。
+
+我们使用该模型来进行预测。predict.py脚本如下:
+
+```python
+import paddle
+import cv2
+import paddlehub as hub
+
+if __name__ == '__main__':
+    model = hub.Module(name='fcn_hrnetw18_voc', pretrained='/PATH/TO/CHECKPOINT')
+    img = cv2.imread("/PATH/TO/IMAGE")
+    model.predict(images=[img], visualization=True)
+```
+
+参数配置正确后,请执行脚本`python predict.py`。
+
+**Args**
+* `images`: 原始图像路径或BGR格式图片;
+* `visualization`: 是否可视化,默认为True;
+* `save_path`: 保存结果的路径,默认保存路径为'seg_result'。
+
+**NOTE:** 进行预测时,所选择的module、checkpoint_dir、dataset必须和Fine-tune所用的一样。
+
+## 服务部署
+
+PaddleHub Serving可以部署一个在线图像分割服务。
+
+### Step1: 启动PaddleHub Serving
+
+运行启动命令:
+
+```shell
+$ hub serving start -m fcn_hrnetw18_voc
+```
+
+这样就完成了一个图像分割服务化API的部署,默认端口号为8866。
+
+**NOTE:** 如使用GPU预测,则需要在启动服务之前设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。
+
+### Step2: 发送预测请求
+
+配置好服务端后,使用以下几行代码即可发送预测请求并获取预测结果:
+
+```python
+import requests
+import json
+import cv2
+import base64
+
+import numpy as np
+
+
+def cv2_to_base64(image):
+    data = cv2.imencode('.jpg', image)[1]
+    return base64.b64encode(data.tobytes()).decode('utf8')
+
+def base64_to_cv2(b64str):
+    data = base64.b64decode(b64str.encode('utf8'))
+    data = np.frombuffer(data, np.uint8)
+    data = cv2.imdecode(data, cv2.IMREAD_COLOR)
+    return data
+
+# 发送HTTP请求
+org_im = cv2.imread('/PATH/TO/IMAGE')
+data = {'images':[cv2_to_base64(org_im)]}
+headers = {"Content-type": "application/json"}
+url = "http://127.0.0.1:8866/predict/fcn_hrnetw18_voc"
+r = requests.post(url=url, headers=headers, data=json.dumps(data))
+mask = base64_to_cv2(r.json()["results"][0])
+```
+
+### 查看代码
+
+https://github.com/PaddlePaddle/PaddleSeg
+
+### 依赖
+
+paddlepaddle >= 2.0.0
+
+paddlehub >= 2.0.0
diff --git a/modules/image/semantic_segmentation/fcn_hrnetw18_voc/hrnet.py b/modules/image/semantic_segmentation/fcn_hrnetw18_voc/hrnet.py
new file mode 100644
index 0000000000000000000000000000000000000000..0766871d0f6dd82cc29aae13b7e01d2e377124a9
--- /dev/null
+++ b/modules/image/semantic_segmentation/fcn_hrnetw18_voc/hrnet.py
@@ -0,0 +1,531 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import math
+from typing import Tuple
+
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+
+import fcn_hrnetw18_voc.layers as L
+
+
+class HRNet_W18(nn.Layer):
+    """
+    The HRNet implementation based on PaddlePaddle.
+
+    The original article refers to
+    Jingdong Wang, et, al. "HRNet:Deep High-Resolution Representation Learning for Visual Recognition"
+    (https://arxiv.org/pdf/1908.07919.pdf).
+
+    Args:
+        stage1_num_modules (int, optional): Number of modules for stage1. Default 1.
+        stage1_num_blocks (list, optional): Number of blocks per module for stage1. Default (4).
+        stage1_num_channels (list, optional): Number of channels per branch for stage1. Default (64).
+        stage2_num_modules (int, optional): Number of modules for stage2. Default 1.
+ stage2_num_blocks (list, optional): Number of blocks per module for stage2. Default (4, 4). + stage2_num_channels (list, optional): Number of channels per branch for stage2. Default (18, 36). + stage3_num_modules (int, optional): Number of modules for stage3. Default 4. + stage3_num_blocks (list, optional): Number of blocks per module for stage3. Default (4, 4, 4). + stage3_num_channels (list, optional): Number of channels per branch for stage3. Default [18, 36, 72). + stage4_num_modules (int, optional): Number of modules for stage4. Default 3. + stage4_num_blocks (list, optional): Number of blocks per module for stage4. Default (4, 4, 4, 4). + stage4_num_channels (list, optional): Number of channels per branch for stage4. Default (18, 36, 72. 144). + has_se (bool, optional): Whether to use Squeeze-and-Excitation module. Default False. + align_corners (bool, optional): An argument of F.interpolate. It should be set to False when the feature size is even, + e.g. 1024x512, otherwise it is True, e.g. 769x769. Default: False. + """ + + def __init__(self, + stage1_num_modules: int = 1, + stage1_num_blocks: Tuple[int] = (4, ), + stage1_num_channels: Tuple[int] = (64, ), + stage2_num_modules: int = 1, + stage2_num_blocks: Tuple[int] = (4, 4), + stage2_num_channels: Tuple[int] = (18, 36), + stage3_num_modules: int = 4, + stage3_num_blocks: Tuple[int] = (4, 4, 4), + stage3_num_channels: Tuple[int] = (18, 36, 72), + stage4_num_modules: int = 3, + stage4_num_blocks: Tuple[int] = (4, 4, 4, 4), + stage4_num_channels: Tuple[int] = (18, 36, 72, 144), + has_se: bool = False, + align_corners: bool = False): + super(HRNet_W18, self).__init__() + + self.stage1_num_modules = stage1_num_modules + self.stage1_num_blocks = stage1_num_blocks + self.stage1_num_channels = stage1_num_channels + self.stage2_num_modules = stage2_num_modules + self.stage2_num_blocks = stage2_num_blocks + self.stage2_num_channels = stage2_num_channels + self.stage3_num_modules = stage3_num_modules + self.stage3_num_blocks = stage3_num_blocks + self.stage3_num_channels = stage3_num_channels + self.stage4_num_modules = stage4_num_modules + self.stage4_num_blocks = stage4_num_blocks + self.stage4_num_channels = stage4_num_channels + self.has_se = has_se + self.align_corners = align_corners + self.feat_channels = [sum(stage4_num_channels)] + + self.conv_layer1_1 = L.ConvBNReLU( + in_channels=3, out_channels=64, kernel_size=3, stride=2, padding='same', bias_attr=False) + + self.conv_layer1_2 = L.ConvBNReLU( + in_channels=64, out_channels=64, kernel_size=3, stride=2, padding='same', bias_attr=False) + + self.la1 = Layer1( + num_channels=64, + num_blocks=self.stage1_num_blocks[0], + num_filters=self.stage1_num_channels[0], + has_se=has_se, + name="layer2") + + self.tr1 = TransitionLayer( + in_channels=[self.stage1_num_channels[0] * 4], out_channels=self.stage2_num_channels, name="tr1") + + self.st2 = Stage( + num_channels=self.stage2_num_channels, + num_modules=self.stage2_num_modules, + num_blocks=self.stage2_num_blocks, + num_filters=self.stage2_num_channels, + has_se=self.has_se, + name="st2", + align_corners=align_corners) + + self.tr2 = TransitionLayer( + in_channels=self.stage2_num_channels, out_channels=self.stage3_num_channels, name="tr2") + self.st3 = Stage( + num_channels=self.stage3_num_channels, + num_modules=self.stage3_num_modules, + num_blocks=self.stage3_num_blocks, + num_filters=self.stage3_num_channels, + has_se=self.has_se, + name="st3", + align_corners=align_corners) + + self.tr3 = TransitionLayer( + 
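+            # tr3 adds a fourth, lower-resolution branch (stride-2 conv on the deepest feature map) before stage 4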
in_channels=self.stage3_num_channels, out_channels=self.stage4_num_channels, name="tr3") + self.st4 = Stage( + num_channels=self.stage4_num_channels, + num_modules=self.stage4_num_modules, + num_blocks=self.stage4_num_blocks, + num_filters=self.stage4_num_channels, + has_se=self.has_se, + name="st4", + align_corners=align_corners) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + conv1 = self.conv_layer1_1(x) + conv2 = self.conv_layer1_2(conv1) + + la1 = self.la1(conv2) + + tr1 = self.tr1([la1]) + st2 = self.st2(tr1) + + tr2 = self.tr2(st2) + st3 = self.st3(tr2) + + tr3 = self.tr3(st3) + st4 = self.st4(tr3) + + x0_h, x0_w = st4[0].shape[2:] + x1 = F.interpolate(st4[1], (x0_h, x0_w), mode='bilinear', align_corners=self.align_corners) + x2 = F.interpolate(st4[2], (x0_h, x0_w), mode='bilinear', align_corners=self.align_corners) + x3 = F.interpolate(st4[3], (x0_h, x0_w), mode='bilinear', align_corners=self.align_corners) + x = paddle.concat([st4[0], x1, x2, x3], axis=1) + + return [x] + + +class Layer1(nn.Layer): + def __init__(self, num_channels: int, num_filters: int, num_blocks: int, has_se: bool = False, name: str = None): + super(Layer1, self).__init__() + + self.bottleneck_block_list = [] + + for i in range(num_blocks): + bottleneck_block = self.add_sublayer( + "bb_{}_{}".format(name, i + 1), + BottleneckBlock( + num_channels=num_channels if i == 0 else num_filters * 4, + num_filters=num_filters, + has_se=has_se, + stride=1, + downsample=True if i == 0 else False, + name=name + '_' + str(i + 1))) + self.bottleneck_block_list.append(bottleneck_block) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + conv = x + for block_func in self.bottleneck_block_list: + conv = block_func(conv) + return conv + + +class TransitionLayer(nn.Layer): + def __init__(self, in_channels: int, out_channels: int, name=None): + super(TransitionLayer, self).__init__() + + num_in = len(in_channels) + num_out = len(out_channels) + self.conv_bn_func_list = [] + for i in range(num_out): + residual = None + if i < num_in: + if in_channels[i] != out_channels[i]: + residual = self.add_sublayer( + "transition_{}_layer_{}".format(name, i + 1), + L.ConvBNReLU( + in_channels=in_channels[i], + out_channels=out_channels[i], + kernel_size=3, + padding='same', + bias_attr=False)) + else: + residual = self.add_sublayer( + "transition_{}_layer_{}".format(name, i + 1), + L.ConvBNReLU( + in_channels=in_channels[-1], + out_channels=out_channels[i], + kernel_size=3, + stride=2, + padding='same', + bias_attr=False)) + self.conv_bn_func_list.append(residual) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + outs = [] + for idx, conv_bn_func in enumerate(self.conv_bn_func_list): + if conv_bn_func is None: + outs.append(x[idx]) + else: + if idx < len(x): + outs.append(conv_bn_func(x[idx])) + else: + outs.append(conv_bn_func(x[-1])) + return outs + + +class Branches(nn.Layer): + def __init__(self, num_blocks: int, in_channels: int, out_channels: int, has_se: bool = False, name: str = None): + super(Branches, self).__init__() + + self.basic_block_list = [] + + for i in range(len(out_channels)): + self.basic_block_list.append([]) + for j in range(num_blocks[i]): + in_ch = in_channels[i] if j == 0 else out_channels[i] + basic_block_func = self.add_sublayer( + "bb_{}_branch_layer_{}_{}".format(name, i + 1, j + 1), + BasicBlock( + num_channels=in_ch, + num_filters=out_channels[i], + has_se=has_se, + name=name + '_branch_layer_' + str(i + 1) + '_' + str(j + 1))) + self.basic_block_list[i].append(basic_block_func) + + 
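+    # Each resolution branch runs through its own stack of BasicBlocks; inputs and outputs are lists ordered by branch.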
def forward(self, x: paddle.Tensor) -> paddle.Tensor: + outs = [] + for idx, input in enumerate(x): + conv = input + for basic_block_func in self.basic_block_list[idx]: + conv = basic_block_func(conv) + outs.append(conv) + return outs + + +class BottleneckBlock(nn.Layer): + def __init__(self, + num_channels: int, + num_filters: int, + has_se: bool, + stride: int = 1, + downsample: bool = False, + name: str = None): + super(BottleneckBlock, self).__init__() + + self.has_se = has_se + self.downsample = downsample + + self.conv1 = L.ConvBNReLU( + in_channels=num_channels, out_channels=num_filters, kernel_size=1, padding='same', bias_attr=False) + + self.conv2 = L.ConvBNReLU( + in_channels=num_filters, + out_channels=num_filters, + kernel_size=3, + stride=stride, + padding='same', + bias_attr=False) + + self.conv3 = L.ConvBN( + in_channels=num_filters, out_channels=num_filters * 4, kernel_size=1, padding='same', bias_attr=False) + + if self.downsample: + self.conv_down = L.ConvBN( + in_channels=num_channels, out_channels=num_filters * 4, kernel_size=1, padding='same', bias_attr=False) + + if self.has_se: + self.se = SELayer( + num_channels=num_filters * 4, num_filters=num_filters * 4, reduction_ratio=16, name=name + '_fc') + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + residual = x + conv1 = self.conv1(x) + conv2 = self.conv2(conv1) + conv3 = self.conv3(conv2) + + if self.downsample: + residual = self.conv_down(x) + + if self.has_se: + conv3 = self.se(conv3) + + y = conv3 + residual + y = F.relu(y) + return y + + +class BasicBlock(nn.Layer): + def __init__(self, + num_channels: int, + num_filters: int, + stride: int = 1, + has_se: bool = False, + downsample: bool = False, + name: str = None): + super(BasicBlock, self).__init__() + + self.has_se = has_se + self.downsample = downsample + + self.conv1 = L.ConvBNReLU( + in_channels=num_channels, + out_channels=num_filters, + kernel_size=3, + stride=stride, + padding='same', + bias_attr=False) + self.conv2 = L.ConvBN( + in_channels=num_filters, out_channels=num_filters, kernel_size=3, padding='same', bias_attr=False) + + if self.downsample: + self.conv_down = L.ConvBNReLU( + in_channels=num_channels, out_channels=num_filters, kernel_size=1, padding='same', bias_attr=False) + + if self.has_se: + self.se = SELayer(num_channels=num_filters, num_filters=num_filters, reduction_ratio=16, name=name + '_fc') + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + residual = x + conv1 = self.conv1(x) + conv2 = self.conv2(conv1) + + if self.downsample: + residual = self.conv_down(x) + + if self.has_se: + conv2 = self.se(conv2) + + y = conv2 + residual + y = F.relu(y) + return y + + +class SELayer(nn.Layer): + def __init__(self, num_channels: int, num_filters: int, reduction_ratio: int, name: str = None): + super(SELayer, self).__init__() + + self.pool2d_gap = nn.AdaptiveAvgPool2D(1) + + self._num_channels = num_channels + + med_ch = int(num_channels / reduction_ratio) + stdv = 1.0 / math.sqrt(num_channels * 1.0) + self.squeeze = nn.Linear( + num_channels, med_ch, weight_attr=paddle.ParamAttr(initializer=nn.initializer.Uniform(-stdv, stdv))) + + stdv = 1.0 / math.sqrt(med_ch * 1.0) + self.excitation = nn.Linear( + med_ch, num_filters, weight_attr=paddle.ParamAttr(initializer=nn.initializer.Uniform(-stdv, stdv))) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + pool = self.pool2d_gap(x) + pool = paddle.reshape(pool, shape=[-1, self._num_channels]) + squeeze = self.squeeze(pool) + squeeze = F.relu(squeeze) + excitation = 
self.excitation(squeeze) + excitation = F.sigmoid(excitation) + excitation = paddle.reshape(excitation, shape=[-1, self._num_channels, 1, 1]) + out = x * excitation + return out + + +class Stage(nn.Layer): + def __init__(self, + num_channels: int, + num_modules: int, + num_blocks: int, + num_filters: int, + has_se: bool = False, + multi_scale_output: bool = True, + name: str = None, + align_corners: bool = False): + super(Stage, self).__init__() + + self._num_modules = num_modules + + self.stage_func_list = [] + for i in range(num_modules): + if i == num_modules - 1 and not multi_scale_output: + stage_func = self.add_sublayer( + "stage_{}_{}".format(name, i + 1), + HighResolutionModule( + num_channels=num_channels, + num_blocks=num_blocks, + num_filters=num_filters, + has_se=has_se, + multi_scale_output=False, + name=name + '_' + str(i + 1), + align_corners=align_corners)) + else: + stage_func = self.add_sublayer( + "stage_{}_{}".format(name, i + 1), + HighResolutionModule( + num_channels=num_channels, + num_blocks=num_blocks, + num_filters=num_filters, + has_se=has_se, + name=name + '_' + str(i + 1), + align_corners=align_corners)) + + self.stage_func_list.append(stage_func) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + out = x + for idx in range(self._num_modules): + out = self.stage_func_list[idx](out) + return out + + +class HighResolutionModule(nn.Layer): + def __init__(self, + num_channels: int, + num_blocks: int, + num_filters: int, + has_se: bool = False, + multi_scale_output: bool = True, + name: str = None, + align_corners: str = False): + super(HighResolutionModule, self).__init__() + + self.branches_func = Branches( + num_blocks=num_blocks, in_channels=num_channels, out_channels=num_filters, has_se=has_se, name=name) + + self.fuse_func = FuseLayers( + in_channels=num_filters, + out_channels=num_filters, + multi_scale_output=multi_scale_output, + name=name, + align_corners=align_corners) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + out = self.branches_func(x) + out = self.fuse_func(out) + return out + + +class FuseLayers(nn.Layer): + def __init__(self, + in_channels: int, + out_channels: int, + multi_scale_output: bool = True, + name: str = None, + align_corners: bool = False): + super(FuseLayers, self).__init__() + + self._actual_ch = len(in_channels) if multi_scale_output else 1 + self._in_channels = in_channels + self.align_corners = align_corners + + self.residual_func_list = [] + for i in range(self._actual_ch): + for j in range(len(in_channels)): + if j > i: + residual_func = self.add_sublayer( + "residual_{}_layer_{}_{}".format(name, i + 1, j + 1), + L.ConvBN( + in_channels=in_channels[j], + out_channels=out_channels[i], + kernel_size=1, + padding='same', + bias_attr=False)) + self.residual_func_list.append(residual_func) + elif j < i: + pre_num_filters = in_channels[j] + for k in range(i - j): + if k == i - j - 1: + residual_func = self.add_sublayer( + "residual_{}_layer_{}_{}_{}".format(name, i + 1, j + 1, k + 1), + L.ConvBN( + in_channels=pre_num_filters, + out_channels=out_channels[i], + kernel_size=3, + stride=2, + padding='same', + bias_attr=False)) + pre_num_filters = out_channels[i] + else: + residual_func = self.add_sublayer( + "residual_{}_layer_{}_{}_{}".format(name, i + 1, j + 1, k + 1), + L.ConvBNReLU( + in_channels=pre_num_filters, + out_channels=out_channels[j], + kernel_size=3, + stride=2, + padding='same', + bias_attr=False)) + pre_num_filters = out_channels[j] + self.residual_func_list.append(residual_func) + + def 
forward(self, x: paddle.Tensor) -> paddle.Tensor: + outs = [] + residual_func_idx = 0 + for i in range(self._actual_ch): + residual = x[i] + residual_shape = residual.shape[-2:] + for j in range(len(self._in_channels)): + if j > i: + y = self.residual_func_list[residual_func_idx](x[j]) + residual_func_idx += 1 + + y = F.interpolate(y, residual_shape, mode='bilinear', align_corners=self.align_corners) + residual = residual + y + elif j < i: + y = x[j] + for k in range(i - j): + y = self.residual_func_list[residual_func_idx](y) + residual_func_idx += 1 + + residual = residual + y + + residual = F.relu(residual) + outs.append(residual) + + return outs diff --git a/modules/image/semantic_segmentation/fcn_hrnetw18_voc/layers.py b/modules/image/semantic_segmentation/fcn_hrnetw18_voc/layers.py new file mode 100644 index 0000000000000000000000000000000000000000..8758f54f9a840ae49fd6e424b98bfe1dd61e13ec --- /dev/null +++ b/modules/image/semantic_segmentation/fcn_hrnetw18_voc/layers.py @@ -0,0 +1,296 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Tuple + +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +from paddle.nn import Conv2D, AvgPool2D + + +def SyncBatchNorm(*args, **kwargs): + """In cpu environment nn.SyncBatchNorm does not have kernel so use nn.BatchNorm2D instead""" + if paddle.get_device() == 'cpu': + return nn.BatchNorm2D(*args, **kwargs) + else: + return nn.SyncBatchNorm(*args, **kwargs) + + +class ConvBNLayer(nn.Layer): + """Basic conv bn relu layer.""" + + def __init__(self, + in_channels: int, + out_channels: int, + kernel_size: int, + stride: int = 1, + dilation: int = 1, + groups: int = 1, + is_vd_mode: bool = False, + act: str = None, + name: str = None): + super(ConvBNLayer, self).__init__() + + self.is_vd_mode = is_vd_mode + self._pool2d_avg = AvgPool2D(kernel_size=2, stride=2, padding=0, ceil_mode=True) + self._conv = Conv2D( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + padding=(kernel_size - 1) // 2 if dilation == 1 else 0, + dilation=dilation, + groups=groups, + bias_attr=False) + + self._batch_norm = SyncBatchNorm(out_channels) + self._act_op = Activation(act=act) + + def forward(self, inputs: paddle.Tensor) -> paddle.Tensor: + if self.is_vd_mode: + inputs = self._pool2d_avg(inputs) + y = self._conv(inputs) + y = self._batch_norm(y) + y = self._act_op(y) + + return y + + +class BottleneckBlock(nn.Layer): + """Residual bottleneck block""" + + def __init__(self, + in_channels: int, + out_channels: int, + stride: int, + shortcut: bool = True, + if_first: bool = False, + dilation: int = 1, + name: str = None): + super(BottleneckBlock, self).__init__() + + self.conv0 = ConvBNLayer( + in_channels=in_channels, out_channels=out_channels, kernel_size=1, act='relu', name=name + "_branch2a") + + self.dilation = dilation + + self.conv1 = ConvBNLayer( + in_channels=out_channels, + out_channels=out_channels, + kernel_size=3, + 
stride=stride, + act='relu', + dilation=dilation, + name=name + "_branch2b") + self.conv2 = ConvBNLayer( + in_channels=out_channels, out_channels=out_channels * 4, kernel_size=1, act=None, name=name + "_branch2c") + + if not shortcut: + self.short = ConvBNLayer( + in_channels=in_channels, + out_channels=out_channels * 4, + kernel_size=1, + stride=1, + is_vd_mode=False if if_first or stride == 1 else True, + name=name + "_branch1") + + self.shortcut = shortcut + + def forward(self, inputs: paddle.Tensor) -> paddle.Tensor: + y = self.conv0(inputs) + if self.dilation > 1: + padding = self.dilation + y = F.pad(y, [padding, padding, padding, padding]) + + conv1 = self.conv1(y) + conv2 = self.conv2(conv1) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + + y = paddle.add(x=short, y=conv2) + y = F.relu(y) + return y + + +class SeparableConvBNReLU(nn.Layer): + """Depthwise Separable Convolution.""" + + def __init__(self, in_channels: int, out_channels: int, kernel_size: int, padding: str = 'same', **kwargs: dict): + super(SeparableConvBNReLU, self).__init__() + self.depthwise_conv = ConvBN( + in_channels, + out_channels=in_channels, + kernel_size=kernel_size, + padding=padding, + groups=in_channels, + **kwargs) + self.piontwise_conv = ConvBNReLU(in_channels, out_channels, kernel_size=1, groups=1) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self.depthwise_conv(x) + x = self.piontwise_conv(x) + return x + + +class ConvBN(nn.Layer): + """Basic conv bn layer""" + + def __init__(self, in_channels: int, out_channels: int, kernel_size: int, padding: str = 'same', **kwargs: dict): + super(ConvBN, self).__init__() + self._conv = Conv2D(in_channels, out_channels, kernel_size, padding=padding, **kwargs) + self._batch_norm = SyncBatchNorm(out_channels) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self._conv(x) + x = self._batch_norm(x) + return x + + +class ConvBNReLU(nn.Layer): + """Basic conv bn relu layer.""" + + def __init__(self, in_channels: int, out_channels: int, kernel_size: int, padding: str = 'same', **kwargs: dict): + super(ConvBNReLU, self).__init__() + + self._conv = Conv2D(in_channels, out_channels, kernel_size, padding=padding, **kwargs) + self._batch_norm = SyncBatchNorm(out_channels) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self._conv(x) + x = self._batch_norm(x) + x = F.relu(x) + return x + + +class Activation(nn.Layer): + """ + The wrapper of activations. + Args: + act (str, optional): The activation name in lowercase. It must be one of ['elu', 'gelu', + 'hardshrink', 'tanh', 'hardtanh', 'prelu', 'relu', 'relu6', 'selu', 'leakyrelu', 'sigmoid', + 'softmax', 'softplus', 'softshrink', 'softsign', 'tanhshrink', 'logsigmoid', 'logsoftmax', + 'hsigmoid']. Default: None, means identical transformation. + Returns: + A callable object of Activation. + Raises: + KeyError: When parameter `act` is not in the optional range. 
+ Examples: + from paddleseg.models.common.activation import Activation + relu = Activation("relu") + print(relu) + # + sigmoid = Activation("sigmoid") + print(sigmoid) + # + not_exit_one = Activation("not_exit_one") + # KeyError: "not_exit_one does not exist in the current dict_keys(['elu', 'gelu', 'hardshrink', + # 'tanh', 'hardtanh', 'prelu', 'relu', 'relu6', 'selu', 'leakyrelu', 'sigmoid', 'softmax', + # 'softplus', 'softshrink', 'softsign', 'tanhshrink', 'logsigmoid', 'logsoftmax', 'hsigmoid'])" + """ + + def __init__(self, act: str = None): + super(Activation, self).__init__() + + self._act = act + upper_act_names = nn.layer.activation.__dict__.keys() + lower_act_names = [act.lower() for act in upper_act_names] + act_dict = dict(zip(lower_act_names, upper_act_names)) + + if act is not None: + if act in act_dict.keys(): + act_name = act_dict[act] + self.act_func = eval("nn.layer.activation.{}()".format(act_name)) + else: + raise KeyError("{} does not exist in the current {}".format(act, act_dict.keys())) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + if self._act is not None: + return self.act_func(x) + else: + return x + + +class ASPPModule(nn.Layer): + """ + Atrous Spatial Pyramid Pooling. + + Args: + aspp_ratios (tuple): The dilation rate using in ASSP module. + in_channels (int): The number of input channels. + out_channels (int): The number of output channels. + align_corners (bool): An argument of F.interpolate. It should be set to False when the output size of feature + is even, e.g. 1024x512, otherwise it is True, e.g. 769x769. + use_sep_conv (bool, optional): If using separable conv in ASPP module. Default: False. + image_pooling (bool, optional): If augmented with image-level features. Default: False + """ + + def __init__(self, + aspp_ratios: Tuple[int], + in_channels: int, + out_channels: int, + align_corners: bool, + use_sep_conv: bool = False, + image_pooling: bool = False): + super().__init__() + + self.align_corners = align_corners + self.aspp_blocks = nn.LayerList() + + for ratio in aspp_ratios: + if use_sep_conv and ratio > 1: + conv_func = SeparableConvBNReLU + else: + conv_func = ConvBNReLU + + block = conv_func( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=1 if ratio == 1 else 3, + dilation=ratio, + padding=0 if ratio == 1 else ratio) + self.aspp_blocks.append(block) + + out_size = len(self.aspp_blocks) + + if image_pooling: + self.global_avg_pool = nn.Sequential( + nn.AdaptiveAvgPool2D(output_size=(1, 1)), + ConvBNReLU(in_channels, out_channels, kernel_size=1, bias_attr=False)) + out_size += 1 + self.image_pooling = image_pooling + + self.conv_bn_relu = ConvBNReLU(in_channels=out_channels * out_size, out_channels=out_channels, kernel_size=1) + + self.dropout = nn.Dropout(p=0.1) # drop rate + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + outputs = [] + for block in self.aspp_blocks: + y = block(x) + y = F.interpolate(y, x.shape[2:], mode='bilinear', align_corners=self.align_corners) + outputs.append(y) + + if self.image_pooling: + img_avg = self.global_avg_pool(x) + img_avg = F.interpolate(img_avg, x.shape[2:], mode='bilinear', align_corners=self.align_corners) + outputs.append(img_avg) + + x = paddle.concat(outputs, axis=1) + x = self.conv_bn_relu(x) + x = self.dropout(x) + + return x diff --git a/modules/image/semantic_segmentation/fcn_hrnetw18_voc/module.py b/modules/image/semantic_segmentation/fcn_hrnetw18_voc/module.py new file mode 100644 index 
0000000000000000000000000000000000000000..39e04c6325abd83404c1c81faea59652a4b3f6d1 --- /dev/null +++ b/modules/image/semantic_segmentation/fcn_hrnetw18_voc/module.py @@ -0,0 +1,133 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +from typing import Union, List, Tuple + +import paddle +from paddle import nn +import paddle.nn.functional as F +import numpy as np +from paddlehub.module.module import moduleinfo +import paddlehub.vision.segmentation_transforms as T +from paddlehub.module.cv_module import ImageSegmentationModule + +from fcn_hrnetw18_voc.hrnet import HRNet_W18 +import fcn_hrnetw18_voc.layers as layers + + +@moduleinfo( + name="fcn_hrnetw18_voc", + type="CV/semantic_segmentation", + author="paddlepaddle", + author_email="", + summary="Fcn_hrnetw18 is a segmentation model.", + version="1.0.0", + meta=ImageSegmentationModule) +class FCN(nn.Layer): + """ + A simple implementation for FCN based on PaddlePaddle. + + The original article refers to + Evan Shelhamer, et, al. "Fully Convolutional Networks for Semantic Segmentation" + (https://arxiv.org/abs/1411.4038). + + Args: + num_classes (int): The unique number of target classes. + backbone_indices (tuple, optional): The values in the tuple indicate the indices of output of backbone. + Default: (-1, ). + channels (int, optional): The channels between conv layer and the last layer of FCNHead. + If None, it will be the number of channels of input features. Default: None. + align_corners (bool): An argument of F.interpolate. It should be set to False when the output size of feature + is even, e.g. 1024x512, otherwise it is True, e.g. 769x769. Default: False. + pretrained (str, optional): The path or url of pretrained model. 
Default: None + """ + + def __init__(self, + num_classes: int = 21, + backbone_indices: Tuple[int] = (-1, ), + channels: int = None, + align_corners: bool = False, + pretrained: str = None): + super(FCN, self).__init__() + + self.backbone = HRNet_W18() + backbone_channels = [self.backbone.feat_channels[i] for i in backbone_indices] + + self.head = FCNHead(num_classes, backbone_indices, backbone_channels, channels) + + self.align_corners = align_corners + self.transforms = T.Compose([T.Normalize()]) + + if pretrained is not None: + model_dict = paddle.load(pretrained) + self.set_dict(model_dict) + print("load custom parameters success") + + else: + checkpoint = os.path.join(self.directory, 'model.pdparams') + model_dict = paddle.load(checkpoint) + self.set_dict(model_dict) + print("load pretrained parameters success") + + def transform(self, img: Union[np.ndarray, str]) -> Union[np.ndarray, str]: + return self.transforms(img) + + def forward(self, x: paddle.Tensor) -> List[paddle.Tensor]: + feat_list = self.backbone(x) + logit_list = self.head(feat_list) + return [ + F.interpolate(logit, paddle.shape(x)[2:], mode='bilinear', align_corners=self.align_corners) + for logit in logit_list + ] + + +class FCNHead(nn.Layer): + """ + A simple implementation for FCNHead based on PaddlePaddle + + Args: + num_classes (int): The unique number of target classes. + backbone_indices (tuple, optional): The values in the tuple indicate the indices of output of backbone. + Default: (-1, ). + backbone_channels (tuple): The values of backbone channels. + Default: (270, ). + channels (int, optional): The channels between conv layer and the last layer of FCNHead. + If None, it will be the number of channels of input features. Default: None. + pretrained (str, optional): The path of pretrained model. 
Default: None + """ + + def __init__(self, + num_classes: int, + backbone_indices: Tuple[int] = (-1, ), + backbone_channels: Tuple[int] = (270, ), + channels: int = None): + super(FCNHead, self).__init__() + + self.num_classes = num_classes + self.backbone_indices = backbone_indices + if channels is None: + channels = backbone_channels[0] + + self.conv_1 = layers.ConvBNReLU( + in_channels=backbone_channels[0], out_channels=channels, kernel_size=1, padding='same', stride=1) + self.cls = nn.Conv2D(in_channels=channels, out_channels=self.num_classes, kernel_size=1, stride=1, padding=0) + + def forward(self, feat_list: nn.Layer) -> List[paddle.Tensor]: + logit_list = [] + x = feat_list[self.backbone_indices[0]] + x = self.conv_1(x) + logit = self.cls(x) + logit_list.append(logit) + return logit_list diff --git a/modules/image/semantic_segmentation/fcn_hrnetw48_cityscapes/README.md b/modules/image/semantic_segmentation/fcn_hrnetw48_cityscapes/README.md new file mode 100644 index 0000000000000000000000000000000000000000..eb7ab11f6d3ee959fcd44977e765511e7c8cbc30 --- /dev/null +++ b/modules/image/semantic_segmentation/fcn_hrnetw48_cityscapes/README.md @@ -0,0 +1,174 @@ +# PaddleHub 图像分割 + +## 模型预测 + + +若想使用我们提供的预训练模型进行预测,可使用如下脚本: + +```python +import paddle +import cv2 +import paddlehub as hub + +if __name__ == '__main__': + model = hub.Module(name='fcn_hrnetw48_cityscapes') + img = cv2.imread("/PATH/TO/IMAGE") + model.predict(images=[img], visualization=True) +``` + + +## 如何开始Fine-tune + +在完成安装PaddlePaddle与PaddleHub后,通过执行`python train.py`即可开始使用fcn_hrnetw48_cityscapes模型对OpticDiscSeg等数据集进行Fine-tune。 + +## 代码步骤 + +使用PaddleHub Fine-tune API进行Fine-tune可以分为4个步骤。 + +### Step1: 定义数据预处理方式 +```python +from paddlehub.vision.segmentation_transforms import Compose, Resize, Normalize + +transform = Compose([Resize(target_size=(512, 512)), Normalize()]) +``` + +`segmentation_transforms` 数据增强模块定义了丰富的针对图像分割数据的预处理方式,用户可按照需求替换自己需要的数据预处理方式。 + +### Step2: 下载数据集并使用 +```python +from paddlehub.datasets import OpticDiscSeg + +train_reader = OpticDiscSeg(transform, mode='train') + +``` +* `transform`: 数据预处理方式。 +* `mode`: 选择数据模式,可选项有 `train`, `test`, `val`, 默认为`train`。 + +数据集的准备代码可以参考 [opticdiscseg.py](../../paddlehub/datasets/opticdiscseg.py)。`hub.datasets.OpticDiscSeg()`会自动从网络下载数据集并解压到用户目录下`$HOME/.paddlehub/dataset`目录。 + +### Step3: 加载预训练模型 + +```python +model = hub.Module(name='fcn_hrnetw48_cityscapes', num_classes=2, pretrained=None) +``` +* `name`: 选择预训练模型的名字。 +* `num_classes`: 分割模型的类别数目。 +* `pretrained`: 是否加载自己训练的模型,若为None,则加载提供的模型默认参数。 + +### Step4: 选择优化策略和运行配置 + +```python +scheduler = paddle.optimizer.lr.PolynomialDecay(learning_rate=0.01, decay_steps=1000, power=0.9, end_lr=0.0001) +optimizer = paddle.optimizer.Adam(learning_rate=scheduler, parameters=model.parameters()) +trainer = Trainer(model, optimizer, checkpoint_dir='test_ckpt_img_ocr', use_gpu=True) +``` + +#### 优化策略 + +Paddle2.0提供了多种优化器选择,如`SGD`, `Adam`, `Adamax`等,其中`Adam`: + +* `learning_rate`: 全局学习率。 +* `parameters`: 待优化模型参数。 + +#### 运行配置 +`Trainer` 主要控制Fine-tune的训练,包含以下可控制的参数: + +* `model`: 被优化模型; +* `optimizer`: 优化器选择; +* `use_gpu`: 是否使用gpu,默认为False; +* `use_vdl`: 是否使用vdl可视化训练过程; +* `checkpoint_dir`: 保存模型参数的地址; +* `compare_metrics`: 保存最优模型的衡量指标; + +`trainer.train` 主要控制具体的训练过程,包含以下可控制的参数: + +* `train_dataset`: 训练时所用的数据集; +* `epochs`: 训练轮数; +* `batch_size`: 训练的批大小,如果使用GPU,请根据实际情况调整batch_size; +* `num_workers`: works的数量,默认为0; +* `eval_dataset`: 验证集; +* `log_interval`: 打印日志的间隔, 单位为执行批训练的次数。 +* `save_interval`: 保存模型的间隔频次,单位为执行训练的轮数。 + +## 模型预测 + 
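+在进行预测之前,需要先完成训练。基于前面 Step1~Step4 中得到的 `transform`、`train_reader` 与 `trainer`,可以用类似如下的方式发起训练(示意代码:`epochs`、`batch_size` 等取值仅为示例,请按实际情况调整):
+
+```python
+from paddlehub.datasets import OpticDiscSeg
+
+# 单独构造验证集(示例),与 Step2 的 train_reader 使用同一套 transform
+eval_reader = OpticDiscSeg(transform, mode='val')
+
+trainer.train(
+    train_dataset=train_reader,
+    epochs=10,           # 示例取值
+    batch_size=4,        # 如使用GPU,请根据显存情况调整
+    eval_dataset=eval_reader,
+    log_interval=10,
+    save_interval=1)
+```
+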
+当完成Fine-tune后,Fine-tune过程在验证集上表现最优的模型会被保存在`${CHECKPOINT_DIR}/best_model`目录下,其中`${CHECKPOINT_DIR}`目录为Fine-tune时所选择的保存checkpoint的目录。 + +我们使用该模型来进行预测。predict.py脚本如下: + +```python +import paddle +import cv2 +import paddlehub as hub + +if __name__ == '__main__': + model = hub.Module(name='fcn_hrnetw48_cityscapes', pretrained='/PATH/TO/CHECKPOINT') + img = cv2.imread("/PATH/TO/IMAGE") + model.predict(images=[img], visualization=True) +``` + +参数配置正确后,请执行脚本`python predict.py`。 +**Args** +* `images`:原始图像路径或BGR格式图片; +* `visualization`: 是否可视化,默认为True; +* `save_path`: 保存结果的路径,默认保存路径为'seg_result'。 + +**NOTE:** 进行预测时,所选择的module,checkpoint_dir,dataset必须和Fine-tune所用的一样。 + +## 服务部署 + +PaddleHub Serving可以部署一个在线图像分割服务。 + +### Step1: 启动PaddleHub Serving + +运行启动命令: + +```shell +$ hub serving start -m fcn_hrnetw48_cityscapes +``` + +这样就完成了一个图像分割服务化API的部署,默认端口号为8866。 + +**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +### Step2: 发送预测请求 + +配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + +```python +import requests +import json +import cv2 +import base64 + +import numpy as np + + +def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + +# 发送HTTP请求 +org_im = cv2.imread('/PATH/TO/IMAGE') +data = {'images':[cv2_to_base64(org_im)]} +headers = {"Content-type": "application/json"} +url = "http://127.0.0.1:8866/predict/fcn_hrnetw48_cityscapes" +r = requests.post(url=url, headers=headers, data=json.dumps(data)) +mask = base64_to_cv2(r.json()["results"][0]) +``` + +### 查看代码 + +https://github.com/PaddlePaddle/PaddleSeg + +### 依赖 + +paddlepaddle >= 2.0.0 + +paddlehub >= 2.0.0 diff --git a/modules/image/semantic_segmentation/fcn_hrnetw48_cityscapes/hrnet.py b/modules/image/semantic_segmentation/fcn_hrnetw48_cityscapes/hrnet.py new file mode 100644 index 0000000000000000000000000000000000000000..72d29357247626cc38c07e586b2f4dffc067513c --- /dev/null +++ b/modules/image/semantic_segmentation/fcn_hrnetw48_cityscapes/hrnet.py @@ -0,0 +1,528 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import math +from typing import List + +import paddle +import paddle.nn as nn +import paddle.nn.functional as F + +import fcn_hrnetw48_cityscapes.layers as layers + + +class HRNet_W48(nn.Layer): + """ + The HRNet implementation based on PaddlePaddle. + The original article refers to + Jingdong Wang, et, al. "HRNet:Deep High-Resolution Representation Learning for Visual Recognition" + (https://arxiv.org/pdf/1908.07919.pdf). + Args: + stage1_num_modules (int, optional): Number of modules for stage1. Default 1. + stage1_num_blocks (list, optional): Number of blocks per module for stage1. Default (4). + stage1_num_channels (list, optional): Number of channels per branch for stage1. Default (64). 
+ stage2_num_modules (int, optional): Number of modules for stage2. Default 1. + stage2_num_blocks (list, optional): Number of blocks per module for stage2. Default (4, 4). + stage2_num_channels (list, optional): Number of channels per branch for stage2. Default (48, 96). + stage3_num_modules (int, optional): Number of modules for stage3. Default 4. + stage3_num_blocks (list, optional): Number of blocks per module for stage3. Default (4, 4, 4). + stage3_num_channels (list, optional): Number of channels per branch for stage3. Default [48, 96, 192). + stage4_num_modules (int, optional): Number of modules for stage4. Default 3. + stage4_num_blocks (list, optional): Number of blocks per module for stage4. Default (4, 4, 4, 4). + stage4_num_channels (list, optional): Number of channels per branch for stage4. Default (48, 96, 192. 384). + has_se (bool, optional): Whether to use Squeeze-and-Excitation module. Default False. + align_corners (bool, optional): An argument of F.interpolate. It should be set to False when the feature size is even, + e.g. 1024x512, otherwise it is True, e.g. 769x769. Default: False. + """ + + def __init__(self, + stage1_num_modules: int = 1, + stage1_num_blocks: List[int] = [4], + stage1_num_channels: List[int] = [64], + stage2_num_modules: int = 1, + stage2_num_blocks: List[int] = [4, 4], + stage2_num_channels: List[int] = [48, 96], + stage3_num_modules: int = 4, + stage3_num_blocks: List[int] = [4, 4, 4], + stage3_num_channels: List[int] = [48, 96, 192], + stage4_num_modules: int = 3, + stage4_num_blocks: List[int] = [4, 4, 4, 4], + stage4_num_channels: List[int] = [48, 96, 192, 384], + has_se=False, + align_corners=False): + super(HRNet_W48, self).__init__() + self.stage1_num_modules = stage1_num_modules + self.stage1_num_blocks = stage1_num_blocks + self.stage1_num_channels = stage1_num_channels + self.stage2_num_modules = stage2_num_modules + self.stage2_num_blocks = stage2_num_blocks + self.stage2_num_channels = stage2_num_channels + self.stage3_num_modules = stage3_num_modules + self.stage3_num_blocks = stage3_num_blocks + self.stage3_num_channels = stage3_num_channels + self.stage4_num_modules = stage4_num_modules + self.stage4_num_blocks = stage4_num_blocks + self.stage4_num_channels = stage4_num_channels + self.has_se = has_se + self.align_corners = align_corners + self.feat_channels = [sum(stage4_num_channels)] + + self.conv_layer1_1 = layers.ConvBNReLU( + in_channels=3, out_channels=64, kernel_size=3, stride=2, padding='same', bias_attr=False) + + self.conv_layer1_2 = layers.ConvBNReLU( + in_channels=64, out_channels=64, kernel_size=3, stride=2, padding='same', bias_attr=False) + + self.la1 = Layer1( + num_channels=64, + num_blocks=self.stage1_num_blocks[0], + num_filters=self.stage1_num_channels[0], + has_se=has_se, + name="layer2") + + self.tr1 = TransitionLayer( + in_channels=[self.stage1_num_channels[0] * 4], out_channels=self.stage2_num_channels, name="tr1") + + self.st2 = Stage( + num_channels=self.stage2_num_channels, + num_modules=self.stage2_num_modules, + num_blocks=self.stage2_num_blocks, + num_filters=self.stage2_num_channels, + has_se=self.has_se, + name="st2", + align_corners=align_corners) + + self.tr2 = TransitionLayer( + in_channels=self.stage2_num_channels, out_channels=self.stage3_num_channels, name="tr2") + self.st3 = Stage( + num_channels=self.stage3_num_channels, + num_modules=self.stage3_num_modules, + num_blocks=self.stage3_num_blocks, + num_filters=self.stage3_num_channels, + has_se=self.has_se, + name="st3", + 
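+            # align_corners is forwarded to the bilinear up-sampling used when fusing the parallel branches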
align_corners=align_corners) + + self.tr3 = TransitionLayer( + in_channels=self.stage3_num_channels, out_channels=self.stage4_num_channels, name="tr3") + self.st4 = Stage( + num_channels=self.stage4_num_channels, + num_modules=self.stage4_num_modules, + num_blocks=self.stage4_num_blocks, + num_filters=self.stage4_num_channels, + has_se=self.has_se, + name="st4", + align_corners=align_corners) + + def forward(self, x: paddle.Tensor) -> List[paddle.Tensor]: + conv1 = self.conv_layer1_1(x) + conv2 = self.conv_layer1_2(conv1) + + la1 = self.la1(conv2) + + tr1 = self.tr1([la1]) + st2 = self.st2(tr1) + + tr2 = self.tr2(st2) + st3 = self.st3(tr2) + + tr3 = self.tr3(st3) + st4 = self.st4(tr3) + + size = paddle.shape(st4[0])[2:] + x1 = F.interpolate(st4[1], size, mode='bilinear', align_corners=self.align_corners) + x2 = F.interpolate(st4[2], size, mode='bilinear', align_corners=self.align_corners) + x3 = F.interpolate(st4[3], size, mode='bilinear', align_corners=self.align_corners) + x = paddle.concat([st4[0], x1, x2, x3], axis=1) + + return [x] + + +class Layer1(nn.Layer): + def __init__(self, num_channels: int, num_filters: int, num_blocks: int, has_se: bool = False, name: str = None): + super(Layer1, self).__init__() + + self.bottleneck_block_list = [] + + for i in range(num_blocks): + bottleneck_block = self.add_sublayer( + "bb_{}_{}".format(name, i + 1), + BottleneckBlock( + num_channels=num_channels if i == 0 else num_filters * 4, + num_filters=num_filters, + has_se=has_se, + stride=1, + downsample=True if i == 0 else False, + name=name + '_' + str(i + 1))) + self.bottleneck_block_list.append(bottleneck_block) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + conv = x + for block_func in self.bottleneck_block_list: + conv = block_func(conv) + return conv + + +class TransitionLayer(nn.Layer): + def __init__(self, in_channels: int, out_channels: int, name: str = None): + super(TransitionLayer, self).__init__() + + num_in = len(in_channels) + num_out = len(out_channels) + self.conv_bn_func_list = [] + for i in range(num_out): + residual = None + if i < num_in: + if in_channels[i] != out_channels[i]: + residual = self.add_sublayer( + "transition_{}_layer_{}".format(name, i + 1), + layers.ConvBNReLU( + in_channels=in_channels[i], + out_channels=out_channels[i], + kernel_size=3, + padding='same', + bias_attr=False)) + else: + residual = self.add_sublayer( + "transition_{}_layer_{}".format(name, i + 1), + layers.ConvBNReLU( + in_channels=in_channels[-1], + out_channels=out_channels[i], + kernel_size=3, + stride=2, + padding='same', + bias_attr=False)) + self.conv_bn_func_list.append(residual) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + outs = [] + for idx, conv_bn_func in enumerate(self.conv_bn_func_list): + if conv_bn_func is None: + outs.append(x[idx]) + else: + if idx < len(x): + outs.append(conv_bn_func(x[idx])) + else: + outs.append(conv_bn_func(x[-1])) + return outs + + +class Branches(nn.Layer): + def __init__(self, num_blocks: int, in_channels: int, out_channels: int, has_se: bool = False, name: str = None): + super(Branches, self).__init__() + + self.basic_block_list = [] + + for i in range(len(out_channels)): + self.basic_block_list.append([]) + for j in range(num_blocks[i]): + in_ch = in_channels[i] if j == 0 else out_channels[i] + basic_block_func = self.add_sublayer( + "bb_{}_branch_layer_{}_{}".format(name, i + 1, j + 1), + BasicBlock( + num_channels=in_ch, + num_filters=out_channels[i], + has_se=has_se, + name=name + '_branch_layer_' + str(i + 1) + '_' + 
str(j + 1))) + self.basic_block_list[i].append(basic_block_func) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + outs = [] + for idx, input in enumerate(x): + conv = input + for basic_block_func in self.basic_block_list[idx]: + conv = basic_block_func(conv) + outs.append(conv) + return outs + + +class BottleneckBlock(nn.Layer): + def __init__(self, + num_channels: int, + num_filters: int, + has_se: bool, + stride: int = 1, + downsample: bool = False, + name: str = None): + super(BottleneckBlock, self).__init__() + + self.has_se = has_se + self.downsample = downsample + + self.conv1 = layers.ConvBNReLU( + in_channels=num_channels, out_channels=num_filters, kernel_size=1, padding='same', bias_attr=False) + + self.conv2 = layers.ConvBNReLU( + in_channels=num_filters, + out_channels=num_filters, + kernel_size=3, + stride=stride, + padding='same', + bias_attr=False) + + self.conv3 = layers.ConvBN( + in_channels=num_filters, out_channels=num_filters * 4, kernel_size=1, padding='same', bias_attr=False) + + if self.downsample: + self.conv_down = layers.ConvBN( + in_channels=num_channels, out_channels=num_filters * 4, kernel_size=1, padding='same', bias_attr=False) + + if self.has_se: + self.se = SELayer( + num_channels=num_filters * 4, num_filters=num_filters * 4, reduction_ratio=16, name=name + '_fc') + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + residual = x + conv1 = self.conv1(x) + conv2 = self.conv2(conv1) + conv3 = self.conv3(conv2) + + if self.downsample: + residual = self.conv_down(x) + + if self.has_se: + conv3 = self.se(conv3) + + y = conv3 + residual + y = F.relu(y) + return y + + +class BasicBlock(nn.Layer): + def __init__(self, + num_channels: int, + num_filters: int, + stride: int = 1, + has_se: bool = False, + downsample: bool = False, + name: str = None): + super(BasicBlock, self).__init__() + + self.has_se = has_se + self.downsample = downsample + + self.conv1 = layers.ConvBNReLU( + in_channels=num_channels, + out_channels=num_filters, + kernel_size=3, + stride=stride, + padding='same', + bias_attr=False) + self.conv2 = layers.ConvBN( + in_channels=num_filters, out_channels=num_filters, kernel_size=3, padding='same', bias_attr=False) + + if self.downsample: + self.conv_down = layers.ConvBNReLU( + in_channels=num_channels, out_channels=num_filters, kernel_size=1, padding='same', bias_attr=False) + + if self.has_se: + self.se = SELayer(num_channels=num_filters, num_filters=num_filters, reduction_ratio=16, name=name + '_fc') + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + residual = x + conv1 = self.conv1(x) + conv2 = self.conv2(conv1) + + if self.downsample: + residual = self.conv_down(x) + + if self.has_se: + conv2 = self.se(conv2) + + y = conv2 + residual + y = F.relu(y) + return y + + +class SELayer(nn.Layer): + def __init__(self, num_channels: int, num_filters: int, reduction_ratio: float, name: str = None): + super(SELayer, self).__init__() + + self.pool2d_gap = nn.AdaptiveAvgPool2D(1) + + self._num_channels = num_channels + + med_ch = int(num_channels / reduction_ratio) + stdv = 1.0 / math.sqrt(num_channels * 1.0) + self.squeeze = nn.Linear( + num_channels, med_ch, weight_attr=paddle.ParamAttr(initializer=nn.initializer.Uniform(-stdv, stdv))) + + stdv = 1.0 / math.sqrt(med_ch * 1.0) + self.excitation = nn.Linear( + med_ch, num_filters, weight_attr=paddle.ParamAttr(initializer=nn.initializer.Uniform(-stdv, stdv))) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + pool = self.pool2d_gap(x) + pool = paddle.reshape(pool, shape=[-1, 
self._num_channels]) + squeeze = self.squeeze(pool) + squeeze = F.relu(squeeze) + excitation = self.excitation(squeeze) + excitation = F.sigmoid(excitation) + excitation = paddle.reshape(excitation, shape=[-1, self._num_channels, 1, 1]) + out = x * excitation + return out + + +class Stage(nn.Layer): + def __init__(self, + num_channels: int, + num_modules: int, + num_blocks: int, + num_filters: int, + has_se: bool = False, + multi_scale_output: bool = True, + name: str = None, + align_corners: bool = False): + super(Stage, self).__init__() + + self._num_modules = num_modules + + self.stage_func_list = [] + for i in range(num_modules): + if i == num_modules - 1 and not multi_scale_output: + stage_func = self.add_sublayer( + "stage_{}_{}".format(name, i + 1), + HighResolutionModule( + num_channels=num_channels, + num_blocks=num_blocks, + num_filters=num_filters, + has_se=has_se, + multi_scale_output=False, + name=name + '_' + str(i + 1), + align_corners=align_corners)) + else: + stage_func = self.add_sublayer( + "stage_{}_{}".format(name, i + 1), + HighResolutionModule( + num_channels=num_channels, + num_blocks=num_blocks, + num_filters=num_filters, + has_se=has_se, + name=name + '_' + str(i + 1), + align_corners=align_corners)) + + self.stage_func_list.append(stage_func) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + out = x + for idx in range(self._num_modules): + out = self.stage_func_list[idx](out) + return out + + +class HighResolutionModule(nn.Layer): + def __init__(self, + num_channels: int, + num_blocks: int, + num_filters: int, + has_se: bool = False, + multi_scale_output: bool = True, + name: str = None, + align_corners: bool = False): + super(HighResolutionModule, self).__init__() + + self.branches_func = Branches( + num_blocks=num_blocks, in_channels=num_channels, out_channels=num_filters, has_se=has_se, name=name) + + self.fuse_func = FuseLayers( + in_channels=num_filters, + out_channels=num_filters, + multi_scale_output=multi_scale_output, + name=name, + align_corners=align_corners) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + out = self.branches_func(x) + out = self.fuse_func(out) + return out + + +class FuseLayers(nn.Layer): + def __init__(self, + in_channels: int, + out_channels: int, + multi_scale_output: bool = True, + name: str = None, + align_corners: bool = False): + super(FuseLayers, self).__init__() + + self._actual_ch = len(in_channels) if multi_scale_output else 1 + self._in_channels = in_channels + self.align_corners = align_corners + + self.residual_func_list = [] + for i in range(self._actual_ch): + for j in range(len(in_channels)): + if j > i: + residual_func = self.add_sublayer( + "residual_{}_layer_{}_{}".format(name, i + 1, j + 1), + layers.ConvBN( + in_channels=in_channels[j], + out_channels=out_channels[i], + kernel_size=1, + padding='same', + bias_attr=False)) + self.residual_func_list.append(residual_func) + elif j < i: + pre_num_filters = in_channels[j] + for k in range(i - j): + if k == i - j - 1: + residual_func = self.add_sublayer( + "residual_{}_layer_{}_{}_{}".format(name, i + 1, j + 1, k + 1), + layers.ConvBN( + in_channels=pre_num_filters, + out_channels=out_channels[i], + kernel_size=3, + stride=2, + padding='same', + bias_attr=False)) + pre_num_filters = out_channels[i] + else: + residual_func = self.add_sublayer( + "residual_{}_layer_{}_{}_{}".format(name, i + 1, j + 1, k + 1), + layers.ConvBNReLU( + in_channels=pre_num_filters, + out_channels=out_channels[j], + kernel_size=3, + stride=2, + padding='same', + 
bias_attr=False)) + pre_num_filters = out_channels[j] + self.residual_func_list.append(residual_func) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + outs = [] + residual_func_idx = 0 + for i in range(self._actual_ch): + residual = x[i] + residual_shape = paddle.shape(residual)[-2:] + for j in range(len(self._in_channels)): + if j > i: + y = self.residual_func_list[residual_func_idx](x[j]) + residual_func_idx += 1 + + y = F.interpolate(y, residual_shape, mode='bilinear', align_corners=self.align_corners) + residual = residual + y + elif j < i: + y = x[j] + for k in range(i - j): + y = self.residual_func_list[residual_func_idx](y) + residual_func_idx += 1 + + residual = residual + y + + residual = F.relu(residual) + outs.append(residual) + + return outs diff --git a/modules/image/semantic_segmentation/fcn_hrnetw48_cityscapes/layers.py b/modules/image/semantic_segmentation/fcn_hrnetw48_cityscapes/layers.py new file mode 100644 index 0000000000000000000000000000000000000000..09fd7d68e8a34a84c921dbe230749869040308c3 --- /dev/null +++ b/modules/image/semantic_segmentation/fcn_hrnetw48_cityscapes/layers.py @@ -0,0 +1,297 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Tuple + +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +from paddle.nn import Conv2D, AvgPool2D + + +def SyncBatchNorm(*args, **kwargs): + """In cpu environment nn.SyncBatchNorm does not have kernel so use nn.BatchNorm2D instead""" + if paddle.get_device() == 'cpu': + return nn.BatchNorm2D(*args, **kwargs) + else: + return nn.SyncBatchNorm(*args, **kwargs) + + +class ConvBNLayer(nn.Layer): + """Basic conv bn relu layer.""" + + def __init__(self, + in_channels: int, + out_channels: int, + kernel_size: int, + stride: int = 1, + dilation: int = 1, + groups: int = 1, + is_vd_mode: bool = False, + act: str = None, + name: str = None): + super(ConvBNLayer, self).__init__() + + self.is_vd_mode = is_vd_mode + self._pool2d_avg = AvgPool2D(kernel_size=2, stride=2, padding=0, ceil_mode=True) + self._conv = Conv2D( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + padding=(kernel_size - 1) // 2 if dilation == 1 else 0, + dilation=dilation, + groups=groups, + bias_attr=False) + + self._batch_norm = SyncBatchNorm(out_channels) + self._act_op = Activation(act=act) + + def forward(self, inputs: paddle.Tensor) -> paddle.Tensor: + if self.is_vd_mode: + inputs = self._pool2d_avg(inputs) + y = self._conv(inputs) + y = self._batch_norm(y) + y = self._act_op(y) + + return y + + +class BottleneckBlock(nn.Layer): + """Residual bottleneck block""" + + def __init__(self, + in_channels: int, + out_channels: int, + stride: int, + shortcut: bool = True, + if_first: bool = False, + dilation: int = 1, + name: str = None): + super(BottleneckBlock, self).__init__() + + self.conv0 = ConvBNLayer( + in_channels=in_channels, out_channels=out_channels, kernel_size=1, act='relu', name=name + 
"_branch2a") + + self.dilation = dilation + + self.conv1 = ConvBNLayer( + in_channels=out_channels, + out_channels=out_channels, + kernel_size=3, + stride=stride, + act='relu', + dilation=dilation, + name=name + "_branch2b") + self.conv2 = ConvBNLayer( + in_channels=out_channels, out_channels=out_channels * 4, kernel_size=1, act=None, name=name + "_branch2c") + + if not shortcut: + self.short = ConvBNLayer( + in_channels=in_channels, + out_channels=out_channels * 4, + kernel_size=1, + stride=1, + is_vd_mode=False if if_first or stride == 1 else True, + name=name + "_branch1") + + self.shortcut = shortcut + + def forward(self, inputs: paddle.Tensor) -> paddle.Tensor: + y = self.conv0(inputs) + if self.dilation > 1: + padding = self.dilation + y = F.pad(y, [padding, padding, padding, padding]) + + conv1 = self.conv1(y) + conv2 = self.conv2(conv1) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + + y = paddle.add(x=short, y=conv2) + y = F.relu(y) + return y + + +class SeparableConvBNReLU(nn.Layer): + """Depthwise Separable Convolution.""" + + def __init__(self, in_channels: int, out_channels: int, kernel_size: int, padding: str = 'same', **kwargs: dict): + super(SeparableConvBNReLU, self).__init__() + self.depthwise_conv = ConvBN( + in_channels, + out_channels=in_channels, + kernel_size=kernel_size, + padding=padding, + groups=in_channels, + **kwargs) + self.piontwise_conv = ConvBNReLU(in_channels, out_channels, kernel_size=1, groups=1) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self.depthwise_conv(x) + x = self.piontwise_conv(x) + return x + + +class ConvBN(nn.Layer): + """Basic conv bn layer""" + + def __init__(self, in_channels: int, out_channels: int, kernel_size: int, padding: str = 'same', **kwargs: dict): + super(ConvBN, self).__init__() + self._conv = Conv2D(in_channels, out_channels, kernel_size, padding=padding, **kwargs) + self._batch_norm = SyncBatchNorm(out_channels) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self._conv(x) + x = self._batch_norm(x) + return x + + +class ConvBNReLU(nn.Layer): + """Basic conv bn relu layer.""" + + def __init__(self, in_channels: int, out_channels: int, kernel_size: int, padding: str = 'same', **kwargs: dict): + super(ConvBNReLU, self).__init__() + + self._conv = Conv2D(in_channels, out_channels, kernel_size, padding=padding, **kwargs) + self._batch_norm = SyncBatchNorm(out_channels) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self._conv(x) + x = self._batch_norm(x) + x = F.relu(x) + return x + + +class Activation(nn.Layer): + """ + The wrapper of activations. + Args: + act (str, optional): The activation name in lowercase. It must be one of ['elu', 'gelu', + 'hardshrink', 'tanh', 'hardtanh', 'prelu', 'relu', 'relu6', 'selu', 'leakyrelu', 'sigmoid', + 'softmax', 'softplus', 'softshrink', 'softsign', 'tanhshrink', 'logsigmoid', 'logsoftmax', + 'hsigmoid']. Default: None, means identical transformation. + Returns: + A callable object of Activation. + Raises: + KeyError: When parameter `act` is not in the optional range. 
+ Examples: + from paddleseg.models.common.activation import Activation + relu = Activation("relu") + print(relu) + # + sigmoid = Activation("sigmoid") + print(sigmoid) + # + not_exit_one = Activation("not_exit_one") + # KeyError: "not_exit_one does not exist in the current dict_keys(['elu', 'gelu', 'hardshrink', + # 'tanh', 'hardtanh', 'prelu', 'relu', 'relu6', 'selu', 'leakyrelu', 'sigmoid', 'softmax', + # 'softplus', 'softshrink', 'softsign', 'tanhshrink', 'logsigmoid', 'logsoftmax', 'hsigmoid'])" + """ + + def __init__(self, act: str = None): + super(Activation, self).__init__() + + self._act = act + upper_act_names = nn.layer.activation.__dict__.keys() + lower_act_names = [act.lower() for act in upper_act_names] + act_dict = dict(zip(lower_act_names, upper_act_names)) + + if act is not None: + if act in act_dict.keys(): + act_name = act_dict[act] + self.act_func = eval("nn.layer.activation.{}()".format(act_name)) + else: + raise KeyError("{} does not exist in the current {}".format(act, act_dict.keys())) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + if self._act is not None: + return self.act_func(x) + else: + return x + + +class ASPPModule(nn.Layer): + """ + Atrous Spatial Pyramid Pooling. + + Args: + aspp_ratios (tuple): The dilation rate using in ASSP module. + in_channels (int): The number of input channels. + out_channels (int): The number of output channels. + align_corners (bool): An argument of F.interpolate. It should be set to False when the output size of feature + is even, e.g. 1024x512, otherwise it is True, e.g. 769x769. + use_sep_conv (bool, optional): If using separable conv in ASPP module. Default: False. + image_pooling (bool, optional): If augmented with image-level features. Default: False + """ + + def __init__(self, + aspp_ratios: Tuple[int], + in_channels: int, + out_channels: int, + align_corners: bool, + use_sep_conv: bool = False, + image_pooling: bool = False): + super().__init__() + + self.align_corners = align_corners + self.aspp_blocks = nn.LayerList() + + for ratio in aspp_ratios: + if use_sep_conv and ratio > 1: + conv_func = SeparableConvBNReLU + else: + conv_func = ConvBNReLU + + block = conv_func( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=1 if ratio == 1 else 3, + dilation=ratio, + padding=0 if ratio == 1 else ratio) + self.aspp_blocks.append(block) + + out_size = len(self.aspp_blocks) + + if image_pooling: + self.global_avg_pool = nn.Sequential( + nn.AdaptiveAvgPool2D(output_size=(1, 1)), + ConvBNReLU(in_channels, out_channels, kernel_size=1, bias_attr=False)) + out_size += 1 + self.image_pooling = image_pooling + + self.conv_bn_relu = ConvBNReLU(in_channels=out_channels * out_size, out_channels=out_channels, kernel_size=1) + + self.dropout = nn.Dropout(p=0.1) # drop rate + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + outputs = [] + for block in self.aspp_blocks: + y = block(x) + y = F.interpolate(y, x.shape[2:], mode='bilinear', align_corners=self.align_corners) + outputs.append(y) + + if self.image_pooling: + img_avg = self.global_avg_pool(x) + img_avg = F.interpolate(img_avg, x.shape[2:], mode='bilinear', align_corners=self.align_corners) + outputs.append(img_avg) + + x = paddle.concat(outputs, axis=1) + x = self.conv_bn_relu(x) + x = self.dropout(x) + + return x diff --git a/modules/image/semantic_segmentation/fcn_hrnetw48_cityscapes/module.py b/modules/image/semantic_segmentation/fcn_hrnetw48_cityscapes/module.py new file mode 100644 index 
0000000000000000000000000000000000000000..c7ff6d98c465fd6bc7ffed34c9142d1bdb89c60f --- /dev/null +++ b/modules/image/semantic_segmentation/fcn_hrnetw48_cityscapes/module.py @@ -0,0 +1,133 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +from typing import Union, List, Tuple + +import paddle +from paddle import nn +import paddle.nn.functional as F +import numpy as np +from paddlehub.module.module import moduleinfo +import paddlehub.vision.segmentation_transforms as T +from paddlehub.module.cv_module import ImageSegmentationModule + +from fcn_hrnetw48_cityscapes.hrnet import HRNet_W48 +import fcn_hrnetw48_cityscapes.layers as layers + + +@moduleinfo( + name="fcn_hrnetw48_cityscapes", + type="CV/semantic_segmentation", + author="paddlepaddle", + author_email="", + summary="Fcn_hrnetw48 is a segmentation model.", + version="1.0.0", + meta=ImageSegmentationModule) +class FCN(nn.Layer): + """ + A simple implementation for FCN based on PaddlePaddle. + + The original article refers to + Evan Shelhamer, et, al. "Fully Convolutional Networks for Semantic Segmentation" + (https://arxiv.org/abs/1411.4038). + + Args: + num_classes (int): The unique number of target classes. + backbone_indices (tuple, optional): The values in the tuple indicate the indices of output of backbone. + Default: (-1, ). + channels (int, optional): The channels between conv layer and the last layer of FCNHead. + If None, it will be the number of channels of input features. Default: None. + align_corners (bool): An argument of F.interpolate. It should be set to False when the output size of feature + is even, e.g. 1024x512, otherwise it is True, e.g. 769x769. Default: False. + pretrained (str, optional): The path or url of pretrained model. 
Default: None + """ + + def __init__(self, + num_classes: int = 19, + backbone_indices: Tuple[int] = (-1, ), + channels: int = None, + align_corners: bool = False, + pretrained: str = None): + super(FCN, self).__init__() + + self.backbone = HRNet_W48() + backbone_channels = [self.backbone.feat_channels[i] for i in backbone_indices] + + self.head = FCNHead(num_classes, backbone_indices, backbone_channels, channels) + + self.align_corners = align_corners + self.transforms = T.Compose([T.Normalize()]) + + if pretrained is not None: + model_dict = paddle.load(pretrained) + self.set_dict(model_dict) + print("load custom parameters success") + + else: + checkpoint = os.path.join(self.directory, 'model.pdparams') + model_dict = paddle.load(checkpoint) + self.set_dict(model_dict) + print("load pretrained parameters success") + + def transform(self, img: Union[np.ndarray, str]) -> Union[np.ndarray, str]: + return self.transforms(img) + + def forward(self, x: paddle.Tensor) -> List[paddle.Tensor]: + feat_list = self.backbone(x) + logit_list = self.head(feat_list) + return [ + F.interpolate(logit, paddle.shape(x)[2:], mode='bilinear', align_corners=self.align_corners) + for logit in logit_list + ] + + +class FCNHead(nn.Layer): + """ + A simple implementation for FCNHead based on PaddlePaddle + + Args: + num_classes (int): The unique number of target classes. + backbone_indices (tuple, optional): The values in the tuple indicate the indices of output of backbone. + Default: (-1, ). + backbone_channels (tuple): The values of backbone channels. + Default: (270, ). + channels (int, optional): The channels between conv layer and the last layer of FCNHead. + If None, it will be the number of channels of input features. Default: None. + pretrained (str, optional): The path of pretrained model. 
Default: None + """ + + def __init__(self, + num_classes: int, + backbone_indices: Tuple[int] = (-1, ), + backbone_channels: Tuple[int] = (270, ), + channels: int = None): + super(FCNHead, self).__init__() + + self.num_classes = num_classes + self.backbone_indices = backbone_indices + if channels is None: + channels = backbone_channels[0] + + self.conv_1 = layers.ConvBNReLU( + in_channels=backbone_channels[0], out_channels=channels, kernel_size=1, padding='same', stride=1) + self.cls = nn.Conv2D(in_channels=channels, out_channels=self.num_classes, kernel_size=1, stride=1, padding=0) + + def forward(self, feat_list: nn.Layer) -> List[paddle.Tensor]: + logit_list = [] + x = feat_list[self.backbone_indices[0]] + x = self.conv_1(x) + logit = self.cls(x) + logit_list.append(logit) + return logit_list diff --git a/modules/image/semantic_segmentation/fcn_hrnetw48_voc/README.md b/modules/image/semantic_segmentation/fcn_hrnetw48_voc/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1c42e162681a358b0a72e4aa2ac053cd7303a7ae --- /dev/null +++ b/modules/image/semantic_segmentation/fcn_hrnetw48_voc/README.md @@ -0,0 +1,174 @@ +# PaddleHub 图像分割 + +## 模型预测 + + +若想使用我们提供的预训练模型进行预测,可使用如下脚本: + +```python +import paddle +import cv2 +import paddlehub as hub + +if __name__ == '__main__': + model = hub.Module(name='fcn_hrnetw48_voc') + img = cv2.imread("/PATH/TO/IMAGE") + model.predict(images=[img], visualization=True) +``` + + +## 如何开始Fine-tune + +在完成安装PaddlePaddle与PaddleHub后,通过执行`python train.py`即可开始使用fcn_hrnetw48_voc模型对OpticDiscSeg等数据集进行Fine-tune。 + +## 代码步骤 + +使用PaddleHub Fine-tune API进行Fine-tune可以分为4个步骤。 + +### Step1: 定义数据预处理方式 +```python +from paddlehub.vision.segmentation_transforms import Compose, Resize, Normalize + +transform = Compose([Resize(target_size=(512, 512)), Normalize()]) +``` + +`segmentation_transforms` 数据增强模块定义了丰富的针对图像分割数据的预处理方式,用户可按照需求替换自己需要的数据预处理方式。 + +### Step2: 下载数据集并使用 +```python +from paddlehub.datasets import OpticDiscSeg + +train_reader = OpticDiscSeg(transform, mode='train') + +``` +* `transform`: 数据预处理方式。 +* `mode`: 选择数据模式,可选项有 `train`, `test`, `val`, 默认为`train`。 + +数据集的准备代码可以参考 [opticdiscseg.py](../../paddlehub/datasets/opticdiscseg.py)。`hub.datasets.OpticDiscSeg()`会自动从网络下载数据集并解压到用户目录下`$HOME/.paddlehub/dataset`目录。 + +### Step3: 加载预训练模型 + +```python +model = hub.Module(name='fcn_hrnetw48_voc', num_classes=2, pretrained=None) +``` +* `name`: 选择预训练模型的名字。 +* `num_classes`: 分割模型的类别数目。 +* `pretrained`: 是否加载自己训练的模型,若为None,则加载提供的模型默认参数。 + +### Step4: 选择优化策略和运行配置 + +```python +scheduler = paddle.optimizer.lr.PolynomialDecay(learning_rate=0.01, decay_steps=1000, power=0.9, end_lr=0.0001) +optimizer = paddle.optimizer.Adam(learning_rate=scheduler, parameters=model.parameters()) +trainer = Trainer(model, optimizer, checkpoint_dir='test_ckpt_img_ocr', use_gpu=True) +``` + +#### 优化策略 + +Paddle2.0提供了多种优化器选择,如`SGD`, `Adam`, `Adamax`等,其中`Adam`: + +* `learning_rate`: 全局学习率。 +* `parameters`: 待优化模型参数。 + +#### 运行配置 +`Trainer` 主要控制Fine-tune的训练,包含以下可控制的参数: + +* `model`: 被优化模型; +* `optimizer`: 优化器选择; +* `use_gpu`: 是否使用gpu,默认为False; +* `use_vdl`: 是否使用vdl可视化训练过程; +* `checkpoint_dir`: 保存模型参数的地址; +* `compare_metrics`: 保存最优模型的衡量指标; + +`trainer.train` 主要控制具体的训练过程,包含以下可控制的参数: + +* `train_dataset`: 训练时所用的数据集; +* `epochs`: 训练轮数; +* `batch_size`: 训练的批大小,如果使用GPU,请根据实际情况调整batch_size; +* `num_workers`: works的数量,默认为0; +* `eval_dataset`: 验证集; +* `log_interval`: 打印日志的间隔, 单位为执行批训练的次数。 +* `save_interval`: 保存模型的间隔频次,单位为执行训练的轮数。 + +## 模型预测 + 
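+在进行预测之前,可以把上面四个步骤组合成一个完整的`train.py`脚本来完成Fine-tune。以下脚本仅为示意:数据集、`checkpoint_dir`与各项超参数请按实际任务调整,`Trainer`的导入路径以所安装的PaddleHub版本为准。
+
+```python
+# train.py:串联Step1~Step4并启动训练(示例脚本)
+import paddle
+import paddlehub as hub
+from paddlehub.finetune.trainer import Trainer
+from paddlehub.datasets import OpticDiscSeg
+from paddlehub.vision.segmentation_transforms import Compose, Resize, Normalize
+
+if __name__ == '__main__':
+    # Step1: 数据预处理方式
+    transform = Compose([Resize(target_size=(512, 512)), Normalize()])
+    # Step2: 训练集与验证集
+    train_reader = OpticDiscSeg(transform, mode='train')
+    eval_reader = OpticDiscSeg(transform, mode='val')
+    # Step3: 加载预训练模型
+    model = hub.Module(name='fcn_hrnetw48_voc', num_classes=2, pretrained=None)
+    # Step4: 优化策略和运行配置
+    scheduler = paddle.optimizer.lr.PolynomialDecay(learning_rate=0.01, decay_steps=1000, power=0.9, end_lr=0.0001)
+    optimizer = paddle.optimizer.Adam(learning_rate=scheduler, parameters=model.parameters())
+    trainer = Trainer(model, optimizer, checkpoint_dir='test_ckpt_img_seg', use_gpu=True)
+    # 启动训练,各参数含义见上文`trainer.train`的说明
+    trainer.train(train_reader, epochs=10, batch_size=4, eval_dataset=eval_reader, log_interval=10, save_interval=1)
+```
+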
+当完成Fine-tune后,Fine-tune过程在验证集上表现最优的模型会被保存在`${CHECKPOINT_DIR}/best_model`目录下,其中`${CHECKPOINT_DIR}`目录为Fine-tune时所选择的保存checkpoint的目录。 + +我们使用该模型来进行预测。predict.py脚本如下: + +```python +import paddle +import cv2 +import paddlehub as hub + +if __name__ == '__main__': + model = hub.Module(name='fcn_hrnetw48_voc', pretrained='/PATH/TO/CHECKPOINT') + img = cv2.imread("/PATH/TO/IMAGE") + model.predict(images=[img], visualization=True) +``` + +参数配置正确后,请执行脚本`python predict.py`。 +**Args** +* `images`:原始图像路径或BGR格式图片; +* `visualization`: 是否可视化,默认为True; +* `save_path`: 保存结果的路径,默认保存路径为'seg_result'。 + +**NOTE:** 进行预测时,所选择的module,checkpoint_dir,dataset必须和Fine-tune所用的一样。 + +## 服务部署 + +PaddleHub Serving可以部署一个在线图像分割服务。 + +### Step1: 启动PaddleHub Serving + +运行启动命令: + +```shell +$ hub serving start -m fcn_hrnetw48_voc +``` + +这样就完成了一个图像分割服务化API的部署,默认端口号为8866。 + +**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +### Step2: 发送预测请求 + +配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + +```python +import requests +import json +import cv2 +import base64 + +import numpy as np + + +def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + +# 发送HTTP请求 +org_im = cv2.imread('/PATH/TO/IMAGE') +data = {'images':[cv2_to_base64(org_im)]} +headers = {"Content-type": "application/json"} +url = "http://127.0.0.1:8866/predict/fcn_hrnetw48_voc" +r = requests.post(url=url, headers=headers, data=json.dumps(data)) +mask = base64_to_cv2(r.json()["results"][0]) +``` + +### 查看代码 + +https://github.com/PaddlePaddle/PaddleSeg + +### 依赖 + +paddlepaddle >= 2.0.0 + +paddlehub >= 2.0.0 diff --git a/modules/image/semantic_segmentation/fcn_hrnetw48_voc/hrnet.py b/modules/image/semantic_segmentation/fcn_hrnetw48_voc/hrnet.py new file mode 100644 index 0000000000000000000000000000000000000000..421d70392370a2b962627cc5bcf6f25d775dc454 --- /dev/null +++ b/modules/image/semantic_segmentation/fcn_hrnetw48_voc/hrnet.py @@ -0,0 +1,528 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import math +from typing import List + +import paddle +import paddle.nn as nn +import paddle.nn.functional as F + +import fcn_hrnetw48_voc.layers as layers + + +class HRNet_W48(nn.Layer): + """ + The HRNet implementation based on PaddlePaddle. + The original article refers to + Jingdong Wang, et, al. "HRNet:Deep High-Resolution Representation Learning for Visual Recognition" + (https://arxiv.org/pdf/1908.07919.pdf). + Args: + stage1_num_modules (int, optional): Number of modules for stage1. Default 1. + stage1_num_blocks (list, optional): Number of blocks per module for stage1. Default (4). + stage1_num_channels (list, optional): Number of channels per branch for stage1. Default (64). + stage2_num_modules (int, optional): Number of modules for stage2. Default 1. 
+ stage2_num_blocks (list, optional): Number of blocks per module for stage2. Default (4, 4). + stage2_num_channels (list, optional): Number of channels per branch for stage2. Default (48, 96). + stage3_num_modules (int, optional): Number of modules for stage3. Default 4. + stage3_num_blocks (list, optional): Number of blocks per module for stage3. Default (4, 4, 4). + stage3_num_channels (list, optional): Number of channels per branch for stage3. Default [48, 96, 192). + stage4_num_modules (int, optional): Number of modules for stage4. Default 3. + stage4_num_blocks (list, optional): Number of blocks per module for stage4. Default (4, 4, 4, 4). + stage4_num_channels (list, optional): Number of channels per branch for stage4. Default (48, 96, 192. 384). + has_se (bool, optional): Whether to use Squeeze-and-Excitation module. Default False. + align_corners (bool, optional): An argument of F.interpolate. It should be set to False when the feature size is even, + e.g. 1024x512, otherwise it is True, e.g. 769x769. Default: False. + """ + + def __init__(self, + stage1_num_modules: int = 1, + stage1_num_blocks: List[int] = [4], + stage1_num_channels: List[int] = [64], + stage2_num_modules: int = 1, + stage2_num_blocks: List[int] = [4, 4], + stage2_num_channels: List[int] = [48, 96], + stage3_num_modules: int = 4, + stage3_num_blocks: List[int] = [4, 4, 4], + stage3_num_channels: List[int] = [48, 96, 192], + stage4_num_modules: int = 3, + stage4_num_blocks: List[int] = [4, 4, 4, 4], + stage4_num_channels: List[int] = [48, 96, 192, 384], + has_se=False, + align_corners=False): + super(HRNet_W48, self).__init__() + self.stage1_num_modules = stage1_num_modules + self.stage1_num_blocks = stage1_num_blocks + self.stage1_num_channels = stage1_num_channels + self.stage2_num_modules = stage2_num_modules + self.stage2_num_blocks = stage2_num_blocks + self.stage2_num_channels = stage2_num_channels + self.stage3_num_modules = stage3_num_modules + self.stage3_num_blocks = stage3_num_blocks + self.stage3_num_channels = stage3_num_channels + self.stage4_num_modules = stage4_num_modules + self.stage4_num_blocks = stage4_num_blocks + self.stage4_num_channels = stage4_num_channels + self.has_se = has_se + self.align_corners = align_corners + self.feat_channels = [sum(stage4_num_channels)] + + self.conv_layer1_1 = layers.ConvBNReLU( + in_channels=3, out_channels=64, kernel_size=3, stride=2, padding='same', bias_attr=False) + + self.conv_layer1_2 = layers.ConvBNReLU( + in_channels=64, out_channels=64, kernel_size=3, stride=2, padding='same', bias_attr=False) + + self.la1 = Layer1( + num_channels=64, + num_blocks=self.stage1_num_blocks[0], + num_filters=self.stage1_num_channels[0], + has_se=has_se, + name="layer2") + + self.tr1 = TransitionLayer( + in_channels=[self.stage1_num_channels[0] * 4], out_channels=self.stage2_num_channels, name="tr1") + + self.st2 = Stage( + num_channels=self.stage2_num_channels, + num_modules=self.stage2_num_modules, + num_blocks=self.stage2_num_blocks, + num_filters=self.stage2_num_channels, + has_se=self.has_se, + name="st2", + align_corners=align_corners) + + self.tr2 = TransitionLayer( + in_channels=self.stage2_num_channels, out_channels=self.stage3_num_channels, name="tr2") + self.st3 = Stage( + num_channels=self.stage3_num_channels, + num_modules=self.stage3_num_modules, + num_blocks=self.stage3_num_blocks, + num_filters=self.stage3_num_channels, + has_se=self.has_se, + name="st3", + align_corners=align_corners) + + self.tr3 = TransitionLayer( + in_channels=self.stage3_num_channels, 
out_channels=self.stage4_num_channels, name="tr3") + self.st4 = Stage( + num_channels=self.stage4_num_channels, + num_modules=self.stage4_num_modules, + num_blocks=self.stage4_num_blocks, + num_filters=self.stage4_num_channels, + has_se=self.has_se, + name="st4", + align_corners=align_corners) + + def forward(self, x: paddle.Tensor) -> List[paddle.Tensor]: + conv1 = self.conv_layer1_1(x) + conv2 = self.conv_layer1_2(conv1) + + la1 = self.la1(conv2) + + tr1 = self.tr1([la1]) + st2 = self.st2(tr1) + + tr2 = self.tr2(st2) + st3 = self.st3(tr2) + + tr3 = self.tr3(st3) + st4 = self.st4(tr3) + + size = paddle.shape(st4[0])[2:] + x1 = F.interpolate(st4[1], size, mode='bilinear', align_corners=self.align_corners) + x2 = F.interpolate(st4[2], size, mode='bilinear', align_corners=self.align_corners) + x3 = F.interpolate(st4[3], size, mode='bilinear', align_corners=self.align_corners) + x = paddle.concat([st4[0], x1, x2, x3], axis=1) + + return [x] + + +class Layer1(nn.Layer): + def __init__(self, num_channels: int, num_filters: int, num_blocks: int, has_se: bool = False, name: str = None): + super(Layer1, self).__init__() + + self.bottleneck_block_list = [] + + for i in range(num_blocks): + bottleneck_block = self.add_sublayer( + "bb_{}_{}".format(name, i + 1), + BottleneckBlock( + num_channels=num_channels if i == 0 else num_filters * 4, + num_filters=num_filters, + has_se=has_se, + stride=1, + downsample=True if i == 0 else False, + name=name + '_' + str(i + 1))) + self.bottleneck_block_list.append(bottleneck_block) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + conv = x + for block_func in self.bottleneck_block_list: + conv = block_func(conv) + return conv + + +class TransitionLayer(nn.Layer): + def __init__(self, in_channels: int, out_channels: int, name: str = None): + super(TransitionLayer, self).__init__() + + num_in = len(in_channels) + num_out = len(out_channels) + self.conv_bn_func_list = [] + for i in range(num_out): + residual = None + if i < num_in: + if in_channels[i] != out_channels[i]: + residual = self.add_sublayer( + "transition_{}_layer_{}".format(name, i + 1), + layers.ConvBNReLU( + in_channels=in_channels[i], + out_channels=out_channels[i], + kernel_size=3, + padding='same', + bias_attr=False)) + else: + residual = self.add_sublayer( + "transition_{}_layer_{}".format(name, i + 1), + layers.ConvBNReLU( + in_channels=in_channels[-1], + out_channels=out_channels[i], + kernel_size=3, + stride=2, + padding='same', + bias_attr=False)) + self.conv_bn_func_list.append(residual) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + outs = [] + for idx, conv_bn_func in enumerate(self.conv_bn_func_list): + if conv_bn_func is None: + outs.append(x[idx]) + else: + if idx < len(x): + outs.append(conv_bn_func(x[idx])) + else: + outs.append(conv_bn_func(x[-1])) + return outs + + +class Branches(nn.Layer): + def __init__(self, num_blocks: int, in_channels: int, out_channels: int, has_se: bool = False, name: str = None): + super(Branches, self).__init__() + + self.basic_block_list = [] + + for i in range(len(out_channels)): + self.basic_block_list.append([]) + for j in range(num_blocks[i]): + in_ch = in_channels[i] if j == 0 else out_channels[i] + basic_block_func = self.add_sublayer( + "bb_{}_branch_layer_{}_{}".format(name, i + 1, j + 1), + BasicBlock( + num_channels=in_ch, + num_filters=out_channels[i], + has_se=has_se, + name=name + '_branch_layer_' + str(i + 1) + '_' + str(j + 1))) + self.basic_block_list[i].append(basic_block_func) + + def forward(self, x: paddle.Tensor) 
-> paddle.Tensor: + outs = [] + for idx, input in enumerate(x): + conv = input + for basic_block_func in self.basic_block_list[idx]: + conv = basic_block_func(conv) + outs.append(conv) + return outs + + +class BottleneckBlock(nn.Layer): + def __init__(self, + num_channels: int, + num_filters: int, + has_se: bool, + stride: int = 1, + downsample: bool = False, + name: str = None): + super(BottleneckBlock, self).__init__() + + self.has_se = has_se + self.downsample = downsample + + self.conv1 = layers.ConvBNReLU( + in_channels=num_channels, out_channels=num_filters, kernel_size=1, padding='same', bias_attr=False) + + self.conv2 = layers.ConvBNReLU( + in_channels=num_filters, + out_channels=num_filters, + kernel_size=3, + stride=stride, + padding='same', + bias_attr=False) + + self.conv3 = layers.ConvBN( + in_channels=num_filters, out_channels=num_filters * 4, kernel_size=1, padding='same', bias_attr=False) + + if self.downsample: + self.conv_down = layers.ConvBN( + in_channels=num_channels, out_channels=num_filters * 4, kernel_size=1, padding='same', bias_attr=False) + + if self.has_se: + self.se = SELayer( + num_channels=num_filters * 4, num_filters=num_filters * 4, reduction_ratio=16, name=name + '_fc') + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + residual = x + conv1 = self.conv1(x) + conv2 = self.conv2(conv1) + conv3 = self.conv3(conv2) + + if self.downsample: + residual = self.conv_down(x) + + if self.has_se: + conv3 = self.se(conv3) + + y = conv3 + residual + y = F.relu(y) + return y + + +class BasicBlock(nn.Layer): + def __init__(self, + num_channels: int, + num_filters: int, + stride: int = 1, + has_se: bool = False, + downsample: bool = False, + name: str = None): + super(BasicBlock, self).__init__() + + self.has_se = has_se + self.downsample = downsample + + self.conv1 = layers.ConvBNReLU( + in_channels=num_channels, + out_channels=num_filters, + kernel_size=3, + stride=stride, + padding='same', + bias_attr=False) + self.conv2 = layers.ConvBN( + in_channels=num_filters, out_channels=num_filters, kernel_size=3, padding='same', bias_attr=False) + + if self.downsample: + self.conv_down = layers.ConvBNReLU( + in_channels=num_channels, out_channels=num_filters, kernel_size=1, padding='same', bias_attr=False) + + if self.has_se: + self.se = SELayer(num_channels=num_filters, num_filters=num_filters, reduction_ratio=16, name=name + '_fc') + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + residual = x + conv1 = self.conv1(x) + conv2 = self.conv2(conv1) + + if self.downsample: + residual = self.conv_down(x) + + if self.has_se: + conv2 = self.se(conv2) + + y = conv2 + residual + y = F.relu(y) + return y + + +class SELayer(nn.Layer): + def __init__(self, num_channels: int, num_filters: int, reduction_ratio: float, name: str = None): + super(SELayer, self).__init__() + + self.pool2d_gap = nn.AdaptiveAvgPool2D(1) + + self._num_channels = num_channels + + med_ch = int(num_channels / reduction_ratio) + stdv = 1.0 / math.sqrt(num_channels * 1.0) + self.squeeze = nn.Linear( + num_channels, med_ch, weight_attr=paddle.ParamAttr(initializer=nn.initializer.Uniform(-stdv, stdv))) + + stdv = 1.0 / math.sqrt(med_ch * 1.0) + self.excitation = nn.Linear( + med_ch, num_filters, weight_attr=paddle.ParamAttr(initializer=nn.initializer.Uniform(-stdv, stdv))) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + pool = self.pool2d_gap(x) + pool = paddle.reshape(pool, shape=[-1, self._num_channels]) + squeeze = self.squeeze(pool) + squeeze = F.relu(squeeze) + excitation = 
self.excitation(squeeze) + excitation = F.sigmoid(excitation) + excitation = paddle.reshape(excitation, shape=[-1, self._num_channels, 1, 1]) + out = x * excitation + return out + + +class Stage(nn.Layer): + def __init__(self, + num_channels: int, + num_modules: int, + num_blocks: int, + num_filters: int, + has_se: bool = False, + multi_scale_output: bool = True, + name: str = None, + align_corners: bool = False): + super(Stage, self).__init__() + + self._num_modules = num_modules + + self.stage_func_list = [] + for i in range(num_modules): + if i == num_modules - 1 and not multi_scale_output: + stage_func = self.add_sublayer( + "stage_{}_{}".format(name, i + 1), + HighResolutionModule( + num_channels=num_channels, + num_blocks=num_blocks, + num_filters=num_filters, + has_se=has_se, + multi_scale_output=False, + name=name + '_' + str(i + 1), + align_corners=align_corners)) + else: + stage_func = self.add_sublayer( + "stage_{}_{}".format(name, i + 1), + HighResolutionModule( + num_channels=num_channels, + num_blocks=num_blocks, + num_filters=num_filters, + has_se=has_se, + name=name + '_' + str(i + 1), + align_corners=align_corners)) + + self.stage_func_list.append(stage_func) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + out = x + for idx in range(self._num_modules): + out = self.stage_func_list[idx](out) + return out + + +class HighResolutionModule(nn.Layer): + def __init__(self, + num_channels: int, + num_blocks: int, + num_filters: int, + has_se: bool = False, + multi_scale_output: bool = True, + name: str = None, + align_corners: bool = False): + super(HighResolutionModule, self).__init__() + + self.branches_func = Branches( + num_blocks=num_blocks, in_channels=num_channels, out_channels=num_filters, has_se=has_se, name=name) + + self.fuse_func = FuseLayers( + in_channels=num_filters, + out_channels=num_filters, + multi_scale_output=multi_scale_output, + name=name, + align_corners=align_corners) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + out = self.branches_func(x) + out = self.fuse_func(out) + return out + + +class FuseLayers(nn.Layer): + def __init__(self, + in_channels: int, + out_channels: int, + multi_scale_output: bool = True, + name: str = None, + align_corners: bool = False): + super(FuseLayers, self).__init__() + + self._actual_ch = len(in_channels) if multi_scale_output else 1 + self._in_channels = in_channels + self.align_corners = align_corners + + self.residual_func_list = [] + for i in range(self._actual_ch): + for j in range(len(in_channels)): + if j > i: + residual_func = self.add_sublayer( + "residual_{}_layer_{}_{}".format(name, i + 1, j + 1), + layers.ConvBN( + in_channels=in_channels[j], + out_channels=out_channels[i], + kernel_size=1, + padding='same', + bias_attr=False)) + self.residual_func_list.append(residual_func) + elif j < i: + pre_num_filters = in_channels[j] + for k in range(i - j): + if k == i - j - 1: + residual_func = self.add_sublayer( + "residual_{}_layer_{}_{}_{}".format(name, i + 1, j + 1, k + 1), + layers.ConvBN( + in_channels=pre_num_filters, + out_channels=out_channels[i], + kernel_size=3, + stride=2, + padding='same', + bias_attr=False)) + pre_num_filters = out_channels[i] + else: + residual_func = self.add_sublayer( + "residual_{}_layer_{}_{}_{}".format(name, i + 1, j + 1, k + 1), + layers.ConvBNReLU( + in_channels=pre_num_filters, + out_channels=out_channels[j], + kernel_size=3, + stride=2, + padding='same', + bias_attr=False)) + pre_num_filters = out_channels[j] + self.residual_func_list.append(residual_func) + 
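+    # Note: residual_func_list is filled in the same nested (i, j[, k]) order as the loops
+    # above, so forward() below consumes it with a single running index: for j > i a 1x1
+    # ConvBN first aligns channel numbers before bilinear upsampling, while for j < i a chain
+    # of stride-2 3x3 convolutions progressively downsamples the higher-resolution branch
+    # to the resolution of branch i.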
+ def forward(self, x: paddle.Tensor) -> paddle.Tensor: + outs = [] + residual_func_idx = 0 + for i in range(self._actual_ch): + residual = x[i] + residual_shape = paddle.shape(residual)[-2:] + for j in range(len(self._in_channels)): + if j > i: + y = self.residual_func_list[residual_func_idx](x[j]) + residual_func_idx += 1 + + y = F.interpolate(y, residual_shape, mode='bilinear', align_corners=self.align_corners) + residual = residual + y + elif j < i: + y = x[j] + for k in range(i - j): + y = self.residual_func_list[residual_func_idx](y) + residual_func_idx += 1 + + residual = residual + y + + residual = F.relu(residual) + outs.append(residual) + + return outs diff --git a/modules/image/semantic_segmentation/fcn_hrnetw48_voc/layers.py b/modules/image/semantic_segmentation/fcn_hrnetw48_voc/layers.py new file mode 100644 index 0000000000000000000000000000000000000000..aca5e911382235cb96d385091f1db261060bad7d --- /dev/null +++ b/modules/image/semantic_segmentation/fcn_hrnetw48_voc/layers.py @@ -0,0 +1,298 @@ +# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Tuple + +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +from paddle.nn.layer import activation +from paddle.nn import Conv2D, AvgPool2D + + +def SyncBatchNorm(*args, **kwargs): + """In cpu environment nn.SyncBatchNorm does not have kernel so use nn.BatchNorm2D instead""" + if paddle.get_device() == 'cpu': + return nn.BatchNorm2D(*args, **kwargs) + else: + return nn.SyncBatchNorm(*args, **kwargs) + + +class ConvBNLayer(nn.Layer): + """Basic conv bn relu layer.""" + + def __init__(self, + in_channels: int, + out_channels: int, + kernel_size: int, + stride: int = 1, + dilation: int = 1, + groups: int = 1, + is_vd_mode: bool = False, + act: str = None, + name: str = None): + super(ConvBNLayer, self).__init__() + + self.is_vd_mode = is_vd_mode + self._pool2d_avg = AvgPool2D(kernel_size=2, stride=2, padding=0, ceil_mode=True) + self._conv = Conv2D( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + padding=(kernel_size - 1) // 2 if dilation == 1 else 0, + dilation=dilation, + groups=groups, + bias_attr=False) + + self._batch_norm = SyncBatchNorm(out_channels) + self._act_op = Activation(act=act) + + def forward(self, inputs: paddle.Tensor) -> paddle.Tensor: + if self.is_vd_mode: + inputs = self._pool2d_avg(inputs) + y = self._conv(inputs) + y = self._batch_norm(y) + y = self._act_op(y) + + return y + + +class BottleneckBlock(nn.Layer): + """Residual bottleneck block""" + + def __init__(self, + in_channels: int, + out_channels: int, + stride: int, + shortcut: bool = True, + if_first: bool = False, + dilation: int = 1, + name: str = None): + super(BottleneckBlock, self).__init__() + + self.conv0 = ConvBNLayer( + in_channels=in_channels, out_channels=out_channels, kernel_size=1, act='relu', name=name + "_branch2a") + + self.dilation = dilation + + self.conv1 = ConvBNLayer( + 
in_channels=out_channels, + out_channels=out_channels, + kernel_size=3, + stride=stride, + act='relu', + dilation=dilation, + name=name + "_branch2b") + self.conv2 = ConvBNLayer( + in_channels=out_channels, out_channels=out_channels * 4, kernel_size=1, act=None, name=name + "_branch2c") + + if not shortcut: + self.short = ConvBNLayer( + in_channels=in_channels, + out_channels=out_channels * 4, + kernel_size=1, + stride=1, + is_vd_mode=False if if_first or stride == 1 else True, + name=name + "_branch1") + + self.shortcut = shortcut + + def forward(self, inputs: paddle.Tensor) -> paddle.Tensor: + y = self.conv0(inputs) + if self.dilation > 1: + padding = self.dilation + y = F.pad(y, [padding, padding, padding, padding]) + + conv1 = self.conv1(y) + conv2 = self.conv2(conv1) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + + y = paddle.add(x=short, y=conv2) + y = F.relu(y) + return y + + +class SeparableConvBNReLU(nn.Layer): + """Depthwise Separable Convolution.""" + + def __init__(self, in_channels: int, out_channels: int, kernel_size: int, padding: str = 'same', **kwargs: dict): + super(SeparableConvBNReLU, self).__init__() + self.depthwise_conv = ConvBN( + in_channels, + out_channels=in_channels, + kernel_size=kernel_size, + padding=padding, + groups=in_channels, + **kwargs) + self.piontwise_conv = ConvBNReLU(in_channels, out_channels, kernel_size=1, groups=1) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self.depthwise_conv(x) + x = self.piontwise_conv(x) + return x + + +class ConvBN(nn.Layer): + """Basic conv bn layer""" + + def __init__(self, in_channels: int, out_channels: int, kernel_size: int, padding: str = 'same', **kwargs: dict): + super(ConvBN, self).__init__() + self._conv = Conv2D(in_channels, out_channels, kernel_size, padding=padding, **kwargs) + self._batch_norm = SyncBatchNorm(out_channels) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self._conv(x) + x = self._batch_norm(x) + return x + + +class ConvBNReLU(nn.Layer): + """Basic conv bn relu layer.""" + + def __init__(self, in_channels: int, out_channels: int, kernel_size: int, padding: str = 'same', **kwargs: dict): + super(ConvBNReLU, self).__init__() + + self._conv = Conv2D(in_channels, out_channels, kernel_size, padding=padding, **kwargs) + self._batch_norm = SyncBatchNorm(out_channels) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self._conv(x) + x = self._batch_norm(x) + x = F.relu(x) + return x + + +class Activation(nn.Layer): + """ + The wrapper of activations. + Args: + act (str, optional): The activation name in lowercase. It must be one of ['elu', 'gelu', + 'hardshrink', 'tanh', 'hardtanh', 'prelu', 'relu', 'relu6', 'selu', 'leakyrelu', 'sigmoid', + 'softmax', 'softplus', 'softshrink', 'softsign', 'tanhshrink', 'logsigmoid', 'logsoftmax', + 'hsigmoid']. Default: None, means identical transformation. + Returns: + A callable object of Activation. + Raises: + KeyError: When parameter `act` is not in the optional range. 
+ Examples: + from paddleseg.models.common.activation import Activation + relu = Activation("relu") + print(relu) + # + sigmoid = Activation("sigmoid") + print(sigmoid) + # + not_exit_one = Activation("not_exit_one") + # KeyError: "not_exit_one does not exist in the current dict_keys(['elu', 'gelu', 'hardshrink', + # 'tanh', 'hardtanh', 'prelu', 'relu', 'relu6', 'selu', 'leakyrelu', 'sigmoid', 'softmax', + # 'softplus', 'softshrink', 'softsign', 'tanhshrink', 'logsigmoid', 'logsoftmax', 'hsigmoid'])" + """ + + def __init__(self, act: str = None): + super(Activation, self).__init__() + + self._act = act + upper_act_names = nn.layer.activation.__dict__.keys() + lower_act_names = [act.lower() for act in upper_act_names] + act_dict = dict(zip(lower_act_names, upper_act_names)) + + if act is not None: + if act in act_dict.keys(): + act_name = act_dict[act] + self.act_func = eval("nn.layer.activation.{}()".format(act_name)) + else: + raise KeyError("{} does not exist in the current {}".format(act, act_dict.keys())) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + if self._act is not None: + return self.act_func(x) + else: + return x + + +class ASPPModule(nn.Layer): + """ + Atrous Spatial Pyramid Pooling. + + Args: + aspp_ratios (tuple): The dilation rate using in ASSP module. + in_channels (int): The number of input channels. + out_channels (int): The number of output channels. + align_corners (bool): An argument of F.interpolate. It should be set to False when the output size of feature + is even, e.g. 1024x512, otherwise it is True, e.g. 769x769. + use_sep_conv (bool, optional): If using separable conv in ASPP module. Default: False. + image_pooling (bool, optional): If augmented with image-level features. Default: False + """ + + def __init__(self, + aspp_ratios: Tuple[int], + in_channels: int, + out_channels: int, + align_corners: bool, + use_sep_conv: bool = False, + image_pooling: bool = False): + super().__init__() + + self.align_corners = align_corners + self.aspp_blocks = nn.LayerList() + + for ratio in aspp_ratios: + if use_sep_conv and ratio > 1: + conv_func = SeparableConvBNReLU + else: + conv_func = ConvBNReLU + + block = conv_func( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=1 if ratio == 1 else 3, + dilation=ratio, + padding=0 if ratio == 1 else ratio) + self.aspp_blocks.append(block) + + out_size = len(self.aspp_blocks) + + if image_pooling: + self.global_avg_pool = nn.Sequential( + nn.AdaptiveAvgPool2D(output_size=(1, 1)), + ConvBNReLU(in_channels, out_channels, kernel_size=1, bias_attr=False)) + out_size += 1 + self.image_pooling = image_pooling + + self.conv_bn_relu = ConvBNReLU(in_channels=out_channels * out_size, out_channels=out_channels, kernel_size=1) + + self.dropout = nn.Dropout(p=0.1) # drop rate + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + outputs = [] + for block in self.aspp_blocks: + y = block(x) + y = F.interpolate(y, x.shape[2:], mode='bilinear', align_corners=self.align_corners) + outputs.append(y) + + if self.image_pooling: + img_avg = self.global_avg_pool(x) + img_avg = F.interpolate(img_avg, x.shape[2:], mode='bilinear', align_corners=self.align_corners) + outputs.append(img_avg) + + x = paddle.concat(outputs, axis=1) + x = self.conv_bn_relu(x) + x = self.dropout(x) + + return x diff --git a/modules/image/semantic_segmentation/fcn_hrnetw48_voc/module.py b/modules/image/semantic_segmentation/fcn_hrnetw48_voc/module.py new file mode 100644 index 
0000000000000000000000000000000000000000..b0a77b381cc224f0cb2f9f598d787a0a141c3d01 --- /dev/null +++ b/modules/image/semantic_segmentation/fcn_hrnetw48_voc/module.py @@ -0,0 +1,133 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +from typing import Union, List, Tuple + +import paddle +from paddle import nn +import paddle.nn.functional as F +import numpy as np +from paddlehub.module.module import moduleinfo +import paddlehub.vision.segmentation_transforms as T +from paddlehub.module.cv_module import ImageSegmentationModule + +from fcn_hrnetw48_voc.hrnet import HRNet_W48 +import fcn_hrnetw48_voc.layers as layers + + +@moduleinfo( + name="fcn_hrnetw48_voc", + type="CV/semantic_segmentation", + author="paddlepaddle", + author_email="", + summary="Fcn_hrnetw48 is a segmentation model.", + version="1.0.0", + meta=ImageSegmentationModule) +class FCN(nn.Layer): + """ + A simple implementation for FCN based on PaddlePaddle. + + The original article refers to + Evan Shelhamer, et, al. "Fully Convolutional Networks for Semantic Segmentation" + (https://arxiv.org/abs/1411.4038). + + Args: + num_classes (int): The unique number of target classes. + backbone_indices (tuple, optional): The values in the tuple indicate the indices of output of backbone. + Default: (-1, ). + channels (int, optional): The channels between conv layer and the last layer of FCNHead. + If None, it will be the number of channels of input features. Default: None. + align_corners (bool): An argument of F.interpolate. It should be set to False when the output size of feature + is even, e.g. 1024x512, otherwise it is True, e.g. 769x769. Default: False. + pretrained (str, optional): The path or url of pretrained model. 
Default: None + """ + + def __init__(self, + num_classes: int = 21, + backbone_indices: Tuple[int] = (-1, ), + channels: int = None, + align_corners: bool = False, + pretrained: str = None): + super(FCN, self).__init__() + + self.backbone = HRNet_W48() + backbone_channels = [self.backbone.feat_channels[i] for i in backbone_indices] + + self.head = FCNHead(num_classes, backbone_indices, backbone_channels, channels) + + self.align_corners = align_corners + self.transforms = T.Compose([T.Normalize()]) + + if pretrained is not None: + model_dict = paddle.load(pretrained) + self.set_dict(model_dict) + print("load custom parameters success") + + else: + checkpoint = os.path.join(self.directory, 'model.pdparams') + model_dict = paddle.load(checkpoint) + self.set_dict(model_dict) + print("load pretrained parameters success") + + def transform(self, img: Union[np.ndarray, str]) -> Union[np.ndarray, str]: + return self.transforms(img) + + def forward(self, x: paddle.Tensor) -> List[paddle.Tensor]: + feat_list = self.backbone(x) + logit_list = self.head(feat_list) + return [ + F.interpolate(logit, paddle.shape(x)[2:], mode='bilinear', align_corners=self.align_corners) + for logit in logit_list + ] + + +class FCNHead(nn.Layer): + """ + A simple implementation for FCNHead based on PaddlePaddle + + Args: + num_classes (int): The unique number of target classes. + backbone_indices (tuple, optional): The values in the tuple indicate the indices of output of backbone. + Default: (-1, ). + backbone_channels (tuple): The values of backbone channels. + Default: (270, ). + channels (int, optional): The channels between conv layer and the last layer of FCNHead. + If None, it will be the number of channels of input features. Default: None. + pretrained (str, optional): The path of pretrained model. 
Default: None + """ + + def __init__(self, + num_classes: int, + backbone_indices: Tuple[int] = (-1, ), + backbone_channels: Tuple[int] = (270, ), + channels: int = None): + super(FCNHead, self).__init__() + + self.num_classes = num_classes + self.backbone_indices = backbone_indices + if channels is None: + channels = backbone_channels[0] + + self.conv_1 = layers.ConvBNReLU( + in_channels=backbone_channels[0], out_channels=channels, kernel_size=1, padding='same', stride=1) + self.cls = nn.Conv2D(in_channels=channels, out_channels=self.num_classes, kernel_size=1, stride=1, padding=0) + + def forward(self, feat_list: nn.Layer) -> List[paddle.Tensor]: + logit_list = [] + x = feat_list[self.backbone_indices[0]] + x = self.conv_1(x) + logit = self.cls(x) + logit_list.append(logit) + return logit_list diff --git a/modules/image/semantic_segmentation/hardnet_cityscapes/README.md b/modules/image/semantic_segmentation/hardnet_cityscapes/README.md new file mode 100644 index 0000000000000000000000000000000000000000..75a44dd551187029409ae788ad736fbb713f0e84 --- /dev/null +++ b/modules/image/semantic_segmentation/hardnet_cityscapes/README.md @@ -0,0 +1,173 @@ +# PaddleHub 图像分割 + +## 模型预测 + +若想使用我们提供的预训练模型进行预测,可使用如下脚本: + +```python +import paddle +import cv2 +import paddlehub as hub + +if __name__ == '__main__': + model = hub.Module(name='hardnet_cityscapes') + img = cv2.imread("/PATH/TO/IMAGE") + model.predict(images=[img], visualization=True) +``` + + +## 如何开始Fine-tune + +在完成安装PaddlePaddle与PaddleHub后,通过执行`python train.py`即可开始使用hardnet_cityscapes模型对OpticDiscSeg等数据集进行Fine-tune。 + +## 代码步骤 + +使用PaddleHub Fine-tune API进行Fine-tune可以分为4个步骤。 + +### Step1: 定义数据预处理方式 +```python +from paddlehub.vision.segmentation_transforms import Compose, Resize, Normalize + +transform = Compose([Resize(target_size=(512, 512)), Normalize()]) +``` + +`segmentation_transforms` 数据增强模块定义了丰富的针对图像分割数据的预处理方式,用户可按照需求替换自己需要的数据预处理方式。 + +### Step2: 下载数据集并使用 +```python +from paddlehub.datasets import OpticDiscSeg + +train_reader = OpticDiscSeg(transform, mode='train') + +``` +* `transform`: 数据预处理方式。 +* `mode`: 选择数据模式,可选项有 `train`, `test`, `val`, 默认为`train`。 + +数据集的准备代码可以参考 [opticdiscseg.py](../../paddlehub/datasets/opticdiscseg.py)。`hub.datasets.OpticDiscSeg()`会自动从网络下载数据集并解压到用户目录下`$HOME/.paddlehub/dataset`目录。 + +### Step3: 加载预训练模型 + +```python +model = hub.Module(name='hardnet_cityscapes', num_classes=2, pretrained=None) +``` +* `name`: 选择预训练模型的名字。 +* `num_classes`: 分割模型的类别数目。 +* `pretrained`: 是否加载自己训练的模型,若为None,则加载提供的模型默认参数。 + +### Step4: 选择优化策略和运行配置 + +```python +scheduler = paddle.optimizer.lr.PolynomialDecay(learning_rate=0.01, decay_steps=1000, power=0.9, end_lr=0.0001) +optimizer = paddle.optimizer.Adam(learning_rate=scheduler, parameters=model.parameters()) +trainer = Trainer(model, optimizer, checkpoint_dir='test_ckpt_img_ocr', use_gpu=True) +``` + +#### 优化策略 + +Paddle2.0提供了多种优化器选择,如`SGD`, `Adam`, `Adamax`等,其中`Adam`: + +* `learning_rate`: 全局学习率。 +* `parameters`: 待优化模型参数。 + +#### 运行配置 +`Trainer` 主要控制Fine-tune的训练,包含以下可控制的参数: + +* `model`: 被优化模型; +* `optimizer`: 优化器选择; +* `use_gpu`: 是否使用gpu,默认为False; +* `use_vdl`: 是否使用vdl可视化训练过程; +* `checkpoint_dir`: 保存模型参数的地址; +* `compare_metrics`: 保存最优模型的衡量指标; + +`trainer.train` 主要控制具体的训练过程,包含以下可控制的参数: + +* `train_dataset`: 训练时所用的数据集; +* `epochs`: 训练轮数; +* `batch_size`: 训练的批大小,如果使用GPU,请根据实际情况调整batch_size; +* `num_workers`: works的数量,默认为0; +* `eval_dataset`: 验证集; +* `log_interval`: 打印日志的间隔, 单位为执行批训练的次数。 +* `save_interval`: 保存模型的间隔频次,单位为执行训练的轮数。 + +## 模型预测 + 
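+在进行预测之前,需要先用上文Step4中构建的`trainer`启动训练。调用方式大致如下(仅为示意:`transform`、`train_reader`为Step1、Step2中构建的对象,各参数取值请按实际任务调整):
+
+```python
+# 启动Fine-tune,各参数含义见上文`trainer.train`的说明
+from paddlehub.datasets import OpticDiscSeg
+
+trainer.train(
+    train_reader,
+    epochs=10,
+    batch_size=4,
+    num_workers=0,
+    eval_dataset=OpticDiscSeg(transform, mode='val'),
+    log_interval=10,
+    save_interval=1)
+```
+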
+当完成Fine-tune后,Fine-tune过程在验证集上表现最优的模型会被保存在`${CHECKPOINT_DIR}/best_model`目录下,其中`${CHECKPOINT_DIR}`目录为Fine-tune时所选择的保存checkpoint的目录。 + +我们使用该模型来进行预测。predict.py脚本如下: + +```python +import paddle +import cv2 +import paddlehub as hub + +if __name__ == '__main__': + model = hub.Module(name='hardnet_cityscapes', pretrained='/PATH/TO/CHECKPOINT') + img = cv2.imread("/PATH/TO/IMAGE") + model.predict(images=[img], visualization=True) +``` + +参数配置正确后,请执行脚本`python predict.py`。 +**Args** +* `images`:原始图像路径或BGR格式图片; +* `visualization`: 是否可视化,默认为True; +* `save_path`: 保存结果的路径,默认保存路径为'seg_result'。 + +**NOTE:** 进行预测时,所选择的module,checkpoint_dir,dataset必须和Fine-tune所用的一样。 + +## 服务部署 + +PaddleHub Serving可以部署一个在线图像分割服务。 + +### Step1: 启动PaddleHub Serving + +运行启动命令: + +```shell +$ hub serving start -m hardnet_cityscapes +``` + +这样就完成了一个图像分割服务化API的部署,默认端口号为8866。 + +**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +### Step2: 发送预测请求 + +配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + +```python +import requests +import json +import cv2 +import base64 + +import numpy as np + + +def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + +# 发送HTTP请求 +org_im = cv2.imread('/PATH/TO/IMAGE') +data = {'images':[cv2_to_base64(org_im)]} +headers = {"Content-type": "application/json"} +url = "http://127.0.0.1:8866/predict/hardnet_cityscapes" +r = requests.post(url=url, headers=headers, data=json.dumps(data)) +mask = base64_to_cv2(r.json()["results"][0]) +``` + +### 查看代码 + +https://github.com/PaddlePaddle/PaddleSeg + +### 依赖 + +paddlepaddle >= 2.0.0 + +paddlehub >= 2.0.0 diff --git a/modules/image/semantic_segmentation/hardnet_cityscapes/layers.py b/modules/image/semantic_segmentation/hardnet_cityscapes/layers.py new file mode 100644 index 0000000000000000000000000000000000000000..cbcb7ad830fa82a87e1fbd86b1e59a63cc4ef579 --- /dev/null +++ b/modules/image/semantic_segmentation/hardnet_cityscapes/layers.py @@ -0,0 +1,185 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os + +import paddle +import paddle.nn as nn +import paddle.nn.functional as F + + +def SyncBatchNorm(*args, **kwargs): + """In cpu environment nn.SyncBatchNorm does not have kernel so use nn.BatchNorm2D instead""" + if paddle.get_device() == 'cpu' or os.environ.get('PADDLESEG_EXPORT_STAGE'): + return nn.BatchNorm2D(*args, **kwargs) + else: + return nn.SyncBatchNorm(*args, **kwargs) + + +class ConvBNReLU(nn.Layer): + """Basic conv bn relu layer.""" + + def __init__(self, in_channels: int, out_channels: int, kernel_size: int, padding: str = 'same', **kwargs): + super().__init__() + + self._conv = nn.Conv2D(in_channels, out_channels, kernel_size, padding=padding, **kwargs) + + self._batch_norm = SyncBatchNorm(out_channels) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self._conv(x) + x = self._batch_norm(x) + x = F.relu(x) + return x + + +class ConvBN(nn.Layer): + """Basic conv bn layer.""" + + def __init__(self, in_channels: int, out_channels: int, kernel_size: int, padding: str = 'same', **kwargs): + super().__init__() + self._conv = nn.Conv2D(in_channels, out_channels, kernel_size, padding=padding, **kwargs) + self._batch_norm = SyncBatchNorm(out_channels) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self._conv(x) + x = self._batch_norm(x) + return x + + +class ConvReLUPool(nn.Layer): + """Basic conv bn pool layer.""" + + def __init__(self, in_channels: int, out_channels: int): + super().__init__() + self.conv = nn.Conv2D(in_channels, out_channels, kernel_size=3, stride=1, padding=1, dilation=1) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self.conv(x) + x = F.relu(x) + x = F.pool2d(x, pool_size=2, pool_type="max", pool_stride=2) + return x + + +class SeparableConvBNReLU(nn.Layer): + """Basic separable conv bn relu layer.""" + + def __init__(self, in_channels: int, out_channels: int, kernel_size: int, padding: str = 'same', **kwargs): + super().__init__() + self.depthwise_conv = ConvBN( + in_channels, + out_channels=in_channels, + kernel_size=kernel_size, + padding=padding, + groups=in_channels, + **kwargs) + self.piontwise_conv = ConvBNReLU(in_channels, out_channels, kernel_size=1, groups=1) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self.depthwise_conv(x) + x = self.piontwise_conv(x) + return x + + +class DepthwiseConvBN(nn.Layer): + """Basic depthwise conv bn relu layer.""" + + def __init__(self, in_channels: int, out_channels: int, kernel_size: int, padding: str = 'same', **kwargs): + super().__init__() + self.depthwise_conv = ConvBN( + in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + padding=padding, + groups=in_channels, + **kwargs) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self.depthwise_conv(x) + return x + + +class AuxLayer(nn.Layer): + """ + The auxiliary layer implementation for auxiliary loss. + + Args: + in_channels (int): The number of input channels. + inter_channels (int): The intermediate channels. + out_channels (int): The number of output channels, and usually it is num_classes. + dropout_prob (float, optional): The drop rate. Default: 0.1. 
+ """ + + def __init__(self, in_channels: int, inter_channels: int, out_channels: int, dropout_prob: float = 0.1): + super().__init__() + + self.conv_bn_relu = ConvBNReLU(in_channels=in_channels, out_channels=inter_channels, kernel_size=3, padding=1) + + self.dropout = nn.Dropout(p=dropout_prob) + + self.conv = nn.Conv2D(in_channels=inter_channels, out_channels=out_channels, kernel_size=1) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self.conv_bn_relu(x) + x = self.dropout(x) + x = self.conv(x) + return x + + +class Activation(nn.Layer): + """ + The wrapper of activations. + Args: + act (str, optional): The activation name in lowercase. It must be one of ['elu', 'gelu', + 'hardshrink', 'tanh', 'hardtanh', 'prelu', 'relu', 'relu6', 'selu', 'leakyrelu', 'sigmoid', + 'softmax', 'softplus', 'softshrink', 'softsign', 'tanhshrink', 'logsigmoid', 'logsoftmax', + 'hsigmoid']. Default: None, means identical transformation. + Returns: + A callable object of Activation. + Raises: + KeyError: When parameter `act` is not in the optional range. + Examples: + from paddleseg.models.common.activation import Activation + relu = Activation("relu") + print(relu) + # + sigmoid = Activation("sigmoid") + print(sigmoid) + # + not_exit_one = Activation("not_exit_one") + # KeyError: "not_exit_one does not exist in the current dict_keys(['elu', 'gelu', 'hardshrink', + # 'tanh', 'hardtanh', 'prelu', 'relu', 'relu6', 'selu', 'leakyrelu', 'sigmoid', 'softmax', + # 'softplus', 'softshrink', 'softsign', 'tanhshrink', 'logsigmoid', 'logsoftmax', 'hsigmoid'])" + """ + + def __init__(self, act: str = None): + super(Activation, self).__init__() + + self._act = act + upper_act_names = nn.layer.activation.__dict__.keys() + lower_act_names = [act.lower() for act in upper_act_names] + act_dict = dict(zip(lower_act_names, upper_act_names)) + + if act is not None: + if act in act_dict.keys(): + act_name = act_dict[act] + self.act_func = eval("nn.layer.activation.{}()".format(act_name)) + else: + raise KeyError("{} does not exist in the current {}".format(act, act_dict.keys())) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + if self._act is not None: + return self.act_func(x) + else: + return x diff --git a/modules/image/semantic_segmentation/hardnet_cityscapes/module.py b/modules/image/semantic_segmentation/hardnet_cityscapes/module.py new file mode 100644 index 0000000000000000000000000000000000000000..3923bff5ae20dfd69433d46dcedfd6851d5f40ee --- /dev/null +++ b/modules/image/semantic_segmentation/hardnet_cityscapes/module.py @@ -0,0 +1,291 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import os
+from typing import Union, Tuple, List
+
+import numpy as np
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+
+from paddlehub.module.module import moduleinfo
+import paddlehub.vision.segmentation_transforms as T
+from paddlehub.module.cv_module import ImageSegmentationModule
+
+import hardnet_cityscapes.layers as layers
+
+
+@moduleinfo(
+    name="hardnet_cityscapes",
+    type="CV/semantic_segmentation",
+    author="paddlepaddle",
+    author_email="",
+    summary="HarDNet is a semantic segmentation model trained on Cityscapes.",
+    version="1.0.0",
+    meta=ImageSegmentationModule)
+class HarDNet(nn.Layer):
+    """
+    [Real Time] The FC-HarDNet 70 implementation based on PaddlePaddle.
+    The original article refers to
+    Chao, Ping, et al. "HarDNet: A Low Memory Traffic Network"
+    (https://arxiv.org/pdf/1909.00948.pdf)
+
+    Args:
+        num_classes (int): The unique number of target classes.
+        stem_channels (tuple|list, optional): The number of channels before the encoder. Default: (16, 24, 32, 48).
+        ch_list (tuple|list, optional): The number of channels at each block in the encoder. Default: (64, 96, 160, 224, 320).
+        grmul (float, optional): The channel multiplying factor in HarDBlock, which is m in the paper. Default: 1.7.
+        gr (tuple|list, optional): The growth rate in each HarDBlock, which is k in the paper. Default: (10, 16, 18, 24, 32).
+        n_layers (tuple|list, optional): The number of layers in each HarDBlock. Default: (4, 4, 8, 8, 8).
+        align_corners (bool): An argument of F.interpolate. It should be set to False when the output size of feature
+            is even, e.g. 1024x512, otherwise it is True, e.g. 769x769. Default: False.
+        pretrained (str, optional): The path or url of pretrained model. Default: None.
+    """
+
+    def __init__(self,
+                 num_classes: int = 19,
+                 stem_channels: Tuple[int] = (16, 24, 32, 48),
+                 ch_list: Tuple[int] = (64, 96, 160, 224, 320),
+                 grmul: float = 1.7,
+                 gr: Tuple[int] = (10, 16, 18, 24, 32),
+                 n_layers: Tuple[int] = (4, 4, 8, 8, 8),
+                 align_corners: bool = False,
+                 pretrained: str = None):
+
+        super(HarDNet, self).__init__()
+        self.align_corners = align_corners
+        self.pretrained = pretrained
+        encoder_blks_num = len(n_layers)
+        decoder_blks_num = encoder_blks_num - 1
+        encoder_in_channels = stem_channels[3]
+
+        self.stem = nn.Sequential(
+            layers.ConvBNReLU(3, stem_channels[0], kernel_size=3, bias_attr=False),
+            layers.ConvBNReLU(stem_channels[0], stem_channels[1], kernel_size=3, bias_attr=False),
+            layers.ConvBNReLU(stem_channels[1], stem_channels[2], kernel_size=3, stride=2, bias_attr=False),
+            layers.ConvBNReLU(stem_channels[2], stem_channels[3], kernel_size=3, bias_attr=False))
+
+        self.encoder = Encoder(encoder_blks_num, encoder_in_channels, ch_list, gr, grmul, n_layers)
+
+        skip_connection_channels = self.encoder.get_skip_channels()
+        decoder_in_channels = self.encoder.get_out_channels()
+
+        self.decoder = Decoder(decoder_blks_num, decoder_in_channels, skip_connection_channels, gr, grmul, n_layers,
+                               align_corners)
+
+        self.cls_head = nn.Conv2D(in_channels=self.decoder.get_out_channels(), out_channels=num_classes, kernel_size=1)
+
+        self.transforms = T.Compose([T.Normalize()])
+
+        if pretrained is not None:
+            model_dict = paddle.load(pretrained)
+            self.set_dict(model_dict)
+            print("load custom parameters success")
+
+        else:
+            checkpoint = os.path.join(self.directory, 'model.pdparams')
+            model_dict = paddle.load(checkpoint)
+            self.set_dict(model_dict)
+            print("load pretrained parameters success")
+
+    def transform(self, img: Union[np.ndarray, str]) -> Union[np.ndarray, str]:
+        return self.transforms(img)
+
+    def forward(self, x: paddle.Tensor) -> List[paddle.Tensor]:
+        input_shape = paddle.shape(x)[2:]
+        x = self.stem(x)
+        x, skip_connections = self.encoder(x)
+        x = self.decoder(x, skip_connections)
+        logit = self.cls_head(x)
+        logit = F.interpolate(logit, size=input_shape, mode="bilinear", align_corners=self.align_corners)
+        return [logit]
+
+
+class Encoder(nn.Layer):
+    """The Encoder implementation of FC-HarDNet 70.
+
+    Args:
+        n_blocks (int): The number of blocks in the Encoder module.
+        in_channels (int): The number of input channels.
+        ch_list (tuple|list): The number of channels at each block in the encoder.
+        grmul (float): The channel multiplying factor in HarDBlock, which is m in the paper.
+        gr (tuple|list): The growth rate in each HarDBlock, which is k in the paper.
+        n_layers (tuple|list): The number of layers in each HarDBlock.
+    """
+
+    def __init__(self, n_blocks: int, in_channels: int, ch_list: List[int], gr: List[int], grmul: float,
+                 n_layers: List[int]):
+        super().__init__()
+        self.skip_connection_channels = []
+        self.shortcut_layers = []
+        self.blks = nn.LayerList()
+        ch = in_channels
+        for i in range(n_blocks):
+            blk = HarDBlock(ch, gr[i], grmul, n_layers[i])
+            ch = blk.get_out_ch()
+            self.skip_connection_channels.append(ch)
+            self.blks.append(blk)
+            if i < n_blocks - 1:
+                self.shortcut_layers.append(len(self.blks) - 1)
+            self.blks.append(layers.ConvBNReLU(ch, ch_list[i], kernel_size=1, bias_attr=False))
+
+            ch = ch_list[i]
+            if i < n_blocks - 1:
+                self.blks.append(nn.AvgPool2D(kernel_size=2, stride=2))
+        self.out_channels = ch
+
+    def forward(self, x: paddle.Tensor) -> paddle.Tensor:
+        skip_connections = []
+        for i in range(len(self.blks)):
+            x = self.blks[i](x)
+            if i in self.shortcut_layers:
+                skip_connections.append(x)
+        return x, skip_connections
+
+    def get_skip_channels(self):
+        return self.skip_connection_channels
+
+    def get_out_channels(self):
+        return self.out_channels
+
+
+class Decoder(nn.Layer):
+    """The Decoder implementation of FC-HarDNet 70.
+
+    Args:
+        n_blocks (int): The number of blocks in the Encoder module.
+        in_channels (int): The number of input channels.
+        skip_connection_channels (tuple|list): The channels of shortcut layers in encoder.
+        grmul (float): The channel multiplying factor in HarDBlock, which is m in the paper.
+        gr (tuple|list): The growth rate in each HarDBlock, which is k in the paper.
+        n_layers (tuple|list): The number of layers in each HarDBlock.
+    """
+
+    def __init__(self,
+                 n_blocks: int,
+                 in_channels: int,
+                 skip_connection_channels: List[paddle.Tensor],
+                 gr: List[int],
+                 grmul: float,
+                 n_layers: List[int],
+                 align_corners: bool = False):
+        super().__init__()
+        prev_block_channels = in_channels
+        self.n_blocks = n_blocks
+        self.dense_blocks_up = nn.LayerList()
+        self.conv1x1_up = nn.LayerList()
+
+        for i in range(n_blocks - 1, -1, -1):
+            cur_channels_count = prev_block_channels + skip_connection_channels[i]
+            conv1x1 = layers.ConvBNReLU(cur_channels_count, cur_channels_count // 2, kernel_size=1, bias_attr=False)
+            blk = HarDBlock(base_channels=cur_channels_count // 2, growth_rate=gr[i], grmul=grmul, n_layers=n_layers[i])
+
+            self.conv1x1_up.append(conv1x1)
+            self.dense_blocks_up.append(blk)
+
+            prev_block_channels = blk.get_out_ch()
+
+        self.out_channels = prev_block_channels
+        self.align_corners = align_corners
+
+    def forward(self, x: paddle.Tensor, skip_connections: List[paddle.Tensor]) -> paddle.Tensor:
+        for i in range(self.n_blocks):
+            skip = skip_connections.pop()
+            x = F.interpolate(x, size=paddle.shape(skip)[2:], mode="bilinear", align_corners=self.align_corners)
+            x = paddle.concat([x, skip], axis=1)
+            x = self.conv1x1_up[i](x)
+            x = self.dense_blocks_up[i](x)
+        return x
+
+    def get_out_channels(self):
+        return self.out_channels
+
+
+class HarDBlock(nn.Layer):
+    """The HarDBlock implementation
+
+    Args:
+        base_channels (int): The base channels.
+        growth_rate (tuple|list): The growth rate.
+        grmul (float): The channel multiplying factor.
+        n_layers (tuple|list): The number of layers.
+        keepBase (bool, optional): A bool value indicates whether concatenating the first layer. Default: False.
+    """
+
+    def __init__(self,
+                 base_channels: int,
+                 growth_rate: List[int],
+                 grmul: float,
+                 n_layers: List[int],
+                 keepBase: bool = False):
+        super().__init__()
+        self.keepBase = keepBase
+        self.links = []
+        layers_ = []
+        self.out_channels = 0
+        for i in range(n_layers):
+            outch, inch, link = get_link(i + 1, base_channels, growth_rate, grmul)
+
+            self.links.append(link)
+            layers_.append(layers.ConvBNReLU(inch, outch, kernel_size=3, bias_attr=False))
+            if (i % 2 == 0) or (i == n_layers - 1):
+                self.out_channels += outch
+        self.layers = nn.LayerList(layers_)
+
+    def forward(self, x: paddle.Tensor) -> paddle.Tensor:
+        layers_ = [x]
+        for layer in range(len(self.layers)):
+            link = self.links[layer]
+            tin = []
+            for i in link:
+                tin.append(layers_[i])
+            if len(tin) > 1:
+                x = paddle.concat(tin, axis=1)
+            else:
+                x = tin[0]
+            out = self.layers[layer](x)
+            layers_.append(out)
+
+        t = len(layers_)
+        out_ = []
+        for i in range(t):
+            if (i == 0 and self.keepBase) or \
+                    (i == t - 1) or (i % 2 == 1):
+                out_.append(layers_[i])
+        out = paddle.concat(out_, 1)
+
+        return out
+
+    def get_out_ch(self):
+        return self.out_channels
+
+
+def get_link(layer: int, base_ch: int, growth_rate: List[int], grmul: float) -> Tuple:
+    if layer == 0:
+        return base_ch, 0, []
+    out_channels = growth_rate
+    link = []
+    for i in range(10):
+        dv = 2**i
+        if layer % dv == 0:
+            k = layer - dv
+            link.insert(0, k)
+            if i > 0:
+                out_channels *= grmul
+    out_channels = int(int(out_channels + 1) / 2) * 2
+    in_channels = 0
+    for i in link:
+        ch, _, _ = get_link(i, base_ch, growth_rate, grmul)
+        in_channels += ch
+    return out_channels, in_channels, link
100644 --- a/modules/image/semantic_segmentation/humanseg_lite/README.md +++ b/modules/image/semantic_segmentation/humanseg_lite/README.md @@ -1,205 +1,250 @@ -## 模型概述 +# humanseg_lite + +|模型名称|humanseg_lite| +| :--- | :---: | +|类别|图像-图像分割| +|网络|shufflenet| +|数据集|百度自建数据集| +|是否支持Fine-tuning|否| +|模型大小|541k| +|指标|-| +|最新更新日期|2021-02-26| + +## 一、模型基本信息 + +- ### 应用效果展示 + + - 样例结果示例: +


+- ### 模型介绍 + + - HumanSeg_lite是在ShuffleNetV2网络结构的基础上进行优化,进一步减小了网络规模,网络大小只有541K,量化后只有187K, 适用于手机自拍人像分割,且能在移动端进行实时分割。 + + - 更多详情请参考:[humanseg_lite](https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.2/contrib/HumanSeg) + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 + +- ### 2、安装 + + - ```shell + $ hub install humanseg_lite + ``` + + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + ``` + hub run humanseg_lite --input_path "/PATH/TO/IMAGE" + ``` +- ### 2、代码示例 + + - 图片分割及视频分割代码示例: -HumanSeg_lite是基于ShuffleNetV2网络结构的基础上进行优化的人像分割模型,进一步减小了网络规模,网络大小只有541K,量化后只有187K,适用于手机自拍人像分割等实时分割场景。 + ```python + import cv2 + import paddlehub as hub + human_seg = hub.Module(name='humanseg_lite') + im = cv2.imread('/PATH/TO/IMAGE') + #visualization=True可以用于查看人像分割图片效果,可设置为False提升运行速度。 + res = human_seg.segment(images=[im],visualization=True) + print(res[0]['data']) + human_seg.video_segment('/PATH/TO/VIDEO') + human_seg.save_inference_model('/PATH/TO/SAVE/MODEL') -## 命令行预测 + ``` + - 视频流预测代码示例: -``` -hub run humanseg_lite --input_path "/PATH/TO/IMAGE" + ```python + import cv2 + import numpy as np + import paddlehub as hub -``` + human_seg = hub.Module('humanseg_lite') + cap_video = cv2.VideoCapture('\PATH\TO\VIDEO') + fps = cap_video.get(cv2.CAP_PROP_FPS) + save_path = 'humanseg_lite_video.avi' + width = int(cap_video.get(cv2.CAP_PROP_FRAME_WIDTH)) + height = int(cap_video.get(cv2.CAP_PROP_FRAME_HEIGHT)) + cap_out = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), fps, (width, height)) + prev_gray = None + prev_cfd = None + while cap_video.isOpened(): + ret, frame_org = cap_video.read() + if ret: + [img_matting, prev_gray, prev_cfd] = human_seg.video_stream_segment(frame_org=frame_org, frame_id=cap_video.get(1), prev_gray=prev_gray, prev_cfd=prev_cfd) + img_matting = np.repeat(img_matting[:, :, np.newaxis], 3, axis=2) + bg_im = np.ones_like(img_matting) * 255 + comb = (img_matting * frame_org + (1 - img_matting) * bg_im).astype(np.uint8) + cap_out.write(comb) + else: + break + cap_video.release() + cap_out.release() -## API + ``` -```python -def segment(images=None, - paths=None, - batch_size=1, - use_gpu=False, - visualization=False, - output_dir='humanseg_lite_output') -``` +- ### 3、API -预测API,用于人像分割。 + ```python + def segment(images=None, + paths=None, + batch_size=1, + use_gpu=False, + visualization=False, + output_dir='humanseg_lite_output') + ``` -**参数** + - 预测API,用于人像分割。 -* images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\],BGR格式; -* paths (list\[str\]): 图片的路径; -* batch\_size (int): batch 的大小; -* use\_gpu (bool): 是否使用 GPU预测,如果使用GPU预测,则在预测之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置; -* visualization (bool): 是否将识别结果保存为图片文件; -* output\_dir (str): 图片的保存路径。 + - **参数** -**返回** + * images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\],BGR格式; + * paths (list\[str\]): 图片的路径; + * batch\_size (int): batch 的大小; + * use\_gpu (bool): 是否使用 GPU预测,如果使用GPU预测,则在预测之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置; + * visualization (bool): 是否将识别结果保存为图片文件; + * output\_dir (str): 图片的保存路径。 -* res (list\[dict\]): 识别结果的列表,列表中每一个元素为 dict,关键字有 'save\_path', 'data',对应的取值为: - * save\_path (str, optional): 可视化图片的保存路径(仅当visualization=True时存在); - * data (numpy.ndarray): 人像分割结果,仅包含Alpha通道,取值为0-255 (0为全透明,255为不透明),也即取值越大的像素点越可能为人体,取值越小的像素点越可能为背景。 + - **返回** 
-```python -def video_stream_segment(self, - frame_org, - frame_id, - prev_gray, - prev_cfd, - use_gpu=False): -``` + * res (list\[dict\]): 识别结果的列表,列表中每一个元素为 dict,关键字有 'save\_path', 'data',对应的取值为: + * save\_path (str, optional): 可视化图片的保存路径(仅当visualization=True时存在); + * data (numpy.ndarray): 人像分割结果,仅包含Alpha通道,取值为0-255 (0为全透明,255为不透明),也即取值越大的像素点越可能为人体,取值越小的像素点越可能为背景。 -预测API,用于逐帧对视频人像分割。 -**参数** + ```python + def video_stream_segment(self, + frame_org, + frame_id, + prev_gray, + prev_cfd, + use_gpu=False): + ``` -* frame_org (numpy.ndarray): 单帧图片数据,ndarray.shape 为 \[H, W, C\],BGR格式; -* frame_id (int): 当前帧的编号; -* prev_gray (numpy.ndarray): 前一帧输入网络图像的灰度图; -* prev_cfd (numpy.ndarray): 前一帧光流追踪图和预测结果融合图 -* use\_gpu (bool): 是否使用 GPU预测,如果使用GPU预测,则在预测之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置; + - 预测API,用于逐帧对视频人像分割。 + - **参数** -**返回** + * frame_org (numpy.ndarray): 单帧图片数据,ndarray.shape 为 \[H, W, C\],BGR格式; + * frame_id (int): 当前帧的编号; + * prev_gray (numpy.ndarray): 前一帧输入网络图像的灰度图; + * prev_cfd (numpy.ndarray): 前一帧光流追踪图和预测结果融合图 + * use\_gpu (bool): 是否使用 GPU预测,如果使用GPU预测,则在预测之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置; -* img_matting (numpy.ndarray): 人像分割结果,仅包含Alpha通道,取值为0-1 (0为全透明,1为不透明)。 -* cur_gray (numpy.ndarray): 当前帧输入网络图像的灰度图; -* optflow_map (numpy.ndarray): 当前帧光流追踪图和预测结果融合图 + - **返回** -```python -def video_segment(self, - video_path=None, - use_gpu=False, - save_dir='humanseg_lite_video_result'): -``` + * img_matting (numpy.ndarray): 人像分割结果,仅包含Alpha通道,取值为0-1 (0为全透明,1为不透明)。 + * cur_gray (numpy.ndarray): 当前帧输入网络图像的灰度图; + * optflow_map (numpy.ndarray): 当前帧光流追踪图和预测结果融合图 -预测API,用于视频人像分割。 -**参数** + ```python + def video_segment(self, + video_path=None, + use_gpu=False, + save_dir='humanseg_lite_video_result'): + ``` -* video\_path (str): 待分割视频路径。若为None,则从本地摄像头获取视频,并弹出窗口显示在线分割结果。 -* use\_gpu (bool): 是否使用 GPU预测,如果使用GPU预测,则在预测之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置; -* save\_dir (str): 视频保存路径,仅在video\_path不为None时启用,保存离线视频处理结果。 + - 预测API,用于视频人像分割。 + - **参数** -```python -def save_inference_model(dirname='humanseg_lite_model', - model_filename=None, - params_filename=None, - combined=True) -``` + * video\_path (str): 待分割视频路径。若为None,则从本地摄像头获取视频,并弹出窗口显示在线分割结果。 + * use\_gpu (bool): 是否使用 GPU预测,如果使用GPU预测,则在预测之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置; + * save\_dir (str): 视频保存路径,仅在video\_path不为None时启用,保存离线视频处理结果。 -将模型保存到指定路径。 -**参数** + ```python + def save_inference_model(dirname='humanseg_lite_model', + model_filename=None, + params_filename=None, + combined=True) + ``` -* dirname: 存在模型的目录名称 -* model\_filename: 模型文件名称,默认为\_\_model\_\_ -* params\_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效) -* combined: 是否将参数保存到统一的一个文件中 + - 将模型保存到指定路径。 -## 代码示例 + - **参数** + * dirname: 存在模型的目录名称 + * model\_filename: 模型文件名称,默认为\_\_model\_\_ + * params\_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效) + * combined: 是否将参数保存到统一的一个文件中 -图片分割及视频分割代码示例: -```python -import cv2 -import paddlehub as hub +## 四、服务部署 -human_seg = hub.Module(name='humanseg_lite') -im = cv2.imread('/PATH/TO/IMAGE') -#visualization=True可以用于查看超分图片效果,可设置为False提升运行速度。 -res = human_seg.segment(images=[im],visualization=True) -print(res[0]['data']) -human_seg.video_segment('/PATH/TO/VIDEO') -human_seg.save_inference_model('/PATH/TO/SAVE/MODEL') +- PaddleHub Serving可以部署一个人像分割的在线服务。 -``` -视频流预测代码示例: -```python -import cv2 -import numpy as np -import paddlehub as hub +- ### 第一步:启动PaddleHub Serving -human_seg = hub.Module('humanseg_lite') -cap_video = cv2.VideoCapture('\PATH\TO\VIDEO') -fps = cap_video.get(cv2.CAP_PROP_FPS) -save_path = 'humanseg_lite_video.avi' -width = 
int(cap_video.get(cv2.CAP_PROP_FRAME_WIDTH)) -height = int(cap_video.get(cv2.CAP_PROP_FRAME_HEIGHT)) -cap_out = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), fps, (width, height)) -prev_gray = None -prev_cfd = None -while cap_video.isOpened(): - ret, frame_org = cap_video.read() - if ret: - [img_matting, prev_gray, prev_cfd] = human_seg.video_stream_segment(frame_org=frame_org, frame_id=cap_video.get(1), prev_gray=prev_gray, prev_cfd=prev_cfd) - img_matting = np.repeat(img_matting[:, :, np.newaxis], 3, axis=2) - bg_im = np.ones_like(img_matting) * 255 - comb = (img_matting * frame_org + (1 - img_matting) * bg_im).astype(np.uint8) - cap_out.write(comb) - else: - break + - 运行启动命令: -cap_video.release() -cap_out.release() + ```shell + $ hub serving start -m humanseg_lite + ``` -``` -## 服务部署 + - 这样就完成了一个人像分割的服务化API的部署,默认端口号为8866。 -PaddleHub Serving可以部署一个人像分割的在线服务。 + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 -## 第一步:启动PaddleHub Serving +- ### 第二步:发送预测请求 -运行启动命令: -```shell -$ hub serving start -m humanseg_lite -``` + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 -这样就完成了一个人像分割的服务化API的部署,默认端口号为8866。 + ```python + import requests + import json + import base64 -**NOTE:** 如使用GPU预测,则需要在启动服务之前,设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + import cv2 + import numpy as np -## 第二步:发送预测请求 + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + # 发送HTTP请求 + org_im = cv2.imread('/PATH/TO/IMAGE') + data = {'images':[cv2_to_base64(org_im)]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/humanseg_lite" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) -```python -import requests -import json -import base64 - -import cv2 -import numpy as np + # 保存图片 + mask =cv2.cvtColor(base64_to_cv2(r.json()["results"][0]['data']), cv2.COLOR_BGR2GRAY) + rgba = np.concatenate((org_im, np.expand_dims(mask, axis=2)), axis=2) + cv2.imwrite("segment_human_lite.png", rgba) + ``` -def cv2_to_base64(image): - data = cv2.imencode('.jpg', image)[1] - return base64.b64encode(data.tostring()).decode('utf8') -def base64_to_cv2(b64str): - data = base64.b64decode(b64str.encode('utf8')) - data = np.fromstring(data, np.uint8) - data = cv2.imdecode(data, cv2.IMREAD_COLOR) - return data - -# 发送HTTP请求 -org_im = cv2.imread('PATH/TO/IMAGE') -data = {'images':[cv2_to_base64(org_im)]} -headers = {"Content-type": "application/json"} -url = "http://127.0.0.1:8866/predict/humanseg_lite" -r = requests.post(url=url, headers=headers, data=json.dumps(data)) - -# 保存图片 -mask =cv2.cvtColor(base64_to_cv2(r.json()["results"][0]['data']), cv2.COLOR_BGR2GRAY) -rgba = np.concatenate((org_im, np.expand_dims(mask, axis=2)), axis=2) -cv2.imwrite("segment_human_lite.png", rgba) -``` -### 查看代码 - -https://github.com/PaddlePaddle/PaddleSeg/tree/develop/contrib/HumanSeg - - - -### 依赖 - -paddlepaddle >= 1.8.0 - -paddlehub >= 1.7.1 + +## 五、更新历史 + +* 1.0.0 + + 初始发布 +* 1.1.0 + + 新增视频人像分割接口 + + 新增视频流人像分割接口 +* 1.1.1 + + 修复cudnn为8.0.4显存泄露问题 diff --git a/modules/image/semantic_segmentation/humanseg_mobile/README.md b/modules/image/semantic_segmentation/humanseg_mobile/README.md index ae767d34b528884afa904260d3417b8b598c75db..2e65c49b47a6c8751c4581bef5a7258e872cd078 100644 --- 
a/modules/image/semantic_segmentation/humanseg_mobile/README.md +++ b/modules/image/semantic_segmentation/humanseg_mobile/README.md @@ -1,208 +1,254 @@ -## 模型概述 +# humanseg_mobile + +|模型名称|humanseg_mobile| +| :--- | :---: | +|类别|图像-图像分割| +|网络|hrnet| +|数据集|百度自建数据集| +|是否支持Fine-tuning|否| +|模型大小|5.8MB| +|指标|-| +|最新更新日期|2021-02-26| + -HumanSeg-mobile是基于HRNet(Deep High-Resolution Representation Learning for Visual Recognition)的人像分割网络。HRNet在特征提取过程中保持了高分辨率的信息,保持了物体的细节信息,并可通过控制每个分支的通道数调整模型的大小。HumanSeg-mobile采用了HRNet_w18_small_v1的网络结构,模型大小只有5.8M, 适用于移动端或服务端CPU的前置摄像头场景。 -## 命令行预测 +## 一、模型基本信息 -``` -hub run humanseg_mobile --input_path "/PATH/TO/IMAGE" +- ### 应用效果展示 -``` + + - 样例结果示例: +


+ +- ### 模型介绍 -## API + - HumanSeg-mobile采用了HRNet_w18_small_v1的网络结构,模型大小只有5.8M, 适用于移动端或服务端CPU的前置摄像头场景。 -```python -def segment(images=None, - paths=None, - batch_size=1, - use_gpu=False, - visualization=False, - output_dir='humanseg_mobile_output') -``` + - 更多详情请参考:[humanseg_mobile](https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.2/contrib/HumanSeg) -预测API,用于人像分割。 +## 二、安装 -**参数** +- ### 1、环境依赖 -* images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\],BGR格式; -* paths (list\[str\]): 图片的路径; -* batch\_size (int): batch 的大小; -* use\_gpu (bool): 是否使用 GPU预测,如果使用GPU预测,则在预测之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置; -* visualization (bool): 是否将识别结果保存为图片文件; -* output\_dir (str): 图片的保存路径。 + - paddlepaddle >= 2.0.0 -**返回** + - paddlehub >= 2.0.0 -* res (list\[dict\]): 识别结果的列表,列表中每一个元素为 dict,关键字有 'save\_path', 'data',对应的取值为: - * save\_path (str, optional): 可视化图片的保存路径(仅当visualization=True时存在); - * data (numpy.ndarray): 人像分割结果,仅包含Alpha通道,取值为0-255 (0为全透明,255为不透明),也即取值越大的像素点越可能为人体,取值越小的像素点越可能为背景。 +- ### 2、安装 + + - ```shell + $ hub install humanseg_mobile + ``` + + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) +## 三、模型API预测 -```python -def video_stream_segment(self, - frame_org, - frame_id, - prev_gray, - prev_cfd, - use_gpu=False): -``` +- ### 1、命令行预测 -预测API,用于逐帧对视频人像分割。 + ``` + hub run humanseg_mobile --input_path "/PATH/TO/IMAGE" + ``` +- ### 2、代码示例 -**参数** + - 图片分割及视频分割代码示例: -* frame_org (numpy.ndarray): 单帧图片数据,ndarray.shape 为 \[H, W, C\],BGR格式; -* frame_id (int): 当前帧的编号; -* prev_gray (numpy.ndarray): 前一帧输入网络图像的灰度图; -* prev_cfd (numpy.ndarray): 前一帧光流追踪图和预测结果融合图 -* use\_gpu (bool): 是否使用 GPU预测,如果使用GPU预测,则在预测之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置; + ```python + import cv2 + import paddlehub as hub + human_seg = hub.Module(name='humanseg_mobile') + im = cv2.imread('/PATH/TO/IMAGE') + #visualization=True可以用于查看人像分割图片效果,可设置为False提升运行速度。 + res = human_seg.segment(images=[im],visualization=True) + print(res[0]['data']) + human_seg.video_segment('/PATH/TO/VIDEO') + human_seg.save_inference_model('/PATH/TO/SAVE/MODEL') -**返回** + ``` + - 视频流预测代码示例: -* img_matting (numpy.ndarray): 人像分割结果,仅包含Alpha通道,取值为0-1 (0为全透明,1为不透明)。 -* cur_gray (numpy.ndarray): 当前帧输入网络图像的灰度图; -* optflow_map (numpy.ndarray): 当前帧光流追踪图和预测结果融合图 + ```python + import cv2 + import numpy as np + import paddlehub as hub + human_seg = hub.Module('humanseg_mobile') + cap_video = cv2.VideoCapture('\PATH\TO\VIDEO') + fps = cap_video.get(cv2.CAP_PROP_FPS) + save_path = 'humanseg_mobile_video.avi' + width = int(cap_video.get(cv2.CAP_PROP_FRAME_WIDTH)) + height = int(cap_video.get(cv2.CAP_PROP_FRAME_HEIGHT)) + cap_out = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), fps, (width, height)) + prev_gray = None + prev_cfd = None + while cap_video.isOpened(): + ret, frame_org = cap_video.read() + if ret: + [img_matting, prev_gray, prev_cfd] = human_seg.video_stream_segment(frame_org=frame_org, frame_id=cap_video.get(1), prev_gray=prev_gray, prev_cfd=prev_cfd) + img_matting = np.repeat(img_matting[:, :, np.newaxis], 3, axis=2) + bg_im = np.ones_like(img_matting) * 255 + comb = (img_matting * frame_org + (1 - img_matting) * bg_im).astype(np.uint8) + cap_out.write(comb) + else: + break -```python -def video_segment(self, - video_path=None, - use_gpu=False, - save_dir='humanseg_mobile_video_result'): -``` + cap_video.release() + cap_out.release() 
-预测API,用于视频人像分割。 + ``` -**参数** +- ### 3、API -* video\_path (str): 待分割视频路径。若为None,则从本地摄像头获取视频,并弹出窗口显示在线分割结果。 -* use\_gpu (bool): 是否使用 GPU预测,如果使用GPU预测,则在预测之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置; -* save\_dir (str): 视频保存路径,仅在video\_path不为None时启用,保存离线视频处理结果。 + ```python + def segment(images=None, + paths=None, + batch_size=1, + use_gpu=False, + visualization=False, + output_dir='humanseg_mobile_output') + ``` + - 预测API,用于人像分割。 -```python -def save_inference_model(dirname='humanseg_mobile_model', - model_filename=None, - params_filename=None, - combined=True) -``` + - **参数** -将模型保存到指定路径。 + * images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\],BGR格式; + * paths (list\[str\]): 图片的路径; + * batch\_size (int): batch 的大小; + * use\_gpu (bool): 是否使用 GPU预测,如果使用GPU预测,则在预测之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置; + * visualization (bool): 是否将识别结果保存为图片文件; + * output\_dir (str): 图片的保存路径。 -**参数** + - **返回** -* dirname: 存在模型的目录名称 -* model\_filename: 模型文件名称,默认为\_\_model\_\_ -* params\_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效) -* combined: 是否将参数保存到统一的一个文件中 + * res (list\[dict\]): 识别结果的列表,列表中每一个元素为 dict,关键字有 'save\_path', 'data',对应的取值为: + * save\_path (str, optional): 可视化图片的保存路径(仅当visualization=True时存在); + * data (numpy.ndarray): 人像分割结果,仅包含Alpha通道,取值为0-255 (0为全透明,255为不透明),也即取值越大的像素点越可能为人体,取值越小的像素点越可能为背景。 -## 代码示例 -图片分割及视频分割代码示例: + ```python + def video_stream_segment(self, + frame_org, + frame_id, + prev_gray, + prev_cfd, + use_gpu=False): + ``` -```python -import cv2 -import paddlehub as hub + - 预测API,用于逐帧对视频人像分割。 -human_seg = hub.Module(name='humanseg_mobile') -im = cv2.imread('/PATH/TO/IMAGE') -#visualization=True可以用于查看超分图片效果,可设置为False提升运行速度。 -res = human_seg.segment(images=[im],visualization=True) -print(res[0]['data']) -human_seg.video_segment('/PATH/TO/VIDEO') -human_seg.save_inference_model('/PATH/TO/SAVE/MODEL') + - **参数** -``` -视频流预测代码示例: + * frame_org (numpy.ndarray): 单帧图片数据,ndarray.shape 为 \[H, W, C\],BGR格式; + * frame_id (int): 当前帧的编号; + * prev_gray (numpy.ndarray): 前一帧输入网络图像的灰度图; + * prev_cfd (numpy.ndarray): 前一帧光流追踪图和预测结果融合图 + * use\_gpu (bool): 是否使用 GPU预测,如果使用GPU预测,则在预测之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置; -```python -import cv2 -import numpy as np -import paddlehub as hub -human_seg = hub.Module('humanseg_mobile') -cap_video = cv2.VideoCapture('\PATH\TO\VIDEO') -fps = cap_video.get(cv2.CAP_PROP_FPS) -save_path = 'humanseg_mobile_video.avi' -width = int(cap_video.get(cv2.CAP_PROP_FRAME_WIDTH)) -height = int(cap_video.get(cv2.CAP_PROP_FRAME_HEIGHT)) -cap_out = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), fps, (width, height)) -prev_gray = None -prev_cfd = None -while cap_video.isOpened(): - ret, frame_org = cap_video.read() - if ret: - [img_matting, prev_gray, prev_cfd] = human_seg.video_stream_segment(frame_org=frame_org, frame_id=cap_video.get(1), prev_gray=prev_gray, prev_cfd=prev_cfd) - img_matting = np.repeat(img_matting[:, :, np.newaxis], 3, axis=2) - bg_im = np.ones_like(img_matting) * 255 - comb = (img_matting * frame_org + (1 - img_matting) * bg_im).astype(np.uint8) - cap_out.write(comb) - else: - break + - **返回** -cap_video.release() -cap_out.release() + * img_matting (numpy.ndarray): 人像分割结果,仅包含Alpha通道,取值为0-1 (0为全透明,1为不透明)。 + * cur_gray (numpy.ndarray): 当前帧输入网络图像的灰度图; + * optflow_map (numpy.ndarray): 当前帧光流追踪图和预测结果融合图 -``` -## 服务部署 + ```python + def video_segment(self, + video_path=None, + use_gpu=False, + save_dir='humanseg_mobile_video_result'): + ``` -PaddleHub Serving可以部署一个人像分割的在线服务。 + - 预测API,用于视频人像分割。 -## 第一步:启动PaddleHub Serving + - 
**参数** -运行启动命令: + * video\_path (str): 待分割视频路径。若为None,则从本地摄像头获取视频,并弹出窗口显示在线分割结果。 + * use\_gpu (bool): 是否使用 GPU预测,如果使用GPU预测,则在预测之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置; + * save\_dir (str): 视频保存路径,仅在video\_path不为None时启用,保存离线视频处理结果。 -```shell -$ hub serving start -m humanseg_mobile -``` -这样就完成了一个人像分割的服务化API的部署,默认端口号为8866。 + ```python + def save_inference_model(dirname='humanseg_mobile_model', + model_filename=None, + params_filename=None, + combined=True) + ``` -**NOTE:** 如使用GPU预测,则需要在启动服务之前,设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + - 将模型保存到指定路径。 -## 第二步:发送预测请求 + - **参数** + * dirname: 存在模型的目录名称 + * model\_filename: 模型文件名称,默认为\_\_model\_\_ + * params\_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效) + * combined: 是否将参数保存到统一的一个文件中 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 +## 四、服务部署 -```python -import requests -import json -import base64 +- PaddleHub Serving可以部署一个人像分割的在线服务。 -import cv2 -import numpy as np +- ### 第一步:启动PaddleHub Serving -def cv2_to_base64(image): - data = cv2.imencode('.jpg', image)[1] - return base64.b64encode(data.tostring()).decode('utf8') -def base64_to_cv2(b64str): - data = base64.b64decode(b64str.encode('utf8')) - data = np.fromstring(data, np.uint8) - data = cv2.imdecode(data, cv2.IMREAD_COLOR) - return data + - 运行启动命令: -# 发送HTTP请求 -org_im = cv2.imread('/PATH/TO/IMAGE') -data = {'images':[cv2_to_base64(org_im)]} -headers = {"Content-type": "application/json"} -url = "http://127.0.0.1:8866/predict/humanseg_mobile" -r = requests.post(url=url, headers=headers, data=json.dumps(data)) + ```shell + $ hub serving start -m humanseg_mobile + ``` -# 保存图片 -mask =cv2.cvtColor(base64_to_cv2(r.json()["results"][0]['data']), cv2.COLOR_BGR2GRAY) -rgba = np.concatenate((org_im, np.expand_dims(mask, axis=2)), axis=2) -cv2.imwrite("segment_human_mobile.png", rgba) -``` + - 这样就完成了一个人像分割的服务化API的部署,默认端口号为8866。 -### 查看代码 + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 - +- ### 第二步:发送预测请求 + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 -### 依赖 + ```python + import requests + import json + import base64 -paddlepaddle >= 1.8.0 + import cv2 + import numpy as np -paddlehub >= 1.7.1 + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + # 发送HTTP请求 + org_im = cv2.imread('/PATH/TO/IMAGE') + data = {'images':[cv2_to_base64(org_im)]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/humanseg_mobile" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # 保存图片 + mask =cv2.cvtColor(base64_to_cv2(r.json()["results"][0]['data']), cv2.COLOR_BGR2GRAY) + rgba = np.concatenate((org_im, np.expand_dims(mask, axis=2)), axis=2) + cv2.imwrite("segment_human_mobile.png", rgba) + ``` + + +## 五、更新历史 + +* 1.0.0 + + 初始发布 +* 1.1.0 + + 新增视频人像分割接口 + + 新增视频流人像分割接口 +* 1.1.1 + + 修复cudnn为8.0.4显存泄露问题 diff --git a/modules/image/semantic_segmentation/humanseg_server/README.md b/modules/image/semantic_segmentation/humanseg_server/README.md index bf1b0a4c014a8be1e6e2e64572e7edbca65c1dd6..8845cb82cd109e6ddfb7b92f01f607333dada588 100644 --- a/modules/image/semantic_segmentation/humanseg_server/README.md +++ b/modules/image/semantic_segmentation/humanseg_server/README.md @@ -1,210 +1,253 @@ -## 模型概述 +# humanseg_server + +|模型名称|humanseg_server| +| :--- | :---: | +|类别|图像-图像分割| +|网络|hrnet| +|数据集|百度自建数据集| +|是否支持Fine-tuning|否| 
+|模型大小|159MB| +|指标|-| +|最新更新日期|2021-02-26| -高精度模型,适用于服务端GPU且背景复杂的人像场景, 模型结构为Deeplabv3+/Xcetion65, 模型大小为158M,网络结构如图: -


-## 命令行预测 -``` -hub run humanseg_server --input_path "/PATH/TO/IMAGE" -``` +## 一、模型基本信息 +- ### 应用效果展示 + + - 样例结果示例: +


+- ### 模型介绍 -## API + - HumanSeg-server使用百度自建数据集进行训练,可用于人像分割,支持任意大小的图片输入。 -```python -def segment(self, - images=None, - paths=None, - batch_size=1, - use_gpu=False, - visualization=False, - output_dir='humanseg_server_output'): -``` + - 更多详情请参考:[humanseg_server](https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.2/contrib/HumanSeg) -预测API,用于人像分割。 +## 二、安装 -**参数** +- ### 1、环境依赖 -* images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\],BGR格式; -* paths (list\[str\]): 图片的路径; -* batch\_size (int): batch 的大小; -* use\_gpu (bool): 是否使用 GPU; -* visualization (bool): 是否将识别结果保存为图片文件; -* output\_dir (str): 图片的保存路径。 + - paddlepaddle >=2.0.0 -**返回** - -* res (list\[dict\]): 识别结果的列表,列表中每一个元素为 dict,关键字有 'save\_path', 'data',对应的取值为: - * save\_path (str, optional): 可视化图片的保存路径(仅当visualization=True时存在); - * data (numpy.ndarray): 人像分割结果,仅包含Alpha通道,取值为0-255 (0为全透明,255为不透明),也即取值越大的像素点越可能为人体,取值越小的像素点越可能为背景。 - -```python -def video_stream_segment(self, - frame_org, - frame_id, - prev_gray, - prev_cfd, - use_gpu=False): -``` + - paddlehub >= 2.0.0 -预测API,用于逐帧对视频人像分割。 +- ### 2、安装 -**参数** + - ```shell + $ hub install humanseg_server + ``` + + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) -* frame_org (numpy.ndarray): 单帧图片数据,ndarray.shape 为 \[H, W, C\],BGR格式; -* frame_id (int): 当前帧的编号; -* prev_gray (numpy.ndarray): 前一帧输入网络图像的灰度图; -* prev_cfd (numpy.ndarray): 前一帧光流追踪图和预测结果融合图; -* use\_gpu (bool): 是否使用 GPU预测,如果使用GPU预测,则在预测之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 +## 三、模型API预测 +- ### 1、命令行预测 -**返回** + ``` + hub run humanseg_server --input_path "/PATH/TO/IMAGE" + ``` +- ### 2、代码示例 -* img_matting (numpy.ndarray): 人像分割结果,仅包含Alpha通道,取值为0-1 (0为全透明,1为不透明); -* cur_gray (numpy.ndarray): 当前帧输入分割网络图像的灰度图; -* optflow_map (numpy.ndarray): 当前帧光流追踪图和预测结果融合图。 + - 图片分割及视频分割代码示例: + ```python + import cv2 + import paddlehub as hub -```python -def video_segment(self, - video_path=None, - use_gpu=False, - save_dir='humanseg_server_video'): -``` + human_seg = hub.Module(name='humanseg_server') + im = cv2.imread('/PATH/TO/IMAGE') + #visualization=True可以用于查看人像分割图片效果,可设置为False提升运行速度。 + res = human_seg.segment(images=[im],visualization=True) + print(res[0]['data']) + human_seg.video_segment('/PATH/TO/VIDEO') + human_seg.save_inference_model('/PATH/TO/SAVE/MODEL') -预测API,用于视频人像分割。 + ``` + - 视频流预测代码示例: -**参数** + ```python + import cv2 + import numpy as np + import paddlehub as hub -* video\_path (str): 待分割视频路径。若为None,则从本地摄像头获取视频,并弹出窗口显示在线分割结果; -* use\_gpu (bool): 是否使用GPU预测,如果使用GPU预测,则在预测之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置; -* save\_dir (str): 视频保存路径,仅在video\_path不为None时启用,保存离线视频处理结果。 + human_seg = hub.Module('humanseg_server') + cap_video = cv2.VideoCapture('\PATH\TO\VIDEO') + fps = cap_video.get(cv2.CAP_PROP_FPS) + save_path = 'humanseg_server_video.avi' + width = int(cap_video.get(cv2.CAP_PROP_FRAME_WIDTH)) + height = int(cap_video.get(cv2.CAP_PROP_FRAME_HEIGHT)) + cap_out = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), fps, (width, height)) + prev_gray = None + prev_cfd = None + while cap_video.isOpened(): + ret, frame_org = cap_video.read() + if ret: + [img_matting, prev_gray, prev_cfd] = human_seg.video_stream_segment(frame_org=frame_org, frame_id=cap_video.get(1), prev_gray=prev_gray, prev_cfd=prev_cfd) + img_matting = np.repeat(img_matting[:, :, np.newaxis], 3, axis=2) + bg_im = np.ones_like(img_matting) * 255 + comb 
= (img_matting * frame_org + (1 - img_matting) * bg_im).astype(np.uint8) + cap_out.write(comb) + else: + break + cap_video.release() + cap_out.release() -```python -def save_inference_model(dirname, - model_filename=None, - params_filename=None, - combined=True): -``` + ``` -将模型保存到指定路径。 +- ### 3、API -**参数** + ```python + def segment(images=None, + paths=None, + batch_size=1, + use_gpu=False, + visualization=False, + output_dir='humanseg_server_output') + ``` -* dirname: 存在模型的目录名称 -* model\_filename: 模型文件名称,默认为\_\_model\_\_ -* params\_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效) -* combined: 是否将参数保存到统一的一个文件中 + - 预测API,用于人像分割。 -## 代码示例 + - **参数** -图片分割及视频分割代码示例: -```python -import cv2 -import paddlehub as hub + * images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\],BGR格式; + * paths (list\[str\]): 图片的路径; + * batch\_size (int): batch 的大小; + * use\_gpu (bool): 是否使用 GPU预测,如果使用GPU预测,则在预测之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置; + * visualization (bool): 是否将识别结果保存为图片文件; + * output\_dir (str): 图片的保存路径。 -human_seg = hub.Module(name='humanseg_server') -im = cv2.imread('/PATH/TO/IMAGE') -#visualization=True可以用于查看超分图片效果,可设置为False提升运行速度。 -res = human_seg.segment(images=[im],visualization=True) -print(res[0]['data']) -human_seg.video_segment('/PATH/TO/VIDEO') -human_seg.save_inference_model('/PATH/TO/SAVE/MODEL') + - **返回** -``` -视频流预测代码示例: -```python -import cv2 -import numpy as np -import paddlehub as hub + * res (list\[dict\]): 识别结果的列表,列表中每一个元素为 dict,关键字有 'save\_path', 'data',对应的取值为: + * save\_path (str, optional): 可视化图片的保存路径(仅当visualization=True时存在); + * data (numpy.ndarray): 人像分割结果,仅包含Alpha通道,取值为0-255 (0为全透明,255为不透明),也即取值越大的像素点越可能为人体,取值越小的像素点越可能为背景。 -human_seg = hub.Module('humanseg_server') -cap_video = cv2.VideoCapture('\PATH\TO\VIDEO') -fps = cap_video.get(cv2.CAP_PROP_FPS) -save_path = 'humanseg_server_video.avi' -width = int(cap_video.get(cv2.CAP_PROP_FRAME_WIDTH)) -height = int(cap_video.get(cv2.CAP_PROP_FRAME_HEIGHT)) -cap_out = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), fps, (width, height)) -prev_gray = None -prev_cfd = None -while cap_video.isOpened(): - ret, frame_org = cap_video.read() - if ret: - [img_matting, prev_gray, prev_cfd] = human_seg.video_stream_segment(frame_org=frame_org, frame_id=cap_video.get(1), prev_gray=prev_gray, prev_cfd=prev_cfd) - img_matting = np.repeat(img_matting[:, :, np.newaxis], 3, axis=2) - bg_im = np.ones_like(img_matting) * 255 - comb = (img_matting * frame_org + (1 - img_matting) * bg_im).astype(np.uint8) - cap_out.write(comb) - else: - break -cap_video.release() -cap_out.release() + ```python + def video_stream_segment(self, + frame_org, + frame_id, + prev_gray, + prev_cfd, + use_gpu=False): + ``` -``` + - 预测API,用于逐帧对视频人像分割。 -## 服务部署 + - **参数** -PaddleHub Serving可以部署一个人像分割的在线服务。 + * frame_org (numpy.ndarray): 单帧图片数据,ndarray.shape 为 \[H, W, C\],BGR格式; + * frame_id (int): 当前帧的编号; + * prev_gray (numpy.ndarray): 前一帧输入网络图像的灰度图; + * prev_cfd (numpy.ndarray): 前一帧光流追踪图和预测结果融合图 + * use\_gpu (bool): 是否使用 GPU预测,如果使用GPU预测,则在预测之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置; -## 第一步:启动PaddleHub Serving -运行启动命令: -```shell -$ hub serving start -m humanseg_server -``` + - **返回** -这样就完成了一个人像分割的服务化API的部署,默认端口号为8866。 + * img_matting (numpy.ndarray): 人像分割结果,仅包含Alpha通道,取值为0-1 (0为全透明,1为不透明)。 + * cur_gray (numpy.ndarray): 当前帧输入网络图像的灰度图; + * optflow_map (numpy.ndarray): 当前帧光流追踪图和预测结果融合图 -**NOTE:** 如使用GPU预测,则需要在启动服务之前,设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 -## 第二步:发送预测请求 + ```python + def video_segment(self, + video_path=None, + use_gpu=False, 
+ save_dir='humanseg_server_video_result'): + ``` -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + - 预测API,用于视频人像分割。 -```python -import requests -import json -import base64 + - **参数** -import cv2 -import numpy as np + * video\_path (str): 待分割视频路径。若为None,则从本地摄像头获取视频,并弹出窗口显示在线分割结果。 + * use\_gpu (bool): 是否使用 GPU预测,如果使用GPU预测,则在预测之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置; + * save\_dir (str): 视频保存路径,仅在video\_path不为None时启用,保存离线视频处理结果。 -def cv2_to_base64(image): - data = cv2.imencode('.jpg', image)[1] - return base64.b64encode(data.tostring()).decode('utf8') -def base64_to_cv2(b64str): - data = base64.b64decode(b64str.encode('utf8')) - data = np.fromstring(data, np.uint8) - data = cv2.imdecode(data, cv2.IMREAD_COLOR) - return data -# 发送HTTP请求 -org_im = cv2.imread('PATH/TO/IMAGE') -data = {'images':[cv2_to_base64(org_im)]} -headers = {"Content-type": "application/json"} -url = "http://127.0.0.1:8866/predict/humanseg_server" -r = requests.post(url=url, headers=headers, data=json.dumps(data)) - -# 保存图片 -mask =cv2.cvtColor(base64_to_cv2(r.json()["results"][0]['data']), cv2.COLOR_BGR2GRAY) -rgba = np.concatenate((org_im, np.expand_dims(mask, axis=2)), axis=2) -cv2.imwrite("segment_human_server.png", rgba) -``` + ```python + def save_inference_model(dirname='humanseg_server_model', + model_filename=None, + params_filename=None, + combined=True) + ``` + - 将模型保存到指定路径。 -### 查看代码 + - **参数** + * dirname: 存在模型的目录名称 + * model\_filename: 模型文件名称,默认为\_\_model\_\_ + * params\_filename: 参数文件名称,默认为\_\_params\_\_(仅当`combined`为True时生效) + * combined: 是否将参数保存到统一的一个文件中 -https://github.com/PaddlePaddle/PaddleSeg/tree/develop/contrib/HumanSeg +## 四、服务部署 +- PaddleHub Serving可以部署一个人像分割的在线服务。 -### 依赖 - -paddlepaddle >= 1.8.0 +- ### 第一步:启动PaddleHub Serving -paddlehub >= 1.7.1 + - 运行启动命令: + + ```shell + $ hub serving start -m humanseg_server + ``` + + - 这样就完成了一个人像分割的服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + ```python + import requests + import json + import base64 + + import cv2 + import numpy as np + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + + # 发送HTTP请求 + org_im = cv2.imread('/PATH/TO/IMAGE') + data = {'images':[cv2_to_base64(org_im)]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/humanseg_server" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # 保存图片 + mask =cv2.cvtColor(base64_to_cv2(r.json()["results"][0]['data']), cv2.COLOR_BGR2GRAY) + rgba = np.concatenate((org_im, np.expand_dims(mask, axis=2)), axis=2) + cv2.imwrite("segment_human_server.png", rgba) + ``` + + +## 五、更新历史 + +* 1.0.0 + + 初始发布 +* 1.1.0 + + 新增视频人像分割接口 + + 新增视频流人像分割接口 +* 1.1.1 + + 修复cudnn为8.0.4显存泄露问题 diff --git a/modules/image/semantic_segmentation/ocrnet_hrnetw18_cityscapes/README.md b/modules/image/semantic_segmentation/ocrnet_hrnetw18_cityscapes/README.md new file mode 100644 index 0000000000000000000000000000000000000000..e5a557d39ea40b17e67c2711db4a38fe212f5a50 --- /dev/null +++ b/modules/image/semantic_segmentation/ocrnet_hrnetw18_cityscapes/README.md @@ -0,0 +1,173 @@ +# PaddleHub 图像分割 + +## 模型预测 + +若想使用我们提供的预训练模型进行预测,可使用如下脚本: + +```python +import paddle +import cv2 +import paddlehub as hub + +if __name__ == '__main__': + model = 
hub.Module(name='ocrnet_hrnetw18_cityscapes') + img = cv2.imread("/PATH/TO/IMAGE") + model.predict(images=[img], visualization=True) +``` + + +## 如何开始Fine-tune + +在完成安装PaddlePaddle与PaddleHub后,通过执行`python train.py`即可开始使用ocrnet_hrnetw18_cityscapes模型对OpticDiscSeg等数据集进行Fine-tune。 + +## 代码步骤 + +使用PaddleHub Fine-tune API进行Fine-tune可以分为4个步骤。 + +### Step1: 定义数据预处理方式 +```python +from paddlehub.vision.segmentation_transforms import Compose, Resize, Normalize + +transform = Compose([Resize(target_size=(512, 512)), Normalize()]) +``` + +`segmentation_transforms` 数据增强模块定义了丰富的针对图像分割数据的预处理方式,用户可按照需求替换自己需要的数据预处理方式。 + +### Step2: 下载数据集并使用 +```python +from paddlehub.datasets import OpticDiscSeg + +train_reader = OpticDiscSeg(transform, mode='train') + +``` +* `transform`: 数据预处理方式。 +* `mode`: 选择数据模式,可选项有 `train`, `test`, `val`, 默认为`train`。 + +数据集的准备代码可以参考 [opticdiscseg.py](../../paddlehub/datasets/opticdiscseg.py)。`hub.datasets.OpticDiscSeg()`会自动从网络下载数据集并解压到用户目录下`$HOME/.paddlehub/dataset`目录。 + +### Step3: 加载预训练模型 + +```python +model = hub.Module(name='ocrnet_hrnetw18_cityscapes', num_classes=2, pretrained=None) +``` +* `name`: 选择预训练模型的名字。 +* `num_classes`: 分割模型的类别数目。 +* `pretrained`: 是否加载自己训练的模型,若为None,则加载提供的模型默认参数。 + +### Step4: 选择优化策略和运行配置 + +```python +scheduler = paddle.optimizer.lr.PolynomialDecay(learning_rate=0.01, decay_steps=1000, power=0.9, end_lr=0.0001) +optimizer = paddle.optimizer.Adam(learning_rate=scheduler, parameters=model.parameters()) +trainer = Trainer(model, optimizer, checkpoint_dir='test_ckpt_img_ocr', use_gpu=True) +``` + +#### 优化策略 + +Paddle2.0提供了多种优化器选择,如`SGD`, `Adam`, `Adamax`等,其中`Adam`: + +* `learning_rate`: 全局学习率。 +* `parameters`: 待优化模型参数。 + +#### 运行配置 +`Trainer` 主要控制Fine-tune的训练,包含以下可控制的参数: + +* `model`: 被优化模型; +* `optimizer`: 优化器选择; +* `use_gpu`: 是否使用gpu,默认为False; +* `use_vdl`: 是否使用vdl可视化训练过程; +* `checkpoint_dir`: 保存模型参数的地址; +* `compare_metrics`: 保存最优模型的衡量指标; + +`trainer.train` 主要控制具体的训练过程,包含以下可控制的参数: + +* `train_dataset`: 训练时所用的数据集; +* `epochs`: 训练轮数; +* `batch_size`: 训练的批大小,如果使用GPU,请根据实际情况调整batch_size; +* `num_workers`: works的数量,默认为0; +* `eval_dataset`: 验证集; +* `log_interval`: 打印日志的间隔, 单位为执行批训练的次数。 +* `save_interval`: 保存模型的间隔频次,单位为执行训练的轮数。 + +## 模型预测 + +当完成Fine-tune后,Fine-tune过程在验证集上表现最优的模型会被保存在`${CHECKPOINT_DIR}/best_model`目录下,其中`${CHECKPOINT_DIR}`目录为Fine-tune时所选择的保存checkpoint的目录。 + +我们使用该模型来进行预测。predict.py脚本如下: + +```python +import paddle +import cv2 +import paddlehub as hub + +if __name__ == '__main__': + model = hub.Module(name='ocrnet_hrnetw18_cityscapes', pretrained='/PATH/TO/CHECKPOINT') + img = cv2.imread("/PATH/TO/IMAGE") + model.predict(images=[img], visualization=True) +``` + +参数配置正确后,请执行脚本`python predict.py`。 +**Args** +* `images`:原始图像路径或BGR格式图片; +* `visualization`: 是否可视化,默认为True; +* `save_path`: 保存结果的路径,默认保存路径为'seg_result'。 + +**NOTE:** 进行预测时,所选择的module,checkpoint_dir,dataset必须和Fine-tune所用的一样。 + +## 服务部署 + +PaddleHub Serving可以部署一个在线图像分割服务。 + +### Step1: 启动PaddleHub Serving + +运行启动命令: + +```shell +$ hub serving start -m ocrnet_hrnetw18_cityscapes +``` + +这样就完成了一个图像分割服务化API的部署,默认端口号为8866。 + +**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +### Step2: 发送预测请求 + +配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + +```python +import requests +import json +import cv2 +import base64 + +import numpy as np + + +def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = 
cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + +# 发送HTTP请求 +org_im = cv2.imread('/PATH/TO/IMAGE') +data = {'images':[cv2_to_base64(org_im)]} +headers = {"Content-type": "application/json"} +url = "http://127.0.0.1:8866/predict/ocrnet_hrnetw18_cityscapes" +r = requests.post(url=url, headers=headers, data=json.dumps(data)) +mask = base64_to_cv2(r.json()["results"][0]) +``` + +### 查看代码 + +https://github.com/PaddlePaddle/PaddleSeg + +### 依赖 + +paddlepaddle >= 2.0.0 + +paddlehub >= 2.0.0 diff --git a/modules/image/semantic_segmentation/ocrnet_hrnetw18_cityscapes/hrnet.py b/modules/image/semantic_segmentation/ocrnet_hrnetw18_cityscapes/hrnet.py new file mode 100644 index 0000000000000000000000000000000000000000..82f396340cf4db9269a6f140ccdd3d60364035e4 --- /dev/null +++ b/modules/image/semantic_segmentation/ocrnet_hrnetw18_cityscapes/hrnet.py @@ -0,0 +1,531 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import math +from typing import Tuple + +import paddle +import paddle.nn as nn +import paddle.nn.functional as F + +import ocrnet_hrnetw18_cityscapes.layers as L + + +class HRNet_W18(nn.Layer): + """ + The HRNet implementation based on PaddlePaddle. + + The original article refers to + Jingdong Wang, et, al. "HRNet:Deep High-Resolution Representation Learning for Visual Recognition" + (https://arxiv.org/pdf/1908.07919.pdf). + + Args: + stage1_num_modules (int, optional): Number of modules for stage1. Default 1. + stage1_num_blocks (list, optional): Number of blocks per module for stage1. Default (4). + stage1_num_channels (list, optional): Number of channels per branch for stage1. Default (64). + stage2_num_modules (int, optional): Number of modules for stage2. Default 1. + stage2_num_blocks (list, optional): Number of blocks per module for stage2. Default (4, 4). + stage2_num_channels (list, optional): Number of channels per branch for stage2. Default (18, 36). + stage3_num_modules (int, optional): Number of modules for stage3. Default 4. + stage3_num_blocks (list, optional): Number of blocks per module for stage3. Default (4, 4, 4). + stage3_num_channels (list, optional): Number of channels per branch for stage3. Default (18, 36, 72). + stage4_num_modules (int, optional): Number of modules for stage4. Default 3. + stage4_num_blocks (list, optional): Number of blocks per module for stage4. Default (4, 4, 4, 4). + stage4_num_channels (list, optional): Number of channels per branch for stage4. Default (18, 36, 72. 144). + has_se (bool, optional): Whether to use Squeeze-and-Excitation module. Default False. + align_corners (bool, optional): An argument of F.interpolate. It should be set to False when the feature size is even, + e.g. 1024x512, otherwise it is True, e.g. 769x769. Default: False. 
+ """ + + def __init__(self, + stage1_num_modules: int = 1, + stage1_num_blocks: Tuple[int] = (4, ), + stage1_num_channels: Tuple[int] = (64, ), + stage2_num_modules: int = 1, + stage2_num_blocks: Tuple[int] = (4, 4), + stage2_num_channels: Tuple[int] = (18, 36), + stage3_num_modules: int = 4, + stage3_num_blocks: Tuple[int] = (4, 4, 4), + stage3_num_channels: Tuple[int] = (18, 36, 72), + stage4_num_modules: int = 3, + stage4_num_blocks: Tuple[int] = (4, 4, 4, 4), + stage4_num_channels: Tuple[int] = (18, 36, 72, 144), + has_se: bool = False, + align_corners: bool = False): + super(HRNet_W18, self).__init__() + + self.stage1_num_modules = stage1_num_modules + self.stage1_num_blocks = stage1_num_blocks + self.stage1_num_channels = stage1_num_channels + self.stage2_num_modules = stage2_num_modules + self.stage2_num_blocks = stage2_num_blocks + self.stage2_num_channels = stage2_num_channels + self.stage3_num_modules = stage3_num_modules + self.stage3_num_blocks = stage3_num_blocks + self.stage3_num_channels = stage3_num_channels + self.stage4_num_modules = stage4_num_modules + self.stage4_num_blocks = stage4_num_blocks + self.stage4_num_channels = stage4_num_channels + self.has_se = has_se + self.align_corners = align_corners + self.feat_channels = [sum(stage4_num_channels)] + + self.conv_layer1_1 = L.ConvBNReLU( + in_channels=3, out_channels=64, kernel_size=3, stride=2, padding='same', bias_attr=False) + + self.conv_layer1_2 = L.ConvBNReLU( + in_channels=64, out_channels=64, kernel_size=3, stride=2, padding='same', bias_attr=False) + + self.la1 = Layer1( + num_channels=64, + num_blocks=self.stage1_num_blocks[0], + num_filters=self.stage1_num_channels[0], + has_se=has_se, + name="layer2") + + self.tr1 = TransitionLayer( + in_channels=[self.stage1_num_channels[0] * 4], out_channels=self.stage2_num_channels, name="tr1") + + self.st2 = Stage( + num_channels=self.stage2_num_channels, + num_modules=self.stage2_num_modules, + num_blocks=self.stage2_num_blocks, + num_filters=self.stage2_num_channels, + has_se=self.has_se, + name="st2", + align_corners=align_corners) + + self.tr2 = TransitionLayer( + in_channels=self.stage2_num_channels, out_channels=self.stage3_num_channels, name="tr2") + self.st3 = Stage( + num_channels=self.stage3_num_channels, + num_modules=self.stage3_num_modules, + num_blocks=self.stage3_num_blocks, + num_filters=self.stage3_num_channels, + has_se=self.has_se, + name="st3", + align_corners=align_corners) + + self.tr3 = TransitionLayer( + in_channels=self.stage3_num_channels, out_channels=self.stage4_num_channels, name="tr3") + self.st4 = Stage( + num_channels=self.stage4_num_channels, + num_modules=self.stage4_num_modules, + num_blocks=self.stage4_num_blocks, + num_filters=self.stage4_num_channels, + has_se=self.has_se, + name="st4", + align_corners=align_corners) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + conv1 = self.conv_layer1_1(x) + conv2 = self.conv_layer1_2(conv1) + + la1 = self.la1(conv2) + + tr1 = self.tr1([la1]) + st2 = self.st2(tr1) + + tr2 = self.tr2(st2) + st3 = self.st3(tr2) + + tr3 = self.tr3(st3) + st4 = self.st4(tr3) + + x0_h, x0_w = st4[0].shape[2:] + x1 = F.interpolate(st4[1], (x0_h, x0_w), mode='bilinear', align_corners=self.align_corners) + x2 = F.interpolate(st4[2], (x0_h, x0_w), mode='bilinear', align_corners=self.align_corners) + x3 = F.interpolate(st4[3], (x0_h, x0_w), mode='bilinear', align_corners=self.align_corners) + x = paddle.concat([st4[0], x1, x2, x3], axis=1) + + return [x] + + +class Layer1(nn.Layer): + def __init__(self, 
num_channels: int, num_filters: int, num_blocks: int, has_se: bool = False, name: str = None): + super(Layer1, self).__init__() + + self.bottleneck_block_list = [] + + for i in range(num_blocks): + bottleneck_block = self.add_sublayer( + "bb_{}_{}".format(name, i + 1), + BottleneckBlock( + num_channels=num_channels if i == 0 else num_filters * 4, + num_filters=num_filters, + has_se=has_se, + stride=1, + downsample=True if i == 0 else False, + name=name + '_' + str(i + 1))) + self.bottleneck_block_list.append(bottleneck_block) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + conv = x + for block_func in self.bottleneck_block_list: + conv = block_func(conv) + return conv + + +class TransitionLayer(nn.Layer): + def __init__(self, in_channels: int, out_channels: int, name=None): + super(TransitionLayer, self).__init__() + + num_in = len(in_channels) + num_out = len(out_channels) + self.conv_bn_func_list = [] + for i in range(num_out): + residual = None + if i < num_in: + if in_channels[i] != out_channels[i]: + residual = self.add_sublayer( + "transition_{}_layer_{}".format(name, i + 1), + L.ConvBNReLU( + in_channels=in_channels[i], + out_channels=out_channels[i], + kernel_size=3, + padding='same', + bias_attr=False)) + else: + residual = self.add_sublayer( + "transition_{}_layer_{}".format(name, i + 1), + L.ConvBNReLU( + in_channels=in_channels[-1], + out_channels=out_channels[i], + kernel_size=3, + stride=2, + padding='same', + bias_attr=False)) + self.conv_bn_func_list.append(residual) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + outs = [] + for idx, conv_bn_func in enumerate(self.conv_bn_func_list): + if conv_bn_func is None: + outs.append(x[idx]) + else: + if idx < len(x): + outs.append(conv_bn_func(x[idx])) + else: + outs.append(conv_bn_func(x[-1])) + return outs + + +class Branches(nn.Layer): + def __init__(self, num_blocks: int, in_channels: int, out_channels: int, has_se: bool = False, name: str = None): + super(Branches, self).__init__() + + self.basic_block_list = [] + + for i in range(len(out_channels)): + self.basic_block_list.append([]) + for j in range(num_blocks[i]): + in_ch = in_channels[i] if j == 0 else out_channels[i] + basic_block_func = self.add_sublayer( + "bb_{}_branch_layer_{}_{}".format(name, i + 1, j + 1), + BasicBlock( + num_channels=in_ch, + num_filters=out_channels[i], + has_se=has_se, + name=name + '_branch_layer_' + str(i + 1) + '_' + str(j + 1))) + self.basic_block_list[i].append(basic_block_func) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + outs = [] + for idx, input in enumerate(x): + conv = input + for basic_block_func in self.basic_block_list[idx]: + conv = basic_block_func(conv) + outs.append(conv) + return outs + + +class BottleneckBlock(nn.Layer): + def __init__(self, + num_channels: int, + num_filters: int, + has_se: bool, + stride: int = 1, + downsample: bool = False, + name: str = None): + super(BottleneckBlock, self).__init__() + + self.has_se = has_se + self.downsample = downsample + + self.conv1 = L.ConvBNReLU( + in_channels=num_channels, out_channels=num_filters, kernel_size=1, padding='same', bias_attr=False) + + self.conv2 = L.ConvBNReLU( + in_channels=num_filters, + out_channels=num_filters, + kernel_size=3, + stride=stride, + padding='same', + bias_attr=False) + + self.conv3 = L.ConvBN( + in_channels=num_filters, out_channels=num_filters * 4, kernel_size=1, padding='same', bias_attr=False) + + if self.downsample: + self.conv_down = L.ConvBN( + in_channels=num_channels, out_channels=num_filters * 4, 
kernel_size=1, padding='same', bias_attr=False) + + if self.has_se: + self.se = SELayer( + num_channels=num_filters * 4, num_filters=num_filters * 4, reduction_ratio=16, name=name + '_fc') + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + residual = x + conv1 = self.conv1(x) + conv2 = self.conv2(conv1) + conv3 = self.conv3(conv2) + + if self.downsample: + residual = self.conv_down(x) + + if self.has_se: + conv3 = self.se(conv3) + + y = conv3 + residual + y = F.relu(y) + return y + + +class BasicBlock(nn.Layer): + def __init__(self, + num_channels: int, + num_filters: int, + stride: int = 1, + has_se: bool = False, + downsample: bool = False, + name: str = None): + super(BasicBlock, self).__init__() + + self.has_se = has_se + self.downsample = downsample + + self.conv1 = L.ConvBNReLU( + in_channels=num_channels, + out_channels=num_filters, + kernel_size=3, + stride=stride, + padding='same', + bias_attr=False) + self.conv2 = L.ConvBN( + in_channels=num_filters, out_channels=num_filters, kernel_size=3, padding='same', bias_attr=False) + + if self.downsample: + self.conv_down = L.ConvBNReLU( + in_channels=num_channels, out_channels=num_filters, kernel_size=1, padding='same', bias_attr=False) + + if self.has_se: + self.se = SELayer(num_channels=num_filters, num_filters=num_filters, reduction_ratio=16, name=name + '_fc') + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + residual = x + conv1 = self.conv1(x) + conv2 = self.conv2(conv1) + + if self.downsample: + residual = self.conv_down(x) + + if self.has_se: + conv2 = self.se(conv2) + + y = conv2 + residual + y = F.relu(y) + return y + + +class SELayer(nn.Layer): + def __init__(self, num_channels: int, num_filters: int, reduction_ratio: int, name: str = None): + super(SELayer, self).__init__() + + self.pool2d_gap = nn.AdaptiveAvgPool2D(1) + + self._num_channels = num_channels + + med_ch = int(num_channels / reduction_ratio) + stdv = 1.0 / math.sqrt(num_channels * 1.0) + self.squeeze = nn.Linear( + num_channels, med_ch, weight_attr=paddle.ParamAttr(initializer=nn.initializer.Uniform(-stdv, stdv))) + + stdv = 1.0 / math.sqrt(med_ch * 1.0) + self.excitation = nn.Linear( + med_ch, num_filters, weight_attr=paddle.ParamAttr(initializer=nn.initializer.Uniform(-stdv, stdv))) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + pool = self.pool2d_gap(x) + pool = paddle.reshape(pool, shape=[-1, self._num_channels]) + squeeze = self.squeeze(pool) + squeeze = F.relu(squeeze) + excitation = self.excitation(squeeze) + excitation = F.sigmoid(excitation) + excitation = paddle.reshape(excitation, shape=[-1, self._num_channels, 1, 1]) + out = x * excitation + return out + + +class Stage(nn.Layer): + def __init__(self, + num_channels: int, + num_modules: int, + num_blocks: int, + num_filters: int, + has_se: bool = False, + multi_scale_output: bool = True, + name: str = None, + align_corners: bool = False): + super(Stage, self).__init__() + + self._num_modules = num_modules + + self.stage_func_list = [] + for i in range(num_modules): + if i == num_modules - 1 and not multi_scale_output: + stage_func = self.add_sublayer( + "stage_{}_{}".format(name, i + 1), + HighResolutionModule( + num_channels=num_channels, + num_blocks=num_blocks, + num_filters=num_filters, + has_se=has_se, + multi_scale_output=False, + name=name + '_' + str(i + 1), + align_corners=align_corners)) + else: + stage_func = self.add_sublayer( + "stage_{}_{}".format(name, i + 1), + HighResolutionModule( + num_channels=num_channels, + num_blocks=num_blocks, + 
num_filters=num_filters, + has_se=has_se, + name=name + '_' + str(i + 1), + align_corners=align_corners)) + + self.stage_func_list.append(stage_func) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + out = x + for idx in range(self._num_modules): + out = self.stage_func_list[idx](out) + return out + + +class HighResolutionModule(nn.Layer): + def __init__(self, + num_channels: int, + num_blocks: int, + num_filters: int, + has_se: bool = False, + multi_scale_output: bool = True, + name: str = None, + align_corners: str = False): + super(HighResolutionModule, self).__init__() + + self.branches_func = Branches( + num_blocks=num_blocks, in_channels=num_channels, out_channels=num_filters, has_se=has_se, name=name) + + self.fuse_func = FuseLayers( + in_channels=num_filters, + out_channels=num_filters, + multi_scale_output=multi_scale_output, + name=name, + align_corners=align_corners) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + out = self.branches_func(x) + out = self.fuse_func(out) + return out + + +class FuseLayers(nn.Layer): + def __init__(self, + in_channels: int, + out_channels: int, + multi_scale_output: bool = True, + name: str = None, + align_corners: bool = False): + super(FuseLayers, self).__init__() + + self._actual_ch = len(in_channels) if multi_scale_output else 1 + self._in_channels = in_channels + self.align_corners = align_corners + + self.residual_func_list = [] + for i in range(self._actual_ch): + for j in range(len(in_channels)): + if j > i: + residual_func = self.add_sublayer( + "residual_{}_layer_{}_{}".format(name, i + 1, j + 1), + L.ConvBN( + in_channels=in_channels[j], + out_channels=out_channels[i], + kernel_size=1, + padding='same', + bias_attr=False)) + self.residual_func_list.append(residual_func) + elif j < i: + pre_num_filters = in_channels[j] + for k in range(i - j): + if k == i - j - 1: + residual_func = self.add_sublayer( + "residual_{}_layer_{}_{}_{}".format(name, i + 1, j + 1, k + 1), + L.ConvBN( + in_channels=pre_num_filters, + out_channels=out_channels[i], + kernel_size=3, + stride=2, + padding='same', + bias_attr=False)) + pre_num_filters = out_channels[i] + else: + residual_func = self.add_sublayer( + "residual_{}_layer_{}_{}_{}".format(name, i + 1, j + 1, k + 1), + L.ConvBNReLU( + in_channels=pre_num_filters, + out_channels=out_channels[j], + kernel_size=3, + stride=2, + padding='same', + bias_attr=False)) + pre_num_filters = out_channels[j] + self.residual_func_list.append(residual_func) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + outs = [] + residual_func_idx = 0 + for i in range(self._actual_ch): + residual = x[i] + residual_shape = residual.shape[-2:] + for j in range(len(self._in_channels)): + if j > i: + y = self.residual_func_list[residual_func_idx](x[j]) + residual_func_idx += 1 + + y = F.interpolate(y, residual_shape, mode='bilinear', align_corners=self.align_corners) + residual = residual + y + elif j < i: + y = x[j] + for k in range(i - j): + y = self.residual_func_list[residual_func_idx](y) + residual_func_idx += 1 + + residual = residual + y + + residual = F.relu(residual) + outs.append(residual) + + return outs diff --git a/modules/image/semantic_segmentation/ocrnet_hrnetw18_cityscapes/layers.py b/modules/image/semantic_segmentation/ocrnet_hrnetw18_cityscapes/layers.py new file mode 100644 index 0000000000000000000000000000000000000000..27c5a68a7c725aacca231279aea7ecdd216b20a1 --- /dev/null +++ b/modules/image/semantic_segmentation/ocrnet_hrnetw18_cityscapes/layers.py @@ -0,0 +1,297 @@ +# copyright (c) 
2021 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Tuple + +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +from paddle.nn.layer import activation +from paddle.nn import Conv2D, AvgPool2D + + +def SyncBatchNorm(*args, **kwargs): + """In cpu environment nn.SyncBatchNorm does not have kernel so use nn.BatchNorm2D instead""" + if paddle.get_device() == 'cpu': + return nn.BatchNorm2D(*args, **kwargs) + else: + return nn.SyncBatchNorm(*args, **kwargs) + + +class ConvBNLayer(nn.Layer): + """Basic conv bn relu layer.""" + + def __init__(self, + in_channels: int, + out_channels: int, + kernel_size: int, + stride: int = 1, + dilation: int = 1, + groups: int = 1, + is_vd_mode: bool = False, + act: str = None, + name: str = None): + super(ConvBNLayer, self).__init__() + + self.is_vd_mode = is_vd_mode + self._pool2d_avg = AvgPool2D(kernel_size=2, stride=2, padding=0, ceil_mode=True) + self._conv = Conv2D( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + padding=(kernel_size - 1) // 2 if dilation == 1 else 0, + dilation=dilation, + groups=groups, + bias_attr=False) + + self._batch_norm = SyncBatchNorm(out_channels) + self._act_op = Activation(act=act) + + def forward(self, inputs: paddle.Tensor) -> paddle.Tensor: + if self.is_vd_mode: + inputs = self._pool2d_avg(inputs) + y = self._conv(inputs) + y = self._batch_norm(y) + y = self._act_op(y) + + return y + + +class BottleneckBlock(nn.Layer): + """Residual bottleneck block""" + + def __init__(self, + in_channels: int, + out_channels: int, + stride: int, + shortcut: bool = True, + if_first: bool = False, + dilation: int = 1, + name: str = None): + super(BottleneckBlock, self).__init__() + + self.conv0 = ConvBNLayer( + in_channels=in_channels, out_channels=out_channels, kernel_size=1, act='relu', name=name + "_branch2a") + + self.dilation = dilation + + self.conv1 = ConvBNLayer( + in_channels=out_channels, + out_channels=out_channels, + kernel_size=3, + stride=stride, + act='relu', + dilation=dilation, + name=name + "_branch2b") + self.conv2 = ConvBNLayer( + in_channels=out_channels, out_channels=out_channels * 4, kernel_size=1, act=None, name=name + "_branch2c") + + if not shortcut: + self.short = ConvBNLayer( + in_channels=in_channels, + out_channels=out_channels * 4, + kernel_size=1, + stride=1, + is_vd_mode=False if if_first or stride == 1 else True, + name=name + "_branch1") + + self.shortcut = shortcut + + def forward(self, inputs: paddle.Tensor) -> paddle.Tensor: + y = self.conv0(inputs) + if self.dilation > 1: + padding = self.dilation + y = F.pad(y, [padding, padding, padding, padding]) + + conv1 = self.conv1(y) + conv2 = self.conv2(conv1) + + if self.shortcut: + short = inputs + else: + short = self.short(inputs) + + y = paddle.add(x=short, y=conv2) + y = F.relu(y) + return y + + +class SeparableConvBNReLU(nn.Layer): + """Depthwise Separable Convolution.""" + + def __init__(self, in_channels: int, 
out_channels: int, kernel_size: int, padding: str = 'same', **kwargs: dict): + super(SeparableConvBNReLU, self).__init__() + self.depthwise_conv = ConvBN( + in_channels, + out_channels=in_channels, + kernel_size=kernel_size, + padding=padding, + groups=in_channels, + **kwargs) + self.piontwise_conv = ConvBNReLU(in_channels, out_channels, kernel_size=1, groups=1) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self.depthwise_conv(x) + x = self.piontwise_conv(x) + return x + + +class ConvBN(nn.Layer): + """Basic conv bn layer""" + + def __init__(self, in_channels: int, out_channels: int, kernel_size: int, padding: str = 'same', **kwargs: dict): + super(ConvBN, self).__init__() + self._conv = Conv2D(in_channels, out_channels, kernel_size, padding=padding, **kwargs) + self._batch_norm = SyncBatchNorm(out_channels) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self._conv(x) + x = self._batch_norm(x) + return x + + +class ConvBNReLU(nn.Layer): + """Basic conv bn relu layer.""" + + def __init__(self, in_channels: int, out_channels: int, kernel_size: int, padding: str = 'same', **kwargs: dict): + super(ConvBNReLU, self).__init__() + + self._conv = Conv2D(in_channels, out_channels, kernel_size, padding=padding, **kwargs) + self._batch_norm = SyncBatchNorm(out_channels) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self._conv(x) + x = self._batch_norm(x) + x = F.relu(x) + return x + + +class Activation(nn.Layer): + """ + The wrapper of activations. + Args: + act (str, optional): The activation name in lowercase. It must be one of ['elu', 'gelu', + 'hardshrink', 'tanh', 'hardtanh', 'prelu', 'relu', 'relu6', 'selu', 'leakyrelu', 'sigmoid', + 'softmax', 'softplus', 'softshrink', 'softsign', 'tanhshrink', 'logsigmoid', 'logsoftmax', + 'hsigmoid']. Default: None, means identical transformation. + Returns: + A callable object of Activation. + Raises: + KeyError: When parameter `act` is not in the optional range. + Examples: + from paddleseg.models.common.activation import Activation + relu = Activation("relu") + print(relu) + # + sigmoid = Activation("sigmoid") + print(sigmoid) + # + not_exit_one = Activation("not_exit_one") + # KeyError: "not_exit_one does not exist in the current dict_keys(['elu', 'gelu', 'hardshrink', + # 'tanh', 'hardtanh', 'prelu', 'relu', 'relu6', 'selu', 'leakyrelu', 'sigmoid', 'softmax', + # 'softplus', 'softshrink', 'softsign', 'tanhshrink', 'logsigmoid', 'logsoftmax', 'hsigmoid'])" + """ + + def __init__(self, act: str = None): + super(Activation, self).__init__() + + self._act = act + upper_act_names = nn.layer.activation.__dict__.keys() + lower_act_names = [act.lower() for act in upper_act_names] + act_dict = dict(zip(lower_act_names, upper_act_names)) + + if act is not None: + if act in act_dict.keys(): + act_name = act_dict[act] + self.act_func = eval("nn.layer.activation.{}()".format(act_name)) + else: + raise KeyError("{} does not exist in the current {}".format(act, act_dict.keys())) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + if self._act is not None: + return self.act_func(x) + else: + return x + + +class ASPPModule(nn.Layer): + """ + Atrous Spatial Pyramid Pooling. + + Args: + aspp_ratios (tuple): The dilation rate using in ASSP module. + in_channels (int): The number of input channels. + out_channels (int): The number of output channels. + align_corners (bool): An argument of F.interpolate. It should be set to False when the output size of feature + is even, e.g. 1024x512, otherwise it is True, e.g. 
769x769. + use_sep_conv (bool, optional): If using separable conv in ASPP module. Default: False. + image_pooling (bool, optional): If augmented with image-level features. Default: False + """ + + def __init__(self, + aspp_ratios: Tuple[int], + in_channels: int, + out_channels: int, + align_corners: bool, + use_sep_conv: bool = False, + image_pooling: bool = False): + super().__init__() + + self.align_corners = align_corners + self.aspp_blocks = nn.LayerList() + + for ratio in aspp_ratios: + if use_sep_conv and ratio > 1: + conv_func = SeparableConvBNReLU + else: + conv_func = ConvBNReLU + + block = conv_func( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=1 if ratio == 1 else 3, + dilation=ratio, + padding=0 if ratio == 1 else ratio) + self.aspp_blocks.append(block) + + out_size = len(self.aspp_blocks) + + if image_pooling: + self.global_avg_pool = nn.Sequential( + nn.AdaptiveAvgPool2D(output_size=(1, 1)), + ConvBNReLU(in_channels, out_channels, kernel_size=1, bias_attr=False)) + out_size += 1 + self.image_pooling = image_pooling + + self.conv_bn_relu = ConvBNReLU(in_channels=out_channels * out_size, out_channels=out_channels, kernel_size=1) + + self.dropout = nn.Dropout(p=0.1) # drop rate + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + outputs = [] + for block in self.aspp_blocks: + y = block(x) + y = F.interpolate(y, x.shape[2:], mode='bilinear', align_corners=self.align_corners) + outputs.append(y) + + if self.image_pooling: + img_avg = self.global_avg_pool(x) + img_avg = F.interpolate(img_avg, x.shape[2:], mode='bilinear', align_corners=self.align_corners) + outputs.append(img_avg) + + x = paddle.concat(outputs, axis=1) + x = self.conv_bn_relu(x) + x = self.dropout(x) + + return x diff --git a/modules/image/semantic_segmentation/ocrnet_hrnetw18_cityscapes/module.py b/modules/image/semantic_segmentation/ocrnet_hrnetw18_cityscapes/module.py new file mode 100644 index 0000000000000000000000000000000000000000..2ebbfbef041133ce3014877bb91035b6a3e40ff7 --- /dev/null +++ b/modules/image/semantic_segmentation/ocrnet_hrnetw18_cityscapes/module.py @@ -0,0 +1,224 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os +from typing import List + +import paddle +import numpy as np +import paddle.nn as nn +import paddle.nn.functional as F +from paddlehub.module.module import moduleinfo +import paddlehub.vision.segmentation_transforms as T +from paddlehub.module.cv_module import ImageSegmentationModule + +import ocrnet_hrnetw18_cityscapes.layers as L +from ocrnet_hrnetw18_cityscapes.hrnet import HRNet_W18 + + +@moduleinfo( + name="ocrnet_hrnetw18_cityscapes", + type="CV/semantic_segmentation", + author="paddlepaddle", + author_email="", + summary="OCRNetHRNetW18 is a segmentation model pretrained by pascal voc.", + version="1.0.0", + meta=ImageSegmentationModule) +class OCRNetHRNetW18(nn.Layer): + """ + The OCRNet implementation based on PaddlePaddle. 
+ The original article refers to + Yuan, Yuhui, et al. "Object-Contextual Representations for Semantic Segmentation" + (https://arxiv.org/pdf/1909.11065.pdf) + Args: + num_classes (int): The unique number of target classes. + backbone_indices (list): A list indicates the indices of output of backbone. + It can be either one or two values, if two values, the first index will be taken as + a deep-supervision feature in auxiliary layer; the second one will be taken as + input of pixel representation. If one value, it is taken by both above. + ocr_mid_channels (int, optional): The number of middle channels in OCRHead. Default: 512. + ocr_key_channels (int, optional): The number of key channels in ObjectAttentionBlock. Default: 256. + align_corners (bool): An argument of F.interpolate. It should be set to False when the output size of feature + is even, e.g. 1024x512, otherwise it is True, e.g. 769x769. Default: False. + pretrained (str, optional): The path or url of pretrained model. Default: None. + """ + + def __init__(self, + num_classes: int = 19, + backbone_indices: List[int] = [0], + ocr_mid_channels: int = 512, + ocr_key_channels: int = 256, + align_corners: bool = False, + pretrained: str = None): + super(OCRNetHRNetW18, self).__init__() + self.backbone = HRNet_W18() + self.backbone_indices = backbone_indices + in_channels = [self.backbone.feat_channels[i] for i in backbone_indices] + self.head = OCRHead( + num_classes=num_classes, + in_channels=in_channels, + ocr_mid_channels=ocr_mid_channels, + ocr_key_channels=ocr_key_channels) + self.align_corners = align_corners + self.transforms = T.Compose([T.Normalize()]) + + if pretrained is not None: + model_dict = paddle.load(pretrained) + self.set_dict(model_dict) + print("load custom parameters success") + + else: + checkpoint = os.path.join(self.directory, 'model.pdparams') + model_dict = paddle.load(checkpoint) + self.set_dict(model_dict) + print("load pretrained parameters success") + + def transform(self, img: np.ndarray) -> np.ndarray: + return self.transforms(img) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + feats = self.backbone(x) + feats = [feats[i] for i in self.backbone_indices] + logit_list = self.head(feats) + logit_list = [ + F.interpolate(logit, x.shape[2:], mode='bilinear', align_corners=self.align_corners) for logit in logit_list + ] + return logit_list + + +class OCRHead(nn.Layer): + """ + The Object contextual representation head. + Args: + num_classes(int): The unique number of target classes. + in_channels(tuple): The number of input channels. + ocr_mid_channels(int, optional): The number of middle channels in OCRHead. Default: 512. + ocr_key_channels(int, optional): The number of key channels in ObjectAttentionBlock. Default: 256. 
+ """ + + def __init__(self, num_classes: int, in_channels: int, ocr_mid_channels: int = 512, ocr_key_channels: int = 256): + super().__init__() + + self.num_classes = num_classes + self.spatial_gather = SpatialGatherBlock() + self.spatial_ocr = SpatialOCRModule(ocr_mid_channels, ocr_key_channels, ocr_mid_channels) + + self.indices = [-2, -1] if len(in_channels) > 1 else [-1, -1] + + self.conv3x3_ocr = L.ConvBNReLU(in_channels[self.indices[1]], ocr_mid_channels, 3, padding=1) + self.cls_head = nn.Conv2D(ocr_mid_channels, self.num_classes, 1) + self.aux_head = nn.Sequential( + L.ConvBNReLU(in_channels[self.indices[0]], in_channels[self.indices[0]], 1), + nn.Conv2D(in_channels[self.indices[0]], self.num_classes, 1)) + + def forward(self, feat_list: List[paddle.Tensor]) -> paddle.Tensor: + feat_shallow, feat_deep = feat_list[self.indices[0]], feat_list[self.indices[1]] + + soft_regions = self.aux_head(feat_shallow) + pixels = self.conv3x3_ocr(feat_deep) + + object_regions = self.spatial_gather(pixels, soft_regions) + ocr = self.spatial_ocr(pixels, object_regions) + + logit = self.cls_head(ocr) + return [logit, soft_regions] + + +class SpatialGatherBlock(nn.Layer): + """Aggregation layer to compute the pixel-region representation.""" + + def forward(self, pixels: paddle.Tensor, regions: paddle.Tensor) -> paddle.Tensor: + n, c, h, w = pixels.shape + _, k, _, _ = regions.shape + + # pixels: from (n, c, h, w) to (n, h*w, c) + pixels = paddle.reshape(pixels, (n, c, h * w)) + pixels = paddle.transpose(pixels, [0, 2, 1]) + + # regions: from (n, k, h, w) to (n, k, h*w) + regions = paddle.reshape(regions, (n, k, h * w)) + regions = F.softmax(regions, axis=2) + + # feats: from (n, k, c) to (n, c, k, 1) + feats = paddle.bmm(regions, pixels) + feats = paddle.transpose(feats, [0, 2, 1]) + feats = paddle.unsqueeze(feats, axis=-1) + + return feats + + +class SpatialOCRModule(nn.Layer): + """Aggregate the global object representation to update the representation for each pixel.""" + + def __init__(self, in_channels: int, key_channels: int, out_channels: int, dropout_rate: float = 0.1): + super().__init__() + + self.attention_block = ObjectAttentionBlock(in_channels, key_channels) + self.conv1x1 = nn.Sequential(L.ConvBNReLU(2 * in_channels, out_channels, 1), nn.Dropout2D(dropout_rate)) + + def forward(self, pixels: paddle.Tensor, regions: paddle.Tensor) -> paddle.Tensor: + context = self.attention_block(pixels, regions) + feats = paddle.concat([context, pixels], axis=1) + feats = self.conv1x1(feats) + + return feats + + +class ObjectAttentionBlock(nn.Layer): + """A self-attention module.""" + + def __init__(self, in_channels: int, key_channels: int): + super().__init__() + + self.in_channels = in_channels + self.key_channels = key_channels + + self.f_pixel = nn.Sequential( + L.ConvBNReLU(in_channels, key_channels, 1), L.ConvBNReLU(key_channels, key_channels, 1)) + + self.f_object = nn.Sequential( + L.ConvBNReLU(in_channels, key_channels, 1), L.ConvBNReLU(key_channels, key_channels, 1)) + + self.f_down = L.ConvBNReLU(in_channels, key_channels, 1) + + self.f_up = L.ConvBNReLU(key_channels, in_channels, 1) + + def forward(self, x: paddle.Tensor, proxy: paddle.Tensor) -> paddle.Tensor: + n, _, h, w = x.shape + + # query : from (n, c1, h1, w1) to (n, h1*w1, key_channels) + query = self.f_pixel(x) + query = paddle.reshape(query, (n, self.key_channels, -1)) + query = paddle.transpose(query, [0, 2, 1]) + + # key : from (n, c2, h2, w2) to (n, key_channels, h2*w2) + key = self.f_object(proxy) + key = 
paddle.reshape(key, (n, self.key_channels, -1)) + + # value : from (n, c2, h2, w2) to (n, h2*w2, key_channels) + value = self.f_down(proxy) + value = paddle.reshape(value, (n, self.key_channels, -1)) + value = paddle.transpose(value, [0, 2, 1]) + + # sim_map (n, h1*w1, h2*w2) + sim_map = paddle.bmm(query, key) + sim_map = (self.key_channels**-.5) * sim_map + sim_map = F.softmax(sim_map, axis=-1) + + # context from (n, h1*w1, key_channels) to (n , out_channels, h1, w1) + context = paddle.bmm(sim_map, value) + context = paddle.transpose(context, [0, 2, 1]) + context = paddle.reshape(context, (n, self.key_channels, h, w)) + context = self.f_up(context) + + return context diff --git a/modules/image/semantic_segmentation/ocrnet_hrnetw18_voc/README.md b/modules/image/semantic_segmentation/ocrnet_hrnetw18_voc/README.md new file mode 100644 index 0000000000000000000000000000000000000000..56fd92aaf422baa3ce803d25bed53de33cc6af8c --- /dev/null +++ b/modules/image/semantic_segmentation/ocrnet_hrnetw18_voc/README.md @@ -0,0 +1,176 @@ +# PaddleHub 图像分割 + +## 模型预测 + +若想使用我们提供的预训练模型进行预测,可使用如下脚本: + +```python +import paddle +import cv2 +import paddlehub as hub + +if __name__ == '__main__': + model = hub.Module(name='ocrnet_hrnetw18_voc') + img = cv2.imread("/PATH/TO/IMAGE") + model.predict(images=[img], visualization=True) +``` + + + +## 如何开始Fine-tune + +本示例将展示如何使用PaddleHub对预训练模型进行finetune并完成预测任务。 + +在完成安装PaddlePaddle与PaddleHub后,通过执行`python train.py`即可开始使用ocrnet_hrnetw18_voc模型对OpticDiscSeg等数据集进行Fine-tune。 + +## 代码步骤 + +使用PaddleHub Fine-tune API进行Fine-tune可以分为4个步骤。 + +### Step1: 定义数据预处理方式 +```python +from paddlehub.vision.segmentation_transforms import Compose, Resize, Normalize + +transform = Compose([Resize(target_size=(512, 512)), Normalize()]) +``` + +`segmentation_transforms` 数据增强模块定义了丰富的针对图像分割数据的预处理方式,用户可按照需求替换自己需要的数据预处理方式。 + +### Step2: 下载数据集并使用 +```python +from paddlehub.datasets import OpticDiscSeg + +train_reader = OpticDiscSeg(transform, mode='train') + +``` +* `transform`: 数据预处理方式。 +* `mode`: 选择数据模式,可选项有 `train`, `test`, `val`, 默认为`train`。 + +数据集的准备代码可以参考 [opticdiscseg.py](../../paddlehub/datasets/opticdiscseg.py)。`hub.datasets.OpticDiscSeg()`会自动从网络下载数据集并解压到用户目录下`$HOME/.paddlehub/dataset`目录。 + +### Step3: 加载预训练模型 + +```python +model = hub.Module(name='ocrnet_hrnetw18_voc', num_classes=2, pretrained=None) +``` +* `name`: 选择预训练模型的名字。 +* `num_classes`: 分割模型的类别数目。 +* `pretrained`: 是否加载自己训练的模型,若为None,则加载提供的模型默认参数。 + +### Step4: 选择优化策略和运行配置 + +```python +scheduler = paddle.optimizer.lr.PolynomialDecay(learning_rate=0.01, decay_steps=1000, power=0.9, end_lr=0.0001) +optimizer = paddle.optimizer.Adam(learning_rate=scheduler, parameters=model.parameters()) +trainer = Trainer(model, optimizer, checkpoint_dir='test_ckpt_img_ocr', use_gpu=True) +``` + +#### 优化策略 + +Paddle2.0提供了多种优化器选择,如`SGD`, `Adam`, `Adamax`等,其中`Adam`: + +* `learning_rate`: 全局学习率。 +* `parameters`: 待优化模型参数。 + +#### 运行配置 +`Trainer` 主要控制Fine-tune的训练,包含以下可控制的参数: + +* `model`: 被优化模型; +* `optimizer`: 优化器选择; +* `use_gpu`: 是否使用gpu,默认为False; +* `use_vdl`: 是否使用vdl可视化训练过程; +* `checkpoint_dir`: 保存模型参数的地址; +* `compare_metrics`: 保存最优模型的衡量指标; + +`trainer.train` 主要控制具体的训练过程,包含以下可控制的参数: + +* `train_dataset`: 训练时所用的数据集; +* `epochs`: 训练轮数; +* `batch_size`: 训练的批大小,如果使用GPU,请根据实际情况调整batch_size; +* `num_workers`: works的数量,默认为0; +* `eval_dataset`: 验证集; +* `log_interval`: 打印日志的间隔, 单位为执行批训练的次数。 +* `save_interval`: 保存模型的间隔频次,单位为执行训练的轮数。 + +## 模型预测 + 
+当完成Fine-tune后,Fine-tune过程在验证集上表现最优的模型会被保存在`${CHECKPOINT_DIR}/best_model`目录下,其中`${CHECKPOINT_DIR}`目录为Fine-tune时所选择的保存checkpoint的目录。 + +我们使用该模型来进行预测。predict.py脚本如下: + +```python +import paddle +import cv2 +import paddlehub as hub + +if __name__ == '__main__': + model = hub.Module(name='ocrnet_hrnetw18_voc', pretrained='/PATH/TO/CHECKPOINT') + img = cv2.imread("/PATH/TO/IMAGE") + model.predict(images=[img], visualization=True) +``` + +参数配置正确后,请执行脚本`python predict.py`。 +**Args** +* `images`:原始图像路径或BGR格式图片; +* `visualization`: 是否可视化,默认为True; +* `save_path`: 保存结果的路径,默认保存路径为'seg_result'。 + +**NOTE:** 进行预测时,所选择的module,checkpoint_dir,dataset必须和Fine-tune所用的一样。 + +## 服务部署 + +PaddleHub Serving可以部署一个在线图像分割服务。 + +### Step1: 启动PaddleHub Serving + +运行启动命令: + +```shell +$ hub serving start -m ocrnet_hrnetw18_voc +``` + +这样就完成了一个图像分割服务化API的部署,默认端口号为8866。 + +**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +### Step2: 发送预测请求 + +配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + +```python +import requests +import json +import cv2 +import base64 + +import numpy as np + + +def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + +# 发送HTTP请求 +org_im = cv2.imread('/PATH/TO/IMAGE') +data = {'images':[cv2_to_base64(org_im)]} +headers = {"Content-type": "application/json"} +url = "http://127.0.0.1:8866/predict/ocrnet_hrnetw18_voc" +r = requests.post(url=url, headers=headers, data=json.dumps(data)) +mask = base64_to_cv2(r.json()["results"][0]) +``` + +### 查看代码 + +https://github.com/PaddlePaddle/PaddleSeg + +### 依赖 + +paddlepaddle >= 2.0.0 + +paddlehub >= 2.0.0 diff --git a/modules/image/semantic_segmentation/unet_cityscapes/README.md b/modules/image/semantic_segmentation/unet_cityscapes/README.md new file mode 100644 index 0000000000000000000000000000000000000000..8510ac7fc2d313f4613a5ee70ccf80ba26b4ae30 --- /dev/null +++ b/modules/image/semantic_segmentation/unet_cityscapes/README.md @@ -0,0 +1,174 @@ +# PaddleHub 图像分割 + +## 模型预测 + +若想使用我们提供的预训练模型进行预测,可使用如下脚本: + +```python +import paddle +import cv2 +import paddlehub as hub + +if __name__ == '__main__': + model = hub.Module(name='unet_cityscapes') + img = cv2.imread("/PATH/TO/IMAGE") + model.predict(images=[img], visualization=True) +``` + +## 如何开始Fine-tune + +在完成安装PaddlePaddle与PaddleHub后,通过执行`python train.py`即可开始使用unet_cityscapes模型对OpticDiscSeg等数据集进行Fine-tune。 + +## 代码步骤 + +使用PaddleHub Fine-tune API进行Fine-tune可以分为4个步骤。 + +### Step1: 定义数据预处理方式 +```python +from paddlehub.vision.segmentation_transforms import Compose, Resize, Normalize + +transform = Compose([Resize(target_size=(512, 512)), Normalize()]) +``` + +`segmentation_transforms` 数据增强模块定义了丰富的针对图像分割数据的预处理方式,用户可按照需求替换自己需要的数据预处理方式。 + +### Step2: 下载数据集并使用 +```python +from paddlehub.datasets import OpticDiscSeg + +train_reader = OpticDiscSeg(transform, mode='train') + +``` +* `transform`: 数据预处理方式。 +* `mode`: 选择数据模式,可选项有 `train`, `test`, `val`, 默认为`train`。 + +数据集的准备代码可以参考 [opticdiscseg.py](../../paddlehub/datasets/opticdiscseg.py)。`hub.datasets.OpticDiscSeg()`会自动从网络下载数据集并解压到用户目录下`$HOME/.paddlehub/dataset`目录。 + +### Step3: 加载预训练模型 + +```python +model = hub.Module(name='unet_cityscapes', num_classes=2, pretrained=None) +``` +* `name`: 选择预训练模型的名字。 +* `num_classes`: 分割模型的类别数目。 +* `pretrained`: 是否加载自己训练的模型,若为None,则加载提供的模型默认参数。 + +### Step4: 选择优化策略和运行配置 + +```python +scheduler 
= paddle.optimizer.lr.PolynomialDecay(learning_rate=0.01, decay_steps=1000, power=0.9, end_lr=0.0001) +optimizer = paddle.optimizer.Adam(learning_rate=scheduler, parameters=model.parameters()) +trainer = Trainer(model, optimizer, checkpoint_dir='test_ckpt_img_ocr', use_gpu=True) +``` + +#### 优化策略 + +Paddle2.0rc提供了多种优化器选择,如`SGD`, `Adam`, `Adamax`等,详细参见[策略](https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0-rc/api/paddle/optimizer/optimizer/Optimizer_cn.html)。 + +其中`Adam`: + +* `learning_rate`: 全局学习率。 +* `parameters`: 待优化模型参数。 + +#### 运行配置 +`Trainer` 主要控制Fine-tune的训练,包含以下可控制的参数: + +* `model`: 被优化模型; +* `optimizer`: 优化器选择; +* `use_gpu`: 是否使用gpu,默认为False; +* `use_vdl`: 是否使用vdl可视化训练过程; +* `checkpoint_dir`: 保存模型参数的地址; +* `compare_metrics`: 保存最优模型的衡量指标; + +`trainer.train` 主要控制具体的训练过程,包含以下可控制的参数: + +* `train_dataset`: 训练时所用的数据集; +* `epochs`: 训练轮数; +* `batch_size`: 训练的批大小,如果使用GPU,请根据实际情况调整batch_size; +* `num_workers`: works的数量,默认为0; +* `eval_dataset`: 验证集; +* `log_interval`: 打印日志的间隔, 单位为执行批训练的次数。 +* `save_interval`: 保存模型的间隔频次,单位为执行训练的轮数。 + +## 模型预测 + +当完成Fine-tune后,Fine-tune过程在验证集上表现最优的模型会被保存在`${CHECKPOINT_DIR}/best_model`目录下,其中`${CHECKPOINT_DIR}`目录为Fine-tune时所选择的保存checkpoint的目录。 + +我们使用该模型来进行预测。predict.py脚本如下: + +```python +import paddle +import cv2 +import paddlehub as hub + +if __name__ == '__main__': + model = hub.Module(name='unet_cityscapes', pretrained='/PATH/TO/CHECKPOINT') + img = cv2.imread("/PATH/TO/IMAGE") + model.predict(images=[img], visualization=True) +``` + +参数配置正确后,请执行脚本`python predict.py`。 +**Args** +* `images`:原始图像路径或BGR格式图片; +* `visualization`: 是否可视化,默认为True; +* `save_path`: 保存结果的路径,默认保存路径为'seg_result'。 + +**NOTE:** 进行预测时,所选择的module,checkpoint_dir,dataset必须和Fine-tune所用的一样。 + +## 服务部署 + +PaddleHub Serving可以部署一个在线图像分割服务。 + +### Step1: 启动PaddleHub Serving + +运行启动命令: + +```shell +$ hub serving start -m unet_cityscapes +``` + +这样就完成了一个图像分割服务化API的部署,默认端口号为8866。 + +**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +### Step2: 发送预测请求 + +配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + +```python +import requests +import json +import cv2 +import base64 + +import numpy as np + + +def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + +def base64_to_cv2(b64str): + data = base64.b64decode(b64str.encode('utf8')) + data = np.fromstring(data, np.uint8) + data = cv2.imdecode(data, cv2.IMREAD_COLOR) + return data + +# 发送HTTP请求 +org_im = cv2.imread('/PATH/TO/IMAGE') +data = {'images':[cv2_to_base64(org_im)]} +headers = {"Content-type": "application/json"} +url = "http://127.0.0.1:8866/predict/unet_cityscapes" +r = requests.post(url=url, headers=headers, data=json.dumps(data)) +mask = base64_to_cv2(r.json()["results"][0]) +``` + +### 查看代码 + +https://github.com/PaddlePaddle/PaddleSeg + +### 依赖 + +paddlepaddle >= 2.0.0 + +paddlehub >= 2.0.0 diff --git a/modules/image/semantic_segmentation/unet_cityscapes/layers.py b/modules/image/semantic_segmentation/unet_cityscapes/layers.py new file mode 100644 index 0000000000000000000000000000000000000000..e4f909588a88236e9f4f2d2aed9c9c4ea06fead3 --- /dev/null +++ b/modules/image/semantic_segmentation/unet_cityscapes/layers.py @@ -0,0 +1,185 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +import paddle +import paddle.nn as nn +import paddle.nn.functional as F + + +def SyncBatchNorm(*args, **kwargs): + """In cpu environment nn.SyncBatchNorm does not have kernel so use nn.BatchNorm2D instead""" + if paddle.get_device() == 'cpu' or os.environ.get('PADDLESEG_EXPORT_STAGE'): + return nn.BatchNorm2D(*args, **kwargs) + else: + return nn.SyncBatchNorm(*args, **kwargs) + + +class ConvBNReLU(nn.Layer): + """Basic conv bn relu layer.""" + + def __init__(self, in_channels: int, out_channels: int, kernel_size: int, padding: str = 'same', **kwargs): + super().__init__() + + self._conv = nn.Conv2D(in_channels, out_channels, kernel_size, padding=padding, **kwargs) + + self._batch_norm = SyncBatchNorm(out_channels) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self._conv(x) + x = self._batch_norm(x) + x = F.relu(x) + return x + + +class ConvBN(nn.Layer): + """Basic conv bn layer.""" + + def __init__(self, in_channels: int, out_channels: int, kernel_size: int, padding: str = 'same', **kwargs): + super().__init__() + self._conv = nn.Conv2D(in_channels, out_channels, kernel_size, padding=padding, **kwargs) + self._batch_norm = SyncBatchNorm(out_channels) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self._conv(x) + x = self._batch_norm(x) + return x + + +class ConvReLUPool(nn.Layer): + """Basic conv bn pool layer.""" + + def __init__(self, in_channels: int, out_channels: int): + super().__init__() + self.conv = nn.Conv2D(in_channels, out_channels, kernel_size=3, stride=1, padding=1, dilation=1) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self.conv(x) + x = F.relu(x) + x = F.pool2d(x, pool_size=2, pool_type="max", pool_stride=2) + return x + + +class SeparableConvBNReLU(nn.Layer): + """Basic separable Convolution layer.""" + + def __init__(self, in_channels: int, out_channels: int, kernel_size: int, padding: str = 'same', **kwargs): + super().__init__() + self.depthwise_conv = ConvBN( + in_channels, + out_channels=in_channels, + kernel_size=kernel_size, + padding=padding, + groups=in_channels, + **kwargs) + self.piontwise_conv = ConvBNReLU(in_channels, out_channels, kernel_size=1, groups=1) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self.depthwise_conv(x) + x = self.piontwise_conv(x) + return x + + +class DepthwiseConvBN(nn.Layer): + """Depthwise Convolution.""" + + def __init__(self, in_channels: int, out_channels: int, kernel_size: int, padding: str = 'same', **kwargs): + super().__init__() + self.depthwise_conv = ConvBN( + in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + padding=padding, + groups=in_channels, + **kwargs) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self.depthwise_conv(x) + return x + + +class AuxLayer(nn.Layer): + """ + The auxiliary layer implementation for auxiliary loss. + + Args: + in_channels (int): The number of input channels. + inter_channels (int): The intermediate channels. + out_channels (int): The number of output channels, and usually it is num_classes. + dropout_prob (float, optional): The drop rate. Default: 0.1. 
+ """ + + def __init__(self, in_channels: int, inter_channels: int, out_channels: int, dropout_prob: float = 0.1): + super().__init__() + + self.conv_bn_relu = ConvBNReLU(in_channels=in_channels, out_channels=inter_channels, kernel_size=3, padding=1) + + self.dropout = nn.Dropout(p=dropout_prob) + + self.conv = nn.Conv2D(in_channels=inter_channels, out_channels=out_channels, kernel_size=1) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + x = self.conv_bn_relu(x) + x = self.dropout(x) + x = self.conv(x) + return x + + +class Activation(nn.Layer): + """ + The wrapper of activations. + Args: + act (str, optional): The activation name in lowercase. It must be one of ['elu', 'gelu', + 'hardshrink', 'tanh', 'hardtanh', 'prelu', 'relu', 'relu6', 'selu', 'leakyrelu', 'sigmoid', + 'softmax', 'softplus', 'softshrink', 'softsign', 'tanhshrink', 'logsigmoid', 'logsoftmax', + 'hsigmoid']. Default: None, means identical transformation. + Returns: + A callable object of Activation. + Raises: + KeyError: When parameter `act` is not in the optional range. + Examples: + from paddleseg.models.common.activation import Activation + relu = Activation("relu") + print(relu) + # + sigmoid = Activation("sigmoid") + print(sigmoid) + # + not_exit_one = Activation("not_exit_one") + # KeyError: "not_exit_one does not exist in the current dict_keys(['elu', 'gelu', 'hardshrink', + # 'tanh', 'hardtanh', 'prelu', 'relu', 'relu6', 'selu', 'leakyrelu', 'sigmoid', 'softmax', + # 'softplus', 'softshrink', 'softsign', 'tanhshrink', 'logsigmoid', 'logsoftmax', 'hsigmoid'])" + """ + + def __init__(self, act: str = None): + super(Activation, self).__init__() + + self._act = act + upper_act_names = nn.layer.activation.__dict__.keys() + lower_act_names = [act.lower() for act in upper_act_names] + act_dict = dict(zip(lower_act_names, upper_act_names)) + + if act is not None: + if act in act_dict.keys(): + act_name = act_dict[act] + self.act_func = eval("nn.layer.activation.{}()".format(act_name)) + else: + raise KeyError("{} does not exist in the current {}".format(act, act_dict.keys())) + + def forward(self, x: paddle.Tensor) -> paddle.Tensor: + if self._act is not None: + return self.act_func(x) + else: + return x diff --git a/modules/image/semantic_segmentation/unet_cityscapes/module.py b/modules/image/semantic_segmentation/unet_cityscapes/module.py new file mode 100644 index 0000000000000000000000000000000000000000..f2bcc19f5c7662858ecac7b9c2d89dbbc2f8628b --- /dev/null +++ b/modules/image/semantic_segmentation/unet_cityscapes/module.py @@ -0,0 +1,151 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +from typing import Union, List, Tuple + +import paddle +from paddle import nn +import paddle.nn.functional as F +import numpy as np +from paddlehub.module.module import moduleinfo +import paddlehub.vision.segmentation_transforms as T +from paddlehub.module.cv_module import ImageSegmentationModule + +import unet_cityscapes.layers as layers + + +@moduleinfo( + name="unet_cityscapes", + type="CV/semantic_segmentation", + author="paddlepaddle", + author_email="", + summary="Unet is a segmentation model.", + version="1.0.0", + meta=ImageSegmentationModule) +class UNet(nn.Layer): + """ + The UNet implementation based on PaddlePaddle. + + The original article refers to + Olaf Ronneberger, et, al. "U-Net: Convolutional Networks for Biomedical Image Segmentation" + (https://arxiv.org/abs/1505.04597). + + Args: + num_classes (int): The unique number of target classes. + align_corners (bool): An argument of F.interpolate. It should be set to False when the output size of feature + is even, e.g. 1024x512, otherwise it is True, e.g. 769x769. Default: False. + use_deconv (bool, optional): A bool value indicates whether using deconvolution in upsampling. + If False, use resize_bilinear. Default: False. + pretrained (str, optional): The path or url of pretrained model for fine tuning. Default: None. + """ + + def __init__(self, + num_classes: int = 19, + align_corners: bool = False, + use_deconv: bool = False, + pretrained: str = None): + super(UNet, self).__init__() + + self.encode = Encoder() + self.decode = Decoder(align_corners, use_deconv=use_deconv) + self.cls = self.conv = nn.Conv2D(in_channels=64, out_channels=num_classes, kernel_size=3, stride=1, padding=1) + + self.transforms = T.Compose([T.Normalize()]) + + if pretrained is not None: + model_dict = paddle.load(pretrained) + self.set_dict(model_dict) + print("load custom parameters success") + + else: + checkpoint = os.path.join(self.directory, 'model.pdparams') + model_dict = paddle.load(checkpoint) + self.set_dict(model_dict) + print("load pretrained parameters success") + + def transform(self, img: Union[np.ndarray, str]) -> Union[np.ndarray, str]: + return self.transforms(img) + + def forward(self, x: paddle.Tensor) -> List[paddle.Tensor]: + logit_list = [] + x, short_cuts = self.encode(x) + x = self.decode(x, short_cuts) + logit = self.cls(x) + logit_list.append(logit) + return logit_list + + +class Encoder(nn.Layer): + def __init__(self): + super().__init__() + + self.double_conv = nn.Sequential(layers.ConvBNReLU(3, 64, 3), layers.ConvBNReLU(64, 64, 3)) + down_channels = [[64, 128], [128, 256], [256, 512], [512, 512]] + self.down_sample_list = nn.LayerList([self.down_sampling(channel[0], channel[1]) for channel in down_channels]) + + def down_sampling(self, in_channels: int, out_channels: int) -> nn.Layer: + modules = [] + modules.append(nn.MaxPool2D(kernel_size=2, stride=2)) + modules.append(layers.ConvBNReLU(in_channels, out_channels, 3)) + modules.append(layers.ConvBNReLU(out_channels, out_channels, 3)) + return nn.Sequential(*modules) + + def forward(self, x: paddle.Tensor) -> Tuple: + short_cuts = [] + x = self.double_conv(x) + for down_sample in self.down_sample_list: + short_cuts.append(x) + x = down_sample(x) + return x, short_cuts + + +class Decoder(nn.Layer): + def __init__(self, align_corners: bool, use_deconv: bool = False): + super().__init__() + + up_channels = [[512, 256], [256, 128], [128, 64], [64, 64]] + self.up_sample_list = nn.LayerList( + [UpSampling(channel[0], channel[1], align_corners, use_deconv) for 
channel in up_channels]) + + def forward(self, x: paddle.Tensor, short_cuts: List) -> paddle.Tensor: + for i in range(len(short_cuts)): + x = self.up_sample_list[i](x, short_cuts[-(i + 1)]) + return x + + +class UpSampling(nn.Layer): + def __init__(self, in_channels: int, out_channels: int, align_corners: bool, use_deconv: bool = False): + super().__init__() + + self.align_corners = align_corners + + self.use_deconv = use_deconv + if self.use_deconv: + self.deconv = nn.Conv2DTranspose(in_channels, out_channels // 2, kernel_size=2, stride=2, padding=0) + in_channels = in_channels + out_channels // 2 + else: + in_channels *= 2 + + self.double_conv = nn.Sequential( + layers.ConvBNReLU(in_channels, out_channels, 3), layers.ConvBNReLU(out_channels, out_channels, 3)) + + def forward(self, x: paddle.Tensor, short_cut: paddle.Tensor) -> paddle.Tensor: + if self.use_deconv: + x = self.deconv(x) + else: + x = F.interpolate(x, paddle.shape(short_cut)[2:], mode='bilinear', align_corners=self.align_corners) + x = paddle.concat([x, short_cut], axis=1) + x = self.double_conv(x) + return x diff --git a/modules/thirdparty/image/text_recognition/Vehicle_License_Plate_Recognition/README.md b/modules/image/text_recognition/Vehicle_License_Plate_Recognition/README.md similarity index 94% rename from modules/thirdparty/image/text_recognition/Vehicle_License_Plate_Recognition/README.md rename to modules/image/text_recognition/Vehicle_License_Plate_Recognition/README.md index cc299d800abfa4f627aa4ecd28c90e1b0281d802..e30a0ef6eeb9fb80feec73420bcaa653430952ee 100644 --- a/modules/thirdparty/image/text_recognition/Vehicle_License_Plate_Recognition/README.md +++ b/modules/image/text_recognition/Vehicle_License_Plate_Recognition/README.md @@ -1,7 +1,7 @@ # Vehicle_License_Plate_Recognition |模型名称|Vehicle_License_Plate_Recognition| -| :--- | :---: | +| :--- | :---: | |类别|图像 - 文字识别| |网络|-| |数据集|CCPD| @@ -17,8 +17,8 @@ - 样例结果示例:


(样例结果示例图)

+ - ### 模型介绍 @@ -27,20 +27,20 @@ ## 二、安装 -- ### 1、环境依赖 +- ### 1、环境依赖 - - paddlepaddle >= 2.0.0 + - paddlepaddle >= 2.0.0 - paddlehub >= 2.0.4 - - paddleocr >= 2.0.2 + - paddleocr >= 2.0.2 - ### 2、安装 - ```shell $ hub install Vehicle_License_Plate_Recognition ``` - + ## 三、模型API预测 - ### 1、代码示例 @@ -64,8 +64,8 @@ - **参数** - images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\];
- - + + - **返回** - results(list(dict{'license', 'bbox'})): 识别到的车牌信息列表,包含车牌的位置坐标和车牌号码 @@ -116,7 +116,7 @@ * 1.0.0 初始发布 - + - ```shell $ hub install Vehicle_License_Plate_Recognition==1.0.0 - ``` \ No newline at end of file + ``` diff --git a/modules/thirdparty/image/text_recognition/Vehicle_License_Plate_Recognition/module.py b/modules/image/text_recognition/Vehicle_License_Plate_Recognition/module.py similarity index 100% rename from modules/thirdparty/image/text_recognition/Vehicle_License_Plate_Recognition/module.py rename to modules/image/text_recognition/Vehicle_License_Plate_Recognition/module.py diff --git a/modules/image/text_recognition/arabic_ocr_db_crnn_mobile/README.md b/modules/image/text_recognition/arabic_ocr_db_crnn_mobile/README.md new file mode 100644 index 0000000000000000000000000000000000000000..84180ef991177b07ba1e9d652743de294449caa3 --- /dev/null +++ b/modules/image/text_recognition/arabic_ocr_db_crnn_mobile/README.md @@ -0,0 +1,165 @@ +# arabic_ocr_db_crnn_mobile + +|模型名称|arabic_ocr_db_crnn_mobile| +| :--- | :---: | +|类别|图像-文字识别| +|网络|Differentiable Binarization+CRNN| +|数据集|icdar2015数据集| +|是否支持Fine-tuning|否| +|最新更新日期|2021-12-2| +|数据指标|-| + + +## 一、模型基本信息 + +- ### 模型介绍 + + - arabic_ocr_db_crnn_mobile Module用于识别图片当中的阿拉伯文字,包括阿拉伯文、波斯文、维吾尔文。其基于multi_languages_ocr_db_crnn检测得到的文本框,继续识别文本框中的阿拉伯文文字。最终识别文字算法采用CRNN(Convolutional Recurrent Neural Network)即卷积递归神经网络。其是DCNN和RNN的组合,专门用于识别图像中的序列式对象。与CTC loss配合使用,进行文字识别,可以直接从文本词级或行级的标注中学习,不需要详细的字符级的标注。该Module是一个识别阿拉伯文的轻量级OCR模型,支持直接预测。 + + - 更多详情参考: + - [Real-time Scene Text Detection with Differentiable Binarization](https://arxiv.org/pdf/1911.08947.pdf) + - [An end-to-end trainable neural network for image-based sequence recognition and its application to scene text recognition](https://arxiv.org/pdf/1507.05717.pdf) + + + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.0.2 + + - paddlehub >= 2.0.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + +- ### 2、安装 + + - ```shell + $ hub install arabic_ocr_db_crnn_mobile + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + + + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run arabic_ocr_db_crnn_mobile --input_path "/PATH/TO/IMAGE" + $ hub run arabic_ocr_db_crnn_mobile --input_path "/PATH/TO/IMAGE" --det True --rec True --use_angle_cls True --box_thresh 0.7 --angle_classification_thresh 0.8 --visualization True + ``` + - 通过命令行方式实现文字识别模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + ocr = hub.Module(name="arabic_ocr_db_crnn_mobile", enable_mkldnn=True) # mkldnn加速仅在CPU下有效 + result = ocr.recognize_text(images=[cv2.imread('/PATH/TO/IMAGE')]) + + # or + # result = ocr.recognize_text(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 3、API + + - ```python + def __init__(self, + det=True, + rec=True, + use_angle_cls=False, + enable_mkldnn=False, + use_gpu=False, + box_thresh=0.6, + angle_classification_thresh=0.9) + ``` + + - 构造ArabicOCRDBCRNNMobile对象 + + - **参数** + - det(bool): 是否开启文字检测。默认为True。 + - rec(bool): 是否开启文字识别。默认为True。 + - use_angle_cls(bool): 是否开启方向分类, 用于设置使用方向分类器识别180度旋转文字。默认为False。 + - enable_mkldnn(bool): 是否开启mkldnn加速CPU计算。该参数仅在CPU运行下设置有效。默认为False。 + - use\_gpu (bool): 是否使用 GPU;**若使用GPU,请先设置CUDA_VISIBLE_DEVICES环境变量** + - box\_thresh (float): 检测文本框置信度的阈值; + - 
angle_classification_thresh(float): 文本方向分类置信度的阈值 + + + - ```python + def recognize_text(images=[], + paths=[], + output_dir='ocr_result', + visualization=False) + ``` + + - 预测API,检测输入图片中的所有文本的位置和识别文本结果。 + + - **参数** + + - paths (list\[str\]): 图片的路径; + - images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\],BGR格式; + - output\_dir (str): 图片的保存路径,默认设为 ocr\_result; + - visualization (bool): 是否将识别结果保存为图片文件, 仅有检测开启时有效, 默认为False; + + - **返回** + + - res (list\[dict\]): 识别结果的列表,列表中每一个元素为 dict,各字段为: + - data (list\[dict\]): 识别文本结果,列表中每一个元素为 dict,各字段为: + - text(str): 识别得到的文本 + - confidence(float): 识别文本结果置信度 + - text_box_position(list): 文本框在原图中的像素坐标,4*2的矩阵,依次表示文本框左下、右下、右上、左上顶点的坐标,如果无识别结果则data为\[\] + - orientation(str): 分类的方向,仅在只有方向分类开启时输出 + - score(float): 分类的得分,仅在只有方向分类开启时输出 + - save_path (str, optional): 识别结果的保存路径,如不保存图片则save_path为'' + + +## 四、服务部署 + +- PaddleHub Serving 可以部署一个目标检测的在线服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + - ```shell + $ hub serving start -m arabic_ocr_db_crnn_mobile + ``` + + - 这样就完成了一个目标检测的服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + import cv2 + import base64 + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # 发送HTTP请求 + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/arabic_ocr_db_crnn_mobile" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # 打印预测结果 + print(r.json()["results"]) + ``` + +## 五、更新历史 + +* 1.0.0 + + 初始发布 + - ```shell + $ hub install arabic_ocr_db_crnn_mobile==1.0.0 + ``` diff --git a/modules/image/text_recognition/arabic_ocr_db_crnn_mobile/__init__.py b/modules/image/text_recognition/arabic_ocr_db_crnn_mobile/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/modules/image/text_recognition/arabic_ocr_db_crnn_mobile/module.py b/modules/image/text_recognition/arabic_ocr_db_crnn_mobile/module.py new file mode 100644 index 0000000000000000000000000000000000000000..e1d603f6eabdb622b5cf58b9a5b645e991d3889a --- /dev/null +++ b/modules/image/text_recognition/arabic_ocr_db_crnn_mobile/module.py @@ -0,0 +1,87 @@ +import paddlehub as hub +from paddleocr.ppocr.utils.logging import get_logger +from paddleocr.tools.infer.utility import base64_to_cv2 +from paddlehub.module.module import moduleinfo, runnable, serving + + +@moduleinfo( + name="arabic_ocr_db_crnn_mobile", + version="1.1.0", + summary="ocr service", + author="PaddlePaddle", + type="cv/text_recognition") +class ArabicOCRDBCRNNMobile: + def __init__(self, + det=True, + rec=True, + use_angle_cls=False, + enable_mkldnn=False, + use_gpu=False, + box_thresh=0.6, + angle_classification_thresh=0.9): + """ + initialize with the necessary elements + Args: + det(bool): Whether to use text detector. + rec(bool): Whether to use text recognizer. + use_angle_cls(bool): Whether to use text orientation classifier. + enable_mkldnn(bool): Whether to enable mkldnn. + use_gpu (bool): Whether to use gpu. 
+ box_thresh(float): the threshold of the detected text box's confidence + angle_classification_thresh(float): the threshold of the angle classification confidence + """ + self.logger = get_logger() + self.model = hub.Module( + name="multi_languages_ocr_db_crnn", + lang="arabic", + det=det, + rec=rec, + use_angle_cls=use_angle_cls, + enable_mkldnn=enable_mkldnn, + use_gpu=use_gpu, + box_thresh=box_thresh, + angle_classification_thresh=angle_classification_thresh) + self.model.name = self.name + + def recognize_text(self, images=[], paths=[], output_dir='ocr_result', visualization=False): + """ + Get the text in the predicted images. + Args: + images (list(numpy.ndarray)): images data, shape of each is [H, W, C]. If images not paths + paths (list[str]): The paths of images. If paths not images + output_dir (str): The directory to store output images. + visualization (bool): Whether to save image or not. + Returns: + res (list): The result of text detection box and save path of images. + """ + all_results = self.model.recognize_text( + images=images, paths=paths, output_dir=output_dir, visualization=visualization) + return all_results + + @serving + def serving_method(self, images, **kwargs): + """ + Run as a service. + """ + images_decode = [base64_to_cv2(image) for image in images] + results = self.recognize_text(images_decode, **kwargs) + return results + + @runnable + def run_cmd(self, argvs): + """ + Run as a command + """ + results = self.model.run_cmd(argvs) + return results + + def export_onnx_model(self, dirname: str, input_shape_dict=None, opset_version=10): + ''' + Export the model to ONNX format. + + Args: + dirname(str): The directory to save the onnx model. + input_shape_dict: dictionary ``{ input_name: input_value }, eg. {'x': [-1, 3, -1, -1]}`` + opset_version(int): operator set + ''' + self.model.export_onnx_model(dirname=dirname, input_shape_dict=input_shape_dict, opset_version=opset_version) diff --git a/modules/image/text_recognition/arabic_ocr_db_crnn_mobile/requirements.txt b/modules/image/text_recognition/arabic_ocr_db_crnn_mobile/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..527c6de7f643cb427013aaff2409365538fed2d3 --- /dev/null +++ b/modules/image/text_recognition/arabic_ocr_db_crnn_mobile/requirements.txt @@ -0,0 +1,4 @@ +paddleocr>=2.3.0.2 +paddle2onnx>=0.9.0 +shapely +pyclipper diff --git a/modules/image/text_recognition/chinese_cht_ocr_db_crnn_mobile/README.md b/modules/image/text_recognition/chinese_cht_ocr_db_crnn_mobile/README.md new file mode 100644 index 0000000000000000000000000000000000000000..8a234985b78d0bf05a89ed42a6d27b1117f0b924 --- /dev/null +++ b/modules/image/text_recognition/chinese_cht_ocr_db_crnn_mobile/README.md @@ -0,0 +1,165 @@ +# chinese_cht_ocr_db_crnn_mobile + +|模型名称|chinese_cht_ocr_db_crnn_mobile| +| :--- | :---: | +|类别|图像-文字识别| +|网络|Differentiable Binarization+CRNN| +|数据集|icdar2015数据集| +|是否支持Fine-tuning|否| +|最新更新日期|2021-12-2| +|数据指标|-| + + +## 一、模型基本信息 + +- ### 模型介绍 + + - chinese_cht_ocr_db_crnn_mobile Module用于识别图片当中的繁体中文。其基于multi_languages_ocr_db_crnn检测得到的文本框,继续识别文本框中的繁体中文文字。最终识别文字算法采用CRNN(Convolutional Recurrent Neural Network)即卷积递归神经网络。其是DCNN和RNN的组合,专门用于识别图像中的序列式对象。与CTC loss配合使用,进行文字识别,可以直接从文本词级或行级的标注中学习,不需要详细的字符级的标注。该Module是一个识别繁体中文的轻量级OCR模型,支持直接预测。 + + - 更多详情参考: + - [Real-time Scene Text Detection with Differentiable Binarization](https://arxiv.org/pdf/1911.08947.pdf) + - [An end-to-end trainable neural network for image-based sequence recognition and its application to scene text 
recognition](https://arxiv.org/pdf/1507.05717.pdf) + + + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.0.2 + + - paddlehub >= 2.0.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + +- ### 2、安装 + + - ```shell + $ hub install chinese_cht_ocr_db_crnn_mobile + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + + + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run chinese_cht_ocr_db_crnn_mobile --input_path "/PATH/TO/IMAGE" + $ hub run chinese_cht_ocr_db_crnn_mobile --input_path "/PATH/TO/IMAGE" --det True --rec True --use_angle_cls True --box_thresh 0.7 --angle_classification_thresh 0.8 --visualization True + ``` + - 通过命令行方式实现文字识别模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + ocr = hub.Module(name="chinese_cht_ocr_db_crnn_mobile", enable_mkldnn=True) # mkldnn加速仅在CPU下有效 + result = ocr.recognize_text(images=[cv2.imread('/PATH/TO/IMAGE')]) + + # or + # result = ocr.recognize_text(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 3、API + + - ```python + def __init__(self, + det=True, + rec=True, + use_angle_cls=False, + enable_mkldnn=False, + use_gpu=False, + box_thresh=0.6, + angle_classification_thresh=0.9) + ``` + + - 构造ChineseChtOCRDBCRNNMobile对象 + + - **参数** + - det(bool): 是否开启文字检测。默认为True。 + - rec(bool): 是否开启文字识别。默认为True。 + - use_angle_cls(bool): 是否开启方向分类, 用于设置使用方向分类器识别180度旋转文字。默认为False。 + - enable_mkldnn(bool): 是否开启mkldnn加速CPU计算。该参数仅在CPU运行下设置有效。默认为False。 + - use\_gpu (bool): 是否使用 GPU;**若使用GPU,请先设置CUDA_VISIBLE_DEVICES环境变量** + - box\_thresh (float): 检测文本框置信度的阈值; + - angle_classification_thresh(float): 文本方向分类置信度的阈值 + + + - ```python + def recognize_text(images=[], + paths=[], + output_dir='ocr_result', + visualization=False) + ``` + + - 预测API,检测输入图片中的所有文本的位置和识别文本结果。 + + - **参数** + + - paths (list\[str\]): 图片的路径; + - images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\],BGR格式; + - output\_dir (str): 图片的保存路径,默认设为 ocr\_result; + - visualization (bool): 是否将识别结果保存为图片文件, 仅有检测开启时有效, 默认为False; + + - **返回** + + - res (list\[dict\]): 识别结果的列表,列表中每一个元素为 dict,各字段为: + - data (list\[dict\]): 识别文本结果,列表中每一个元素为 dict,各字段为: + - text(str): 识别得到的文本 + - confidence(float): 识别文本结果置信度 + - text_box_position(list): 文本框在原图中的像素坐标,4*2的矩阵,依次表示文本框左下、右下、右上、左上顶点的坐标,如果无识别结果则data为\[\] + - orientation(str): 分类的方向,仅在只有方向分类开启时输出 + - score(float): 分类的得分,仅在只有方向分类开启时输出 + - save_path (str, optional): 识别结果的保存路径,如不保存图片则save_path为'' + + +## 四、服务部署 + +- PaddleHub Serving 可以部署一个目标检测的在线服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + - ```shell + $ hub serving start -m chinese_cht_ocr_db_crnn_mobile + ``` + + - 这样就完成了一个目标检测的服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + import cv2 + import base64 + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # 发送HTTP请求 + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/chinese_cht_ocr_db_crnn_mobile" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # 打印预测结果 + print(r.json()["results"]) + ``` + +## 五、更新历史 + +* 1.0.0 + 
+ 初始发布 + - ```shell + $ hub install chinese_cht_ocr_db_crnn_mobile==1.0.0 + ``` diff --git a/modules/image/text_recognition/chinese_cht_ocr_db_crnn_mobile/__init__.py b/modules/image/text_recognition/chinese_cht_ocr_db_crnn_mobile/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/modules/image/text_recognition/chinese_cht_ocr_db_crnn_mobile/module.py b/modules/image/text_recognition/chinese_cht_ocr_db_crnn_mobile/module.py new file mode 100644 index 0000000000000000000000000000000000000000..b1c10a8feab26bb3a00e235c00de56d7476476bb --- /dev/null +++ b/modules/image/text_recognition/chinese_cht_ocr_db_crnn_mobile/module.py @@ -0,0 +1,87 @@ +import paddlehub as hub +from paddleocr.ppocr.utils.logging import get_logger +from paddleocr.tools.infer.utility import base64_to_cv2 +from paddlehub.module.module import moduleinfo, runnable, serving + + +@moduleinfo( + name="chinese_cht_ocr_db_crnn_mobile", + version="1.0.0", + summary="ocr service", + author="PaddlePaddle", + type="cv/text_recognition") +class ChineseChtOCRDBCRNNMobile: + def __init__(self, + det=True, + rec=True, + use_angle_cls=False, + enable_mkldnn=False, + use_gpu=False, + box_thresh=0.6, + angle_classification_thresh=0.9): + """ + initialize with the necessary elements + Args: + det(bool): Whether to use text detector. + rec(bool): Whether to use text recognizer. + use_angle_cls(bool): Whether to use text orientation classifier. + enable_mkldnn(bool): Whether to enable mkldnn. + use_gpu (bool): Whether to use gpu. + box_thresh(float): the threshold of the detected text box's confidence + angle_classification_thresh(float): the threshold of the angle classification confidence + """ + self.logger = get_logger() + self.model = hub.Module( + name="multi_languages_ocr_db_crnn", + lang="chinese_cht", + det=det, + rec=rec, + use_angle_cls=use_angle_cls, + enable_mkldnn=enable_mkldnn, + use_gpu=use_gpu, + box_thresh=box_thresh, + angle_classification_thresh=angle_classification_thresh) + self.model.name = self.name + + def recognize_text(self, images=[], paths=[], output_dir='ocr_result', visualization=False): + """ + Get the text in the predicted images. + Args: + images (list(numpy.ndarray)): images data, shape of each is [H, W, C]. If images not paths + paths (list[str]): The paths of images. If paths not images + output_dir (str): The directory to store output images. + visualization (bool): Whether to save image or not. + Returns: + res (list): The result of text detection box and save path of images. + """ + all_results = self.model.recognize_text( + images=images, paths=paths, output_dir=output_dir, visualization=visualization) + return all_results + + @serving + def serving_method(self, images, **kwargs): + """ + Run as a service. + """ + images_decode = [base64_to_cv2(image) for image in images] + results = self.recognize_text(images_decode, **kwargs) + return results + + @runnable + def run_cmd(self, argvs): + """ + Run as a command + """ + results = self.model.run_cmd(argvs) + return results + + def export_onnx_model(self, dirname: str, input_shape_dict=None, opset_version=10): + ''' + Export the model to ONNX format. + + Args: + dirname(str): The directory to save the onnx model. + input_shape_dict: dictionary ``{ input_name: input_value }, eg. 
{'x': [-1, 3, -1, -1]}`` + opset_version(int): operator set + ''' + self.model.export_onnx_model(dirname=dirname, input_shape_dict=input_shape_dict, opset_version=opset_version) diff --git a/modules/image/text_recognition/chinese_cht_ocr_db_crnn_mobile/requirements.txt b/modules/image/text_recognition/chinese_cht_ocr_db_crnn_mobile/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..527c6de7f643cb427013aaff2409365538fed2d3 --- /dev/null +++ b/modules/image/text_recognition/chinese_cht_ocr_db_crnn_mobile/requirements.txt @@ -0,0 +1,4 @@ +paddleocr>=2.3.0.2 +paddle2onnx>=0.9.0 +shapely +pyclipper diff --git a/modules/image/text_recognition/chinese_ocr_db_crnn_mobile/README_en.md b/modules/image/text_recognition/chinese_ocr_db_crnn_mobile/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..679b2a0598933d4c5450adca1c997e1a4c323ef4 --- /dev/null +++ b/modules/image/text_recognition/chinese_ocr_db_crnn_mobile/README_en.md @@ -0,0 +1,202 @@ +# chinese_ocr_db_crnn_mobile + +| Module Name | chinese_ocr_db_crnn_mobile | +| :------------------ | :------------: | +| Category | image-text_recognition | +| Network | Differentiable Binarization+RCNN | +| Dataset | icdar2015 | +| Fine-tuning supported or not | No | +| Module Size | 16M | +| Latest update date | 2021-02-26 | +| Data indicators | - | + + +## I. Basic Information of Module + +- ### Application Effect Display + - [Online experience in OCR text recognition scenarios](https://www.paddlepaddle.org.cn/hub/scene/ocr) + - Example result: +
+
+- ### Module Introduction
+
+  - chinese_ocr_db_crnn_mobile Module is used to recognize Chinese characters in images. It first obtains text boxes with the [chinese_text_detection_db_mobile Module](../chinese_text_detection_db_mobile/), performs angle classification on each detected box, and then recognizes the Chinese characters inside it. CRNN (Convolutional Recurrent Neural Network) is adopted as the final recognition algorithm. This Module is an ultra-lightweight Chinese OCR model that supports direct prediction.
+
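+  - The detection, angle classification and recognition stages above are all driven by a single `recognize_text` call. A minimal sketch (the image path is only a placeholder) that runs the pipeline and walks the result fields documented in the API section below:
+
+  - ```python
+    import cv2
+    import paddlehub as hub
+
+    ocr = hub.Module(name="chinese_ocr_db_crnn_mobile")
+    results = ocr.recognize_text(images=[cv2.imread('/PATH/TO/IMAGE')])
+
+    # one result dict per input image
+    for res in results:
+        for item in res['data']:
+            print(item['text'], item['confidence'], item['text_box_position'])
+    ```
+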
+ + - For more information, please refer to:[An end-to-end trainable neural network for image-based sequence recognition and its application to scene text recognition](https://arxiv.org/pdf/1507.05717.pdf) + + + +## II. Installation + +- ### 1、Environmental dependence + + - paddlepaddle >= 1.7.2 + + - paddlehub >= 1.6.0 | [How to install PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) + + - shapely + + - pyclipper + + - ```shell + $ pip install shapely pyclipper + ``` + - **This Module relies on the third-party libraries shapely and pyclipper. Please install shapely and pyclipper before using this Module.** + +- ### 2、Installation + + - ```shell + $ hub install chinese_ocr_db_crnn_mobile + ``` + - If you have problems during installation, please refer to:[windows_quickstart](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [linux_quickstart](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [mac_quickstart](../../../../docs/docs_ch/get_start/mac_quickstart.md) + + +## III. Module API and Prediction + + +- ### 1、Command line Prediction + + - ```shell + $ hub run chinese_ocr_db_crnn_mobile --input_path "/PATH/TO/IMAGE" + ``` + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command line instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + import cv2 + + ocr = hub.Module(name="chinese_ocr_db_crnn_mobile", enable_mkldnn=True) # MKLDNN acceleration is only available on CPU + result = ocr.recognize_text(images=[cv2.imread('/PATH/TO/IMAGE')]) + + # or + # result = ocr.recognize_text(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 3、API + + - ```python + __init__(text_detector_module=None, enable_mkldnn=False) + ``` + + - Construct the ChineseOCRDBCRNN object + + - **Parameter** + + - text_detector_module(str): PaddleHub Module Name for text detection, use [chinese_text_detection_db_mobile Module](../chinese_text_detection_db_mobile/) by default if set to None. Its function is to detect the text in the picture. + - enable_mkldnn(bool): Whether to enable MKLDNN to accelerate CPU computing. This parameter is valid only when the CPU is running. The default is False. + + + - ```python + def recognize_text(images=[], + paths=[], + use_gpu=False, + output_dir='ocr_result', + visualization=False, + box_thresh=0.5, + text_thresh=0.5, + angle_classification_thresh=0.9) + ``` + + - Prediction API, detecting the position of all Chinese text in the input image. + + - **Parameter** + + - paths (list\[str\]): image path + - images (list\[numpy.ndarray\]): image data, ndarray.shape is in the format \[H, W, C\], BGR; + - use\_gpu (bool): use GPU or not **If GPU is used, set the CUDA_VISIBLE_DEVICES environment variable first** + - box\_thresh (float): The confidence threshold of text box detection; + - text\_thresh (float): The confidence threshold of Chinese text recognition; + - angle_classification_thresh(float): The confidence threshold of text Angle classification + - visualization (bool): Whether to save the recognition results as picture files; + - output\_dir (str): path to save the image, ocr\_result by default. 
+ + - **Return** + + - res (list\[dict\]): The list of recognition results, where each element is dict and each field is: + - data (list\[dict\]): recognition result, each element in the list is dict and each field is: + - text(str): The result text of recognition + - confidence(float): The confidence of the results + - text_box_position(list): The pixel coordinates of the text box in the original picture, a 4*2 matrix, represent the coordinates of the lower left, lower right, upper right and upper left vertices of the text box in turn + data is \[\] if there's no result + - save_path (str, optional): Path to save the result, save_path is '' if no image is saved. + + +## IV. Server Deployment + +- PaddleHub Serving can deploy an online object detection service. + +- ### Step 1: Start PaddleHub Serving + + - Run the startup command: + - ```shell + $ hub serving start -m chinese_ocr_db_crnn_mobile + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before prediction. Otherwise, need not set it. + + +- ### Step 2: Send a predictive request + + - After configuring the server, the following lines of code can be used to send the prediction request and obtain the prediction result + + - ```python + import requests + import json + import cv2 + import base64 + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # Send an HTTP request + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/chinese_ocr_db_crnn_mobile" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # print prediction result + print(r.json()["results"]) + ``` + +## V. Release Note + +* 1.0.0 + + First release + +* 1.0.1 + + Fixed failure to use the online service invocating model + +* 1.0.2 + + Supports MKLDNN to speed up CPU computing + +* 1.1.0 + + An ultra-lightweight three-stage model (text box detection - angle classification - text recognition) is used to identify text in images. + +* 1.1.1 + + Supports recognition of spaces in text. + +* 1.1.2 + + Fixed an issue where only 30 fields can be detected. + + - ```shell + $ hub install chinese_ocr_db_crnn_mobile==1.1.2 + ``` diff --git a/modules/image/text_recognition/chinese_ocr_db_crnn_mobile/module.py b/modules/image/text_recognition/chinese_ocr_db_crnn_mobile/module.py index 892c863f4b853c0232b682faffe0eb517b4a28d0..371e8f97317b173e730be0683004589a2aa5f162 100644 --- a/modules/image/text_recognition/chinese_ocr_db_crnn_mobile/module.py +++ b/modules/image/text_recognition/chinese_ocr_db_crnn_mobile/module.py @@ -21,7 +21,7 @@ from chinese_ocr_db_crnn_mobile.utils import base64_to_cv2, draw_ocr, get_image_ @moduleinfo( name="chinese_ocr_db_crnn_mobile", - version="1.1.1", + version="1.1.2", summary="The module can recognize the chinese texts in an image. Firstly, it will detect the text box positions \ based on the differentiable_binarization_chn module. Then it classifies the text angle and recognizes the chinese texts. 
", author="paddle-dev", @@ -490,14 +490,3 @@ class ChineseOCRDBCRNN(hub.Module): Add the command input options """ self.arg_input_group.add_argument('--input_path', type=str, default=None, help="diretory to image") - - -if __name__ == '__main__': - ocr = ChineseOCRDBCRNN() - image_path = [ - '/mnt/zhangxuefei/PaddleOCR/doc/imgs/2.jpg', '/mnt/zhangxuefei/PaddleOCR/doc/imgs/12.jpg', - '/mnt/zhangxuefei/PaddleOCR/doc/imgs/test_image.jpg' - ] - res = ocr.recognize_text(paths=image_path, visualization=True) - ocr.save_inference_model('save') - print(res) diff --git a/modules/image/text_recognition/chinese_ocr_db_crnn_server/module.py b/modules/image/text_recognition/chinese_ocr_db_crnn_server/module.py index 9175005980f18509c596460b380078318c346257..a96673f3dab687a52214a5b70702a0b23ea5694b 100644 --- a/modules/image/text_recognition/chinese_ocr_db_crnn_server/module.py +++ b/modules/image/text_recognition/chinese_ocr_db_crnn_server/module.py @@ -25,7 +25,7 @@ from chinese_ocr_db_crnn_server.utils import base64_to_cv2, draw_ocr, get_image_ @moduleinfo( name="chinese_ocr_db_crnn_server", - version="1.1.1", + version="1.1.2", summary= "The module can recognize the chinese texts in an image. Firstly, it will detect the text box positions based on the differentiable_binarization_chn module. Then it recognizes the chinese texts. ", author="paddle-dev", @@ -494,14 +494,3 @@ class ChineseOCRDBCRNNServer(hub.Module): Add the command input options """ self.arg_input_group.add_argument('--input_path', type=str, default=None, help="diretory to image") - - -if __name__ == '__main__': - ocr = ChineseOCRDBCRNNServer(enable_mkldnn=False) - image_path = [ - '/mnt/zhangxuefei/PaddleOCR/doc/imgs/11.jpg', '/mnt/zhangxuefei/PaddleOCR/doc/imgs/12.jpg', - '/mnt/zhangxuefei/PaddleOCR/doc/imgs/test_image.jpg' - ] - res = ocr.recognize_text(paths=image_path, visualization=True) - ocr.save_inference_model('save') - print(res) diff --git a/modules/image/text_recognition/chinese_ocr_db_crnn_server/utils.py b/modules/image/text_recognition/chinese_ocr_db_crnn_server/utils.py index c5bf34d04181cf228f4081024108550a1bc36187..5a90b27defba39446eb82ce83485811494e9caba 100644 --- a/modules/image/text_recognition/chinese_ocr_db_crnn_server/utils.py +++ b/modules/image/text_recognition/chinese_ocr_db_crnn_server/utils.py @@ -172,6 +172,6 @@ def sorted_boxes(dt_boxes): def base64_to_cv2(b64str): data = base64.b64decode(b64str.encode('utf8')) - data = np.frombuffer(data, np.uint8) + data = np.fromstring(data, np.uint8) data = cv2.imdecode(data, cv2.IMREAD_COLOR) return data diff --git a/modules/image/text_recognition/chinese_text_detection_db_mobile/module.py b/modules/image/text_recognition/chinese_text_detection_db_mobile/module.py index 14fd6137c805c30d58046d7896fe6b251a73cc8a..aaae4aea1626da8069667642fee6a2ae9f5aad89 100644 --- a/modules/image/text_recognition/chinese_text_detection_db_mobile/module.py +++ b/modules/image/text_recognition/chinese_text_detection_db_mobile/module.py @@ -29,7 +29,7 @@ def base64_to_cv2(b64str): @moduleinfo( name="chinese_text_detection_db_mobile", - version="1.0.3", + version="1.0.4", summary= "The module aims to detect chinese text position in the image, which is based on differentiable_binarization algorithm.", author="paddle-dev", @@ -103,26 +103,6 @@ class ChineseTextDetectionDB(hub.Module): images.append(img) return images - def clip_det_res(self, points, img_height, img_width): - for pno in range(points.shape[0]): - points[pno, 0] = int(min(max(points[pno, 0], 0), img_width - 1)) - points[pno, 
1] = int(min(max(points[pno, 1], 0), img_height - 1)) - return points - - def filter_tag_det_res(self, dt_boxes, image_shape): - img_height, img_width = image_shape[0:2] - dt_boxes_new = [] - for box in dt_boxes: - box = self.order_points_clockwise(box) - box = self.clip_det_res(box, img_height, img_width) - rect_width = int(np.linalg.norm(box[0] - box[1])) - rect_height = int(np.linalg.norm(box[0] - box[3])) - if rect_width <= 10 or rect_height <= 10: - continue - dt_boxes_new.append(box) - dt_boxes = np.array(dt_boxes_new) - return dt_boxes - def order_points_clockwise(self, pts): """ reference from: https://github.com/jrosebr1/imutils/blob/master/imutils/perspective.py @@ -147,6 +127,35 @@ class ChineseTextDetectionDB(hub.Module): rect = np.array([tl, tr, br, bl], dtype="float32") return rect + def clip_det_res(self, points, img_height, img_width): + for pno in range(points.shape[0]): + points[pno, 0] = int(min(max(points[pno, 0], 0), img_width - 1)) + points[pno, 1] = int(min(max(points[pno, 1], 0), img_height - 1)) + return points + + def filter_tag_det_res(self, dt_boxes, image_shape): + img_height, img_width = image_shape[0:2] + dt_boxes_new = [] + for box in dt_boxes: + box = self.order_points_clockwise(box) + box = self.clip_det_res(box, img_height, img_width) + rect_width = int(np.linalg.norm(box[0] - box[1])) + rect_height = int(np.linalg.norm(box[0] - box[3])) + if rect_width <= 3 or rect_height <= 3: + continue + dt_boxes_new.append(box) + dt_boxes = np.array(dt_boxes_new) + return dt_boxes + + def filter_tag_det_res_only_clip(self, dt_boxes, image_shape): + img_height, img_width = image_shape[0:2] + dt_boxes_new = [] + for box in dt_boxes: + box = self.clip_det_res(box, img_height, img_width) + dt_boxes_new.append(box) + dt_boxes = np.array(dt_boxes_new) + return dt_boxes + def detect_text(self, images=[], paths=[], @@ -193,7 +202,7 @@ class ChineseTextDetectionDB(hub.Module): 'thresh': 0.3, 'box_thresh': 0.5, 'max_candidates': 1000, - 'unclip_ratio': 2.0 + 'unclip_ratio': 1.6 }) all_imgs = [] @@ -314,14 +323,3 @@ class ChineseTextDetectionDB(hub.Module): Add the command input options """ self.arg_input_group.add_argument('--input_path', type=str, default=None, help="diretory to image") - - -if __name__ == '__main__': - db = ChineseTextDetectionDB() - image_path = [ - '/mnt/zhangxuefei/PaddleOCR/doc/imgs/2.jpg', '/mnt/zhangxuefei/PaddleOCR/doc/imgs/12.jpg', - '/mnt/zhangxuefei/PaddleOCR/doc/imgs/test_image.jpg' - ] - res = db.detect_text(paths=image_path, visualization=True) - db.save_inference_model('save') - print(res) diff --git a/modules/image/text_recognition/chinese_text_detection_db_mobile/processor.py b/modules/image/text_recognition/chinese_text_detection_db_mobile/processor.py index 5c6df83d714b545603b028c7f9dc006fc12d7d39..b5e76cbe99eb9260e3e788d5013deadc72b9b4c5 100644 --- a/modules/image/text_recognition/chinese_text_detection_db_mobile/processor.py +++ b/modules/image/text_recognition/chinese_text_detection_db_mobile/processor.py @@ -120,6 +120,7 @@ class DBPostProcess(object): self.max_candidates = params['max_candidates'] self.unclip_ratio = params['unclip_ratio'] self.min_size = 3 + self.dilation_kernel = np.array([[1, 1], [1, 1]]) def boxes_from_bitmap(self, pred, _bitmap, dest_width, dest_height): ''' @@ -218,7 +219,9 @@ class DBPostProcess(object): boxes_batch = [] for batch_index in range(pred.shape[0]): height, width = pred.shape[-2:] - tmp_boxes, tmp_scores = self.boxes_from_bitmap(pred[batch_index], segmentation[batch_index], width, height) + + mask = 
cv2.dilate(np.array(segmentation[batch_index]).astype(np.uint8), self.dilation_kernel) + tmp_boxes, tmp_scores = self.boxes_from_bitmap(pred[batch_index], mask, width, height) boxes = [] for k in range(len(tmp_boxes)): diff --git a/modules/image/text_recognition/chinese_text_detection_db_server/module.py b/modules/image/text_recognition/chinese_text_detection_db_server/module.py index 91ac7f325aecce47e041c92a9a7fae55d339983c..52295bef7c40bec2ff8a58133d25a0c7bc86c957 100644 --- a/modules/image/text_recognition/chinese_text_detection_db_server/module.py +++ b/modules/image/text_recognition/chinese_text_detection_db_server/module.py @@ -297,11 +297,3 @@ class ChineseTextDetectionDBServer(hub.Module): Add the command input options """ self.arg_input_group.add_argument('--input_path', type=str, default=None, help="diretory to image") - - -if __name__ == '__main__': - db = ChineseTextDetectionDBServer() - image_path = ['/mnt/zhangxuefei/PaddleOCR/doc/imgs/11.jpg', '/mnt/zhangxuefei/PaddleOCR/doc/imgs/12.jpg'] - res = db.detect_text(paths=image_path, visualization=True) - db.save_inference_model('save') - print(res) diff --git a/modules/image/text_recognition/cyrillic_ocr_db_crnn_mobile/README.md b/modules/image/text_recognition/cyrillic_ocr_db_crnn_mobile/README.md new file mode 100644 index 0000000000000000000000000000000000000000..24eb4f6d9bc7d02963519457d0b1bdcb657ca330 --- /dev/null +++ b/modules/image/text_recognition/cyrillic_ocr_db_crnn_mobile/README.md @@ -0,0 +1,165 @@ +# cyrillic_ocr_db_crnn_mobile + +|模型名称|cyrillic_ocr_db_crnn_mobile| +| :--- | :---: | +|类别|图像-文字识别| +|网络|Differentiable Binarization+CRNN| +|数据集|icdar2015数据集| +|是否支持Fine-tuning|否| +|最新更新日期|2021-12-2| +|数据指标|-| + + +## 一、模型基本信息 + +- ### 模型介绍 + + - cyrillic_ocr_db_crnn_mobile Module用于识别图片当中的斯拉夫文,包括俄罗斯文、塞尔维亚文、白俄罗斯文、保加利亚文、乌克兰文、蒙古文、阿迪赫文、阿瓦尔文、达尔瓦文、因古什文、拉克文、莱兹甘文、塔巴萨兰文。其基于multi_languages_ocr_db_crnn检测得到的文本框,继续识别文本框中的斯拉夫文文字。最终识别文字算法采用CRNN(Convolutional Recurrent Neural Network)即卷积递归神经网络。其是DCNN和RNN的组合,专门用于识别图像中的序列式对象。与CTC loss配合使用,进行文字识别,可以直接从文本词级或行级的标注中学习,不需要详细的字符级的标注。该Module是一个识别斯拉夫文的轻量级OCR模型,支持直接预测。 + + - 更多详情参考: + - [Real-time Scene Text Detection with Differentiable Binarization](https://arxiv.org/pdf/1911.08947.pdf) + - [An end-to-end trainable neural network for image-based sequence recognition and its application to scene text recognition](https://arxiv.org/pdf/1507.05717.pdf) + + + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.0.2 + + - paddlehub >= 2.0.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + +- ### 2、安装 + + - ```shell + $ hub install cyrillic_ocr_db_crnn_mobile + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + + + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run cyrillic_ocr_db_crnn_mobile --input_path "/PATH/TO/IMAGE" + $ hub run cyrillic_ocr_db_crnn_mobile --input_path "/PATH/TO/IMAGE" --det True --rec True --use_angle_cls True --box_thresh 0.7 --angle_classification_thresh 0.8 --visualization True + ``` + - 通过命令行方式实现文字识别模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + ocr = hub.Module(name="cyrillic_ocr_db_crnn_mobile", enable_mkldnn=True) # mkldnn加速仅在CPU下有效 + result = ocr.recognize_text(images=[cv2.imread('/PATH/TO/IMAGE')]) + + # or + # result = 
ocr.recognize_text(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 3、API + + - ```python + def __init__(self, + det=True, + rec=True, + use_angle_cls=False, + enable_mkldnn=False, + use_gpu=False, + box_thresh=0.6, + angle_classification_thresh=0.9) + ``` + + - 构造CyrillicOCRDBCRNNMobile对象 + + - **参数** + - det(bool): 是否开启文字检测。默认为True。 + - rec(bool): 是否开启文字识别。默认为True。 + - use_angle_cls(bool): 是否开启方向分类, 用于设置使用方向分类器识别180度旋转文字。默认为False。 + - enable_mkldnn(bool): 是否开启mkldnn加速CPU计算。该参数仅在CPU运行下设置有效。默认为False。 + - use\_gpu (bool): 是否使用 GPU;**若使用GPU,请先设置CUDA_VISIBLE_DEVICES环境变量** + - box\_thresh (float): 检测文本框置信度的阈值; + - angle_classification_thresh(float): 文本方向分类置信度的阈值 + + + - ```python + def recognize_text(images=[], + paths=[], + output_dir='ocr_result', + visualization=False) + ``` + + - 预测API,检测输入图片中的所有文本的位置和识别文本结果。 + + - **参数** + + - paths (list\[str\]): 图片的路径; + - images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\],BGR格式; + - output\_dir (str): 图片的保存路径,默认设为 ocr\_result; + - visualization (bool): 是否将识别结果保存为图片文件, 仅有检测开启时有效, 默认为False; + + - **返回** + + - res (list\[dict\]): 识别结果的列表,列表中每一个元素为 dict,各字段为: + - data (list\[dict\]): 识别文本结果,列表中每一个元素为 dict,各字段为: + - text(str): 识别得到的文本 + - confidence(float): 识别文本结果置信度 + - text_box_position(list): 文本框在原图中的像素坐标,4*2的矩阵,依次表示文本框左下、右下、右上、左上顶点的坐标,如果无识别结果则data为\[\] + - orientation(str): 分类的方向,仅在只有方向分类开启时输出 + - score(float): 分类的得分,仅在只有方向分类开启时输出 + - save_path (str, optional): 识别结果的保存路径,如不保存图片则save_path为'' + + +## 四、服务部署 + +- PaddleHub Serving 可以部署一个目标检测的在线服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + - ```shell + $ hub serving start -m cyrillic_ocr_db_crnn_mobile + ``` + + - 这样就完成了一个目标检测的服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + import cv2 + import base64 + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # 发送HTTP请求 + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/cyrillic_ocr_db_crnn_mobile" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # 打印预测结果 + print(r.json()["results"]) + ``` + +## 五、更新历史 + +* 1.0.0 + + 初始发布 + - ```shell + $ hub install cyrillic_ocr_db_crnn_mobile==1.0.0 + ``` diff --git a/modules/image/text_recognition/cyrillic_ocr_db_crnn_mobile/__init__.py b/modules/image/text_recognition/cyrillic_ocr_db_crnn_mobile/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/modules/image/text_recognition/cyrillic_ocr_db_crnn_mobile/module.py b/modules/image/text_recognition/cyrillic_ocr_db_crnn_mobile/module.py new file mode 100644 index 0000000000000000000000000000000000000000..bd182e6693ddb72059fbb3a5cc28a96e3f27c1e6 --- /dev/null +++ b/modules/image/text_recognition/cyrillic_ocr_db_crnn_mobile/module.py @@ -0,0 +1,87 @@ +import paddlehub as hub +from paddleocr.ppocr.utils.logging import get_logger +from paddleocr.tools.infer.utility import base64_to_cv2 +from paddlehub.module.module import moduleinfo, runnable, serving + + +@moduleinfo( + name="cyrillic_ocr_db_crnn_mobile", + version="1.0.0", + summary="ocr service", + author="PaddlePaddle", + type="cv/text_recognition") +class CyrillicOCRDBCRNNMobile: + def __init__(self, + det=True, + rec=True, + use_angle_cls=False, + enable_mkldnn=False, + 
use_gpu=False, + box_thresh=0.6, + angle_classification_thresh=0.9): + """ + initialize with the necessary elements + Args: + det(bool): Whether to use text detector. + rec(bool): Whether to use text recognizer. + use_angle_cls(bool): Whether to use text orientation classifier. + enable_mkldnn(bool): Whether to enable mkldnn. + use_gpu (bool): Whether to use gpu. + box_thresh(float): the threshold of the detected text box's confidence + angle_classification_thresh(float): the threshold of the angle classification confidence + """ + self.logger = get_logger() + self.model = hub.Module( + name="multi_languages_ocr_db_crnn", + lang="cyrillic", + det=det, + rec=rec, + use_angle_cls=use_angle_cls, + enable_mkldnn=enable_mkldnn, + use_gpu=use_gpu, + box_thresh=box_thresh, + angle_classification_thresh=angle_classification_thresh) + self.model.name = self.name + + def recognize_text(self, images=[], paths=[], output_dir='ocr_result', visualization=False): + """ + Get the text in the predicted images. + Args: + images (list(numpy.ndarray)): images data, shape of each is [H, W, C]. If images not paths + paths (list[str]): The paths of images. If paths not images + output_dir (str): The directory to store output images. + visualization (bool): Whether to save image or not. + Returns: + res (list): The result of text detection box and save path of images. + """ + all_results = self.model.recognize_text( + images=images, paths=paths, output_dir=output_dir, visualization=visualization) + return all_results + + @serving + def serving_method(self, images, **kwargs): + """ + Run as a service. + """ + images_decode = [base64_to_cv2(image) for image in images] + results = self.recognize_text(images_decode, **kwargs) + return results + + @runnable + def run_cmd(self, argvs): + """ + Run as a command + """ + results = self.model.run_cmd(argvs) + return results + + def export_onnx_model(self, dirname: str, input_shape_dict=None, opset_version=10): + ''' + Export the model to ONNX format. + + Args: + dirname(str): The directory to save the onnx model. + input_shape_dict: dictionary ``{ input_name: input_value }, eg. 
{'x': [-1, 3, -1, -1]}`` + opset_version(int): operator set + ''' + self.model.export_onnx_model(dirname=dirname, input_shape_dict=input_shape_dict, opset_version=opset_version) diff --git a/modules/image/text_recognition/cyrillic_ocr_db_crnn_mobile/requirements.txt b/modules/image/text_recognition/cyrillic_ocr_db_crnn_mobile/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..527c6de7f643cb427013aaff2409365538fed2d3 --- /dev/null +++ b/modules/image/text_recognition/cyrillic_ocr_db_crnn_mobile/requirements.txt @@ -0,0 +1,4 @@ +paddleocr>=2.3.0.2 +paddle2onnx>=0.9.0 +shapely +pyclipper diff --git a/modules/image/text_recognition/devanagari_ocr_db_crnn_mobile/README.md b/modules/image/text_recognition/devanagari_ocr_db_crnn_mobile/README.md new file mode 100644 index 0000000000000000000000000000000000000000..a47c2dd12e04d55f116fd52a3008470ef6fe94b8 --- /dev/null +++ b/modules/image/text_recognition/devanagari_ocr_db_crnn_mobile/README.md @@ -0,0 +1,165 @@ +# devanagari_ocr_db_crnn_mobile + +|模型名称|devanagari_ocr_db_crnn_mobile| +| :--- | :---: | +|类别|图像-文字识别| +|网络|Differentiable Binarization+CRNN| +|数据集|icdar2015数据集| +|是否支持Fine-tuning|否| +|最新更新日期|2021-12-2| +|数据指标|-| + + +## 一、模型基本信息 + +- ### 模型介绍 + + - devanagari_ocr_db_crnn_mobile Module用于识别图片当中的梵文,包括印地文、马拉地文、尼泊尔文、比尔哈文、迈蒂利文、昂加文、孟加拉文、摩揭陀文、那格浦尔文、尼瓦尔文。其基于multi_languages_ocr_db_crnn检测得到的文本框,继续识别文本框中的梵文文字。最终识别文字算法采用CRNN(Convolutional Recurrent Neural Network)即卷积递归神经网络。其是DCNN和RNN的组合,专门用于识别图像中的序列式对象。与CTC loss配合使用,进行文字识别,可以直接从文本词级或行级的标注中学习,不需要详细的字符级的标注。该Module是一个识别梵文的轻量级OCR模型,支持直接预测。 + + - 更多详情参考: + - [Real-time Scene Text Detection with Differentiable Binarization](https://arxiv.org/pdf/1911.08947.pdf) + - [An end-to-end trainable neural network for image-based sequence recognition and its application to scene text recognition](https://arxiv.org/pdf/1507.05717.pdf) + + + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.0.2 + + - paddlehub >= 2.0.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + +- ### 2、安装 + + - ```shell + $ hub install devanagari_ocr_db_crnn_mobile + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + + + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run devanagari_ocr_db_crnn_mobile --input_path "/PATH/TO/IMAGE" + $ hub run devanagari_ocr_db_crnn_mobile --input_path "/PATH/TO/IMAGE" --det True --rec True --use_angle_cls True --box_thresh 0.7 --angle_classification_thresh 0.8 --visualization True + ``` + - 通过命令行方式实现文字识别模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + ocr = hub.Module(name="devanagari_ocr_db_crnn_mobile", enable_mkldnn=True) # mkldnn加速仅在CPU下有效 + result = ocr.recognize_text(images=[cv2.imread('/PATH/TO/IMAGE')]) + + # or + # result = ocr.recognize_text(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 3、API + + - ```python + def __init__(self, + det=True, + rec=True, + use_angle_cls=False, + enable_mkldnn=False, + use_gpu=False, + box_thresh=0.6, + angle_classification_thresh=0.9) + ``` + + - 构造DevanagariOCRDBCRNNMobile对象 + + - **参数** + - det(bool): 是否开启文字检测。默认为True。 + - rec(bool): 是否开启文字识别。默认为True。 + - use_angle_cls(bool): 是否开启方向分类, 用于设置使用方向分类器识别180度旋转文字。默认为False。 + - enable_mkldnn(bool): 是否开启mkldnn加速CPU计算。该参数仅在CPU运行下设置有效。默认为False。 + - use\_gpu (bool): 是否使用 
GPU;**若使用GPU,请先设置CUDA_VISIBLE_DEVICES环境变量** + - box\_thresh (float): 检测文本框置信度的阈值; + - angle_classification_thresh(float): 文本方向分类置信度的阈值 + + + - ```python + def recognize_text(images=[], + paths=[], + output_dir='ocr_result', + visualization=False) + ``` + + - 预测API,检测输入图片中的所有文本的位置和识别文本结果。 + + - **参数** + + - paths (list\[str\]): 图片的路径; + - images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\],BGR格式; + - output\_dir (str): 图片的保存路径,默认设为 ocr\_result; + - visualization (bool): 是否将识别结果保存为图片文件, 仅有检测开启时有效, 默认为False; + + - **返回** + + - res (list\[dict\]): 识别结果的列表,列表中每一个元素为 dict,各字段为: + - data (list\[dict\]): 识别文本结果,列表中每一个元素为 dict,各字段为: + - text(str): 识别得到的文本 + - confidence(float): 识别文本结果置信度 + - text_box_position(list): 文本框在原图中的像素坐标,4*2的矩阵,依次表示文本框左下、右下、右上、左上顶点的坐标,如果无识别结果则data为\[\] + - orientation(str): 分类的方向,仅在只有方向分类开启时输出 + - score(float): 分类的得分,仅在只有方向分类开启时输出 + - save_path (str, optional): 识别结果的保存路径,如不保存图片则save_path为'' + + +## 四、服务部署 + +- PaddleHub Serving 可以部署一个目标检测的在线服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + - ```shell + $ hub serving start -m devanagari_ocr_db_crnn_mobile + ``` + + - 这样就完成了一个目标检测的服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + import cv2 + import base64 + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # 发送HTTP请求 + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/devanagari_ocr_db_crnn_mobile" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # 打印预测结果 + print(r.json()["results"]) + ``` + +## 五、更新历史 + +* 1.0.0 + + 初始发布 + - ```shell + $ hub install devanagari_ocr_db_crnn_mobile==1.0.0 + ``` diff --git a/modules/image/text_recognition/devanagari_ocr_db_crnn_mobile/__init__.py b/modules/image/text_recognition/devanagari_ocr_db_crnn_mobile/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/modules/image/text_recognition/devanagari_ocr_db_crnn_mobile/module.py b/modules/image/text_recognition/devanagari_ocr_db_crnn_mobile/module.py new file mode 100644 index 0000000000000000000000000000000000000000..a165f934188d9d0df9fd9f18378e141330ff4b38 --- /dev/null +++ b/modules/image/text_recognition/devanagari_ocr_db_crnn_mobile/module.py @@ -0,0 +1,87 @@ +import paddlehub as hub +from paddleocr.ppocr.utils.logging import get_logger +from paddleocr.tools.infer.utility import base64_to_cv2 +from paddlehub.module.module import moduleinfo, runnable, serving + + +@moduleinfo( + name="devanagari_ocr_db_crnn_mobile", + version="1.0.0", + summary="ocr service", + author="PaddlePaddle", + type="cv/text_recognition") +class DevanagariOCRDBCRNNMobile: + def __init__(self, + det=True, + rec=True, + use_angle_cls=False, + enable_mkldnn=False, + use_gpu=False, + box_thresh=0.6, + angle_classification_thresh=0.9): + """ + initialize with the necessary elements + Args: + det(bool): Whether to use text detector. + rec(bool): Whether to use text recognizer. + use_angle_cls(bool): Whether to use text orientation classifier. + enable_mkldnn(bool): Whether to enable mkldnn. + use_gpu (bool): Whether to use gpu. 
+ box_thresh(float): the threshold of the detected text box's confidence + angle_classification_thresh(float): the threshold of the angle classification confidence + """ + self.logger = get_logger() + self.model = hub.Module( + name="multi_languages_ocr_db_crnn", + lang="devanagari", + det=det, + rec=rec, + use_angle_cls=use_angle_cls, + enable_mkldnn=enable_mkldnn, + use_gpu=use_gpu, + box_thresh=box_thresh, + angle_classification_thresh=angle_classification_thresh) + self.model.name = self.name + + def recognize_text(self, images=[], paths=[], output_dir='ocr_result', visualization=False): + """ + Get the text in the predicted images. + Args: + images (list(numpy.ndarray)): images data, shape of each is [H, W, C]. If images not paths + paths (list[str]): The paths of images. If paths not images + output_dir (str): The directory to store output images. + visualization (bool): Whether to save image or not. + Returns: + res (list): The result of text detection box and save path of images. + """ + all_results = self.model.recognize_text( + images=images, paths=paths, output_dir=output_dir, visualization=visualization) + return all_results + + @serving + def serving_method(self, images, **kwargs): + """ + Run as a service. + """ + images_decode = [base64_to_cv2(image) for image in images] + results = self.recognize_text(images_decode, **kwargs) + return results + + @runnable + def run_cmd(self, argvs): + """ + Run as a command + """ + results = self.model.run_cmd(argvs) + return results + + def export_onnx_model(self, dirname: str, input_shape_dict=None, opset_version=10): + ''' + Export the model to ONNX format. + + Args: + dirname(str): The directory to save the onnx model. + input_shape_dict: dictionary ``{ input_name: input_value }, eg. {'x': [-1, 3, -1, -1]}`` + opset_version(int): operator set + ''' + self.model.export_onnx_model(dirname=dirname, input_shape_dict=input_shape_dict, opset_version=opset_version) diff --git a/modules/image/text_recognition/devanagari_ocr_db_crnn_mobile/requirements.txt b/modules/image/text_recognition/devanagari_ocr_db_crnn_mobile/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..527c6de7f643cb427013aaff2409365538fed2d3 --- /dev/null +++ b/modules/image/text_recognition/devanagari_ocr_db_crnn_mobile/requirements.txt @@ -0,0 +1,4 @@ +paddleocr>=2.3.0.2 +paddle2onnx>=0.9.0 +shapely +pyclipper diff --git a/modules/image/text_recognition/french_ocr_db_crnn_mobile/README.md b/modules/image/text_recognition/french_ocr_db_crnn_mobile/README.md new file mode 100644 index 0000000000000000000000000000000000000000..2b7feb555d1547e869a55ba9ed4bda38a7244398 --- /dev/null +++ b/modules/image/text_recognition/french_ocr_db_crnn_mobile/README.md @@ -0,0 +1,169 @@ +# french_ocr_db_crnn_mobile + +|模型名称|french_ocr_db_crnn_mobile| +| :--- | :---: | +|类别|图像-文字识别| +|网络|Differentiable Binarization+CRNN| +|数据集|icdar2015数据集| +|是否支持Fine-tuning|否| +|最新更新日期|2021-12-2| +|数据指标|-| + + +## 一、模型基本信息 + +- ### 模型介绍 + + - french_ocr_db_crnn_mobile Module用于识别图片当中的法文。其基于multi_languages_ocr_db_crnn检测得到的文本框,继续识别文本框中的法文文字。最终识别文字算法采用CRNN(Convolutional Recurrent Neural Network)即卷积递归神经网络。其是DCNN和RNN的组合,专门用于识别图像中的序列式对象。与CTC loss配合使用,进行文字识别,可以直接从文本词级或行级的标注中学习,不需要详细的字符级的标注。该Module是一个识别法文的轻量级OCR模型,支持直接预测。 + + - 更多详情参考: + - [Real-time Scene Text Detection with Differentiable Binarization](https://arxiv.org/pdf/1911.08947.pdf) + - [An end-to-end trainable neural network for image-based sequence recognition and its application to scene text 
recognition](https://arxiv.org/pdf/1507.05717.pdf) + + + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.0.2 + + - paddlehub >= 2.0.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + +- ### 2、安装 + + - ```shell + $ hub install french_ocr_db_crnn_mobile + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + + + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run french_ocr_db_crnn_mobile --input_path "/PATH/TO/IMAGE" + $ hub run french_ocr_db_crnn_mobile --input_path "/PATH/TO/IMAGE" --det True --rec True --use_angle_cls True --box_thresh 0.7 --angle_classification_thresh 0.8 --visualization True + ``` + - 通过命令行方式实现文字识别模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + ocr = hub.Module(name="french_ocr_db_crnn_mobile", enable_mkldnn=True) # mkldnn加速仅在CPU下有效 + result = ocr.recognize_text(images=[cv2.imread('/PATH/TO/IMAGE')]) + + # or + # result = ocr.recognize_text(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 3、API + + - ```python + def __init__(self, + det=True, + rec=True, + use_angle_cls=False, + enable_mkldnn=False, + use_gpu=False, + box_thresh=0.6, + angle_classification_thresh=0.9) + ``` + + - 构造FrechOCRDBCRNNMobile对象 + + - **参数** + - det(bool): 是否开启文字检测。默认为True。 + - rec(bool): 是否开启文字识别。默认为True。 + - use_angle_cls(bool): 是否开启方向分类, 用于设置使用方向分类器识别180度旋转文字。默认为False。 + - enable_mkldnn(bool): 是否开启mkldnn加速CPU计算。该参数仅在CPU运行下设置有效。默认为False。 + - use\_gpu (bool): 是否使用 GPU;**若使用GPU,请先设置CUDA_VISIBLE_DEVICES环境变量** + - box\_thresh (float): 检测文本框置信度的阈值; + - angle_classification_thresh(float): 文本方向分类置信度的阈值 + + + - ```python + def recognize_text(images=[], + paths=[], + output_dir='ocr_result', + visualization=False) + ``` + + - 预测API,检测输入图片中的所有文本的位置和识别文本结果。 + + - **参数** + + - paths (list\[str\]): 图片的路径; + - images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\],BGR格式; + - output\_dir (str): 图片的保存路径,默认设为 ocr\_result; + - visualization (bool): 是否将识别结果保存为图片文件, 仅有检测开启时有效, 默认为False; + + - **返回** + + - res (list\[dict\]): 识别结果的列表,列表中每一个元素为 dict,各字段为: + - data (list\[dict\]): 识别文本结果,列表中每一个元素为 dict,各字段为: + - text(str): 识别得到的文本 + - confidence(float): 识别文本结果置信度 + - text_box_position(list): 文本框在原图中的像素坐标,4*2的矩阵,依次表示文本框左下、右下、右上、左上顶点的坐标,如果无识别结果则data为\[\] + - orientation(str): 分类的方向,仅在只有方向分类开启时输出 + - score(float): 分类的得分,仅在只有方向分类开启时输出 + - save_path (str, optional): 识别结果的保存路径,如不保存图片则save_path为'' + + +## 四、服务部署 + +- PaddleHub Serving 可以部署一个目标检测的在线服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + - ```shell + $ hub serving start -m french_ocr_db_crnn_mobile + ``` + + - 这样就完成了一个目标检测的服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + import cv2 + import base64 + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # 发送HTTP请求 + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/french_ocr_db_crnn_mobile" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # 打印预测结果 + print(r.json()["results"]) + ``` + +## 五、更新历史 + +* 1.0.0 + + 初始发布 + +* 1.1.0 + + 优化模型 + - 
```shell + $ hub install french_ocr_db_crnn_mobile==1.1.0 + ``` diff --git a/modules/image/text_recognition/french_ocr_db_crnn_mobile/__init__.py b/modules/image/text_recognition/french_ocr_db_crnn_mobile/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/modules/image/text_recognition/french_ocr_db_crnn_mobile/module.py b/modules/image/text_recognition/french_ocr_db_crnn_mobile/module.py new file mode 100644 index 0000000000000000000000000000000000000000..674c2db8b5d4bfae94b800e202f72692bda33f97 --- /dev/null +++ b/modules/image/text_recognition/french_ocr_db_crnn_mobile/module.py @@ -0,0 +1,87 @@ +import paddlehub as hub +from paddleocr.ppocr.utils.logging import get_logger +from paddleocr.tools.infer.utility import base64_to_cv2 +from paddlehub.module.module import moduleinfo, runnable, serving + + +@moduleinfo( + name="french_ocr_db_crnn_mobile", + version="1.1.0", + summary="ocr service", + author="PaddlePaddle", + type="cv/text_recognition") +class FrechOCRDBCRNNMobile: + def __init__(self, + det=True, + rec=True, + use_angle_cls=False, + enable_mkldnn=False, + use_gpu=False, + box_thresh=0.6, + angle_classification_thresh=0.9): + """ + initialize with the necessary elements + Args: + det(bool): Whether to use text detector. + rec(bool): Whether to use text recognizer. + use_angle_cls(bool): Whether to use text orientation classifier. + enable_mkldnn(bool): Whether to enable mkldnn. + use_gpu (bool): Whether to use gpu. + box_thresh(float): the threshold of the detected text box's confidence + angle_classification_thresh(float): the threshold of the angle classification confidence + """ + self.logger = get_logger() + self.model = hub.Module( + name="multi_languages_ocr_db_crnn", + lang="fr", + det=det, + rec=rec, + use_angle_cls=use_angle_cls, + enable_mkldnn=enable_mkldnn, + use_gpu=use_gpu, + box_thresh=box_thresh, + angle_classification_thresh=angle_classification_thresh) + self.model.name = self.name + + def recognize_text(self, images=[], paths=[], output_dir='ocr_result', visualization=False): + """ + Get the text in the predicted images. + Args: + images (list(numpy.ndarray)): images data, shape of each is [H, W, C]. If images not paths + paths (list[str]): The paths of images. If paths not images + output_dir (str): The directory to store output images. + visualization (bool): Whether to save image or not. + Returns: + res (list): The result of text detection box and save path of images. + """ + all_results = self.model.recognize_text( + images=images, paths=paths, output_dir=output_dir, visualization=visualization) + return all_results + + @serving + def serving_method(self, images, **kwargs): + """ + Run as a service. + """ + images_decode = [base64_to_cv2(image) for image in images] + results = self.recognize_text(images_decode, **kwargs) + return results + + @runnable + def run_cmd(self, argvs): + """ + Run as a command + """ + results = self.model.run_cmd(argvs) + return results + + def export_onnx_model(self, dirname: str, input_shape_dict=None, opset_version=10): + ''' + Export the model to ONNX format. + + Args: + dirname(str): The directory to save the onnx model. + input_shape_dict: dictionary ``{ input_name: input_value }, eg. 
{'x': [-1, 3, -1, -1]}`` + opset_version(int): operator set + ''' + self.model.export_onnx_model(dirname=dirname, input_shape_dict=input_shape_dict, opset_version=opset_version) diff --git a/modules/image/text_recognition/french_ocr_db_crnn_mobile/requirements.txt b/modules/image/text_recognition/french_ocr_db_crnn_mobile/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..527c6de7f643cb427013aaff2409365538fed2d3 --- /dev/null +++ b/modules/image/text_recognition/french_ocr_db_crnn_mobile/requirements.txt @@ -0,0 +1,4 @@ +paddleocr>=2.3.0.2 +paddle2onnx>=0.9.0 +shapely +pyclipper diff --git a/modules/image/text_recognition/german_ocr_db_crnn_mobile/README.md b/modules/image/text_recognition/german_ocr_db_crnn_mobile/README.md index d5cfe848f7c27281e82789787ffc2688f643af52..813355649c664a4f1ebf4dc62d9f899e3177aa45 100644 --- a/modules/image/text_recognition/german_ocr_db_crnn_mobile/README.md +++ b/modules/image/text_recognition/german_ocr_db_crnn_mobile/README.md @@ -27,18 +27,9 @@ - ### 1、环境依赖 - - paddlepaddle >= 1.8.0 + - paddlepaddle >= 2.0.2 - - paddlehub >= 1.8.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) - - - shapely - - - pyclipper - - - ```shell - $ pip install shapely pyclipper - ``` - - **该Module依赖于第三方库shapely和pyclipper,使用该Module之前,请先安装shapely和pyclipper。** + - paddlehub >= 2.0.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) - ### 2、安装 @@ -58,7 +49,7 @@ - 通过命令行方式实现文字识别模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) -- ### 2、代码示例 +- ### 2、预测代码示例 - ```python import paddlehub as hub @@ -159,13 +150,15 @@ print(r.json()["results"]) ``` - ## 五、更新历史 * 1.0.0 初始发布 +* 1.1.0 + + 优化模型 - ```shell - $ hub install german_ocr_db_crnn_mobile==1.0.0 + $ hub install german_ocr_db_crnn_mobile==1.1.0 ``` diff --git a/modules/image/text_recognition/german_ocr_db_crnn_mobile/assets/german_dict.txt b/modules/image/text_recognition/german_ocr_db_crnn_mobile/assets/german_dict.txt deleted file mode 100644 index 30c4d4218e8a77386db912e24117b1f197466e83..0000000000000000000000000000000000000000 --- a/modules/image/text_recognition/german_ocr_db_crnn_mobile/assets/german_dict.txt +++ /dev/null @@ -1,131 +0,0 @@ -! -" -$ -% -& -' -( -) -+ -, -- -. -/ -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -: -; -> -? -A -B -C -D -E -F -G -H -I -J -K -L -M -N -O -P -Q -R -S -T -U -V -W -X -Y -Z -[ -] -a -b -c -d -e -f -g -h -i -j -k -l -m -n -o -p -q -r -s -t -u -v -w -x -y -z -£ -§ -­ -² -´ -µ -· -º -¼ -½ -¿ -À -Á -Ä -Å -Ç -É -Í -Ï -Ô -Ö -Ø -Ù -Ü -ß -à -á -â -ã -ä -å -æ -ç -è -é -ê -ë -í -ï -ñ -ò -ó -ô -ö -ø -ù -ú -û -ü - diff --git a/modules/image/text_recognition/german_ocr_db_crnn_mobile/character.py b/modules/image/text_recognition/german_ocr_db_crnn_mobile/character.py deleted file mode 100644 index 21dbbd9dc790e3d009f45c1ef1b68c001e9f0e0b..0000000000000000000000000000000000000000 --- a/modules/image/text_recognition/german_ocr_db_crnn_mobile/character.py +++ /dev/null @@ -1,213 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -import numpy as np -import string - -class CharacterOps(object): - """ Convert between text-label and text-index """ - - def __init__(self, config): - self.character_type = config['character_type'] - self.loss_type = config['loss_type'] - self.max_text_len = config['max_text_length'] - if self.character_type == "en": - self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz" - dict_character = list(self.character_str) - elif self.character_type in [ - "ch", 'japan', 'korean', 'french', 'german' - ]: - character_dict_path = config['character_dict_path'] - add_space = False - if 'use_space_char' in config: - add_space = config['use_space_char'] - self.character_str = "" - with open(character_dict_path, "rb") as fin: - lines = fin.readlines() - for line in lines: - line = line.decode('utf-8').strip("\n").strip("\r\n") - self.character_str += line - if add_space: - self.character_str += " " - dict_character = list(self.character_str) - elif self.character_type == "en_sensitive": - # same with ASTER setting (use 94 char). - self.character_str = string.printable[:-6] - dict_character = list(self.character_str) - else: - self.character_str = None - assert self.character_str is not None, \ - "Nonsupport type of the character: {}".format(self.character_str) - self.beg_str = "sos" - self.end_str = "eos" - if self.loss_type == "attention": - dict_character = [self.beg_str, self.end_str] + dict_character - elif self.loss_type == "srn": - dict_character = dict_character + [self.beg_str, self.end_str] - self.dict = {} - for i, char in enumerate(dict_character): - self.dict[char] = i - self.character = dict_character - - def encode(self, text): - """convert text-label into text-index. - input: - text: text labels of each image. [batch_size] - - output: - text: concatenated text index for CTCLoss. - [sum(text_lengths)] = [text_index_0 + text_index_1 + ... + text_index_(n - 1)] - length: length of each text. [batch_size] - """ - if self.character_type == "en": - text = text.lower() - - text_list = [] - for char in text: - if char not in self.dict: - continue - text_list.append(self.dict[char]) - text = np.array(text_list) - return text - - def decode(self, text_index, is_remove_duplicate=False): - """ convert text-index into text-label. 
""" - char_list = [] - char_num = self.get_char_num() - - if self.loss_type == "attention": - beg_idx = self.get_beg_end_flag_idx("beg") - end_idx = self.get_beg_end_flag_idx("end") - ignored_tokens = [beg_idx, end_idx] - else: - ignored_tokens = [char_num] - - for idx in range(len(text_index)): - if text_index[idx] in ignored_tokens: - continue - if is_remove_duplicate: - if idx > 0 and text_index[idx - 1] == text_index[idx]: - continue - char_list.append(self.character[int(text_index[idx])]) - text = ''.join(char_list) - return text - - def get_char_num(self): - return len(self.character) - - def get_beg_end_flag_idx(self, beg_or_end): - if self.loss_type == "attention": - if beg_or_end == "beg": - idx = np.array(self.dict[self.beg_str]) - elif beg_or_end == "end": - idx = np.array(self.dict[self.end_str]) - else: - assert False, "Unsupport type %s in get_beg_end_flag_idx"\ - % beg_or_end - return idx - else: - err = "error in get_beg_end_flag_idx when using the loss %s"\ - % (self.loss_type) - assert False, err - - -def cal_predicts_accuracy(char_ops, - preds, - preds_lod, - labels, - labels_lod, - is_remove_duplicate=False): - acc_num = 0 - img_num = 0 - for ino in range(len(labels_lod) - 1): - beg_no = preds_lod[ino] - end_no = preds_lod[ino + 1] - preds_text = preds[beg_no:end_no].reshape(-1) - preds_text = char_ops.decode(preds_text, is_remove_duplicate) - - beg_no = labels_lod[ino] - end_no = labels_lod[ino + 1] - labels_text = labels[beg_no:end_no].reshape(-1) - labels_text = char_ops.decode(labels_text, is_remove_duplicate) - img_num += 1 - - if preds_text == labels_text: - acc_num += 1 - acc = acc_num * 1.0 / img_num - return acc, acc_num, img_num - - -def cal_predicts_accuracy_srn(char_ops, - preds, - labels, - max_text_len, - is_debug=False): - acc_num = 0 - img_num = 0 - - char_num = char_ops.get_char_num() - - total_len = preds.shape[0] - img_num = int(total_len / max_text_len) - for i in range(img_num): - cur_label = [] - cur_pred = [] - for j in range(max_text_len): - if labels[j + i * max_text_len] != int(char_num - 1): #0 - cur_label.append(labels[j + i * max_text_len][0]) - else: - break - - for j in range(max_text_len + 1): - if j < len(cur_label) and preds[j + i * max_text_len][ - 0] != cur_label[j]: - break - elif j == len(cur_label) and j == max_text_len: - acc_num += 1 - break - elif j == len(cur_label) and preds[j + i * max_text_len][0] == int( - char_num - 1): - acc_num += 1 - break - acc = acc_num * 1.0 / img_num - return acc, acc_num, img_num - - -def convert_rec_attention_infer_res(preds): - img_num = preds.shape[0] - target_lod = [0] - convert_ids = [] - for ino in range(img_num): - end_pos = np.where(preds[ino, :] == 1)[0] - if len(end_pos) <= 1: - text_list = preds[ino, 1:] - else: - text_list = preds[ino, 1:end_pos[1]] - target_lod.append(target_lod[ino] + len(text_list)) - convert_ids = convert_ids + list(text_list) - convert_ids = np.array(convert_ids) - convert_ids = convert_ids.reshape((-1, 1)) - return convert_ids, target_lod - - -def convert_rec_label_to_lod(ori_labels): - img_num = len(ori_labels) - target_lod = [0] - convert_ids = [] - for ino in range(img_num): - target_lod.append(target_lod[ino] + len(ori_labels[ino])) - convert_ids = convert_ids + list(ori_labels[ino]) - convert_ids = np.array(convert_ids) - convert_ids = convert_ids.reshape((-1, 1)) - return convert_ids, target_lod diff --git a/modules/image/text_recognition/german_ocr_db_crnn_mobile/module.py b/modules/image/text_recognition/german_ocr_db_crnn_mobile/module.py index 
6b59d274faa7a583851369a38fb73756dfcbcebe..569cc14817d85313037a60463f0115fb0a65deaf 100644 --- a/modules/image/text_recognition/german_ocr_db_crnn_mobile/module.py +++ b/modules/image/text_recognition/german_ocr_db_crnn_mobile/module.py @@ -1,304 +1,61 @@ -# -*- coding:utf-8 -*- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import argparse -import ast -import copy -import math -import os -import time - -from paddle.fluid.core import AnalysisConfig, create_paddle_predictor, PaddleTensor -from paddlehub.common.logger import logger -from paddlehub.module.module import moduleinfo, runnable, serving -from PIL import Image -import cv2 -import numpy as np -import paddle.fluid as fluid import paddlehub as hub - -from german_ocr_db_crnn_mobile.character import CharacterOps -from german_ocr_db_crnn_mobile.utils import base64_to_cv2, draw_ocr, get_image_ext, sorted_boxes +from paddleocr.ppocr.utils.logging import get_logger +from paddleocr.tools.infer.utility import base64_to_cv2 +from paddlehub.module.module import moduleinfo, runnable, serving @moduleinfo( name="german_ocr_db_crnn_mobile", - version="1.0.0", - summary= - "The module can recognize the german texts in an image. Firstly, it will detect the text box positions based on the differentiable_binarization module. Then it recognizes the german texts. ", - author="paddle-dev", - author_email="paddle-dev@baidu.com", + version="1.1.0", + summary="ocr service", + author="PaddlePaddle", type="cv/text_recognition") -class GermanOCRDBCRNNMobile(hub.Module): - def _initialize(self, text_detector_module=None, enable_mkldnn=False, use_angle_classification=False): +class GermanOCRDBCRNNMobile: + def __init__(self, + det=True, + rec=True, + use_angle_cls=False, + enable_mkldnn=False, + use_gpu=False, + box_thresh=0.6, + angle_classification_thresh=0.9): """ initialize with the necessary elements + Args: + det(bool): Whether to use text detector. + rec(bool): Whether to use text recognizer. + use_angle_cls(bool): Whether to use text orientation classifier. + enable_mkldnn(bool): Whether to enable mkldnn. + use_gpu (bool): Whether to use gpu. 
+ box_thresh(float): the threshold of the detected text box's confidence + angle_classification_thresh(float): the threshold of the angle classification confidence """ - self.character_dict_path = os.path.join(self.directory, 'assets', - 'german_dict.txt') - char_ops_params = { - 'character_type': 'german', - 'character_dict_path': self.character_dict_path, - 'loss_type': 'ctc', - 'max_text_length': 25, - 'use_space_char': True - } - self.char_ops = CharacterOps(char_ops_params) - self.rec_image_shape = [3, 32, 320] - self._text_detector_module = text_detector_module - self.font_file = os.path.join(self.directory, 'assets', 'german.ttf') - self.enable_mkldnn = enable_mkldnn - self.use_angle_classification = use_angle_classification - - self.rec_pretrained_model_path = os.path.join( - self.directory, 'inference_model', 'character_rec') - self.rec_predictor, self.rec_input_tensor, self.rec_output_tensors = self._set_config( - self.rec_pretrained_model_path) - - if self.use_angle_classification: - self.cls_pretrained_model_path = os.path.join( - self.directory, 'inference_model', 'angle_cls') - - self.cls_predictor, self.cls_input_tensor, self.cls_output_tensors = self._set_config( - self.cls_pretrained_model_path) - - def _set_config(self, pretrained_model_path): - """ - predictor config path - """ - model_file_path = os.path.join(pretrained_model_path, 'model') - params_file_path = os.path.join(pretrained_model_path, 'params') - - config = AnalysisConfig(model_file_path, params_file_path) - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - use_gpu = True - except: - use_gpu = False - - if use_gpu: - config.enable_use_gpu(8000, 0) - else: - config.disable_gpu() - if self.enable_mkldnn: - # cache 10 different shapes for mkldnn to avoid memory leak - config.set_mkldnn_cache_capacity(10) - config.enable_mkldnn() - - config.disable_glog_info() - config.delete_pass("conv_transpose_eltwiseadd_bn_fuse_pass") - config.switch_use_feed_fetch_ops(False) - - predictor = create_paddle_predictor(config) - - input_names = predictor.get_input_names() - input_tensor = predictor.get_input_tensor(input_names[0]) - output_names = predictor.get_output_names() - output_tensors = [] - for output_name in output_names: - output_tensor = predictor.get_output_tensor(output_name) - output_tensors.append(output_tensor) - - return predictor, input_tensor, output_tensors - - @property - def text_detector_module(self): - """ - text detect module - """ - if not self._text_detector_module: - self._text_detector_module = hub.Module( - name='chinese_text_detection_db_mobile', - enable_mkldnn=self.enable_mkldnn, - version='1.0.4') - return self._text_detector_module - - def read_images(self, paths=[]): - images = [] - for img_path in paths: - assert os.path.isfile( - img_path), "The {} isn't a valid file.".format(img_path) - img = cv2.imread(img_path) - if img is None: - logger.info("error in loading image:{}".format(img_path)) - continue - images.append(img) - return images - - def get_rotate_crop_image(self, img, points): - ''' - img_height, img_width = img.shape[0:2] - left = int(np.min(points[:, 0])) - right = int(np.max(points[:, 0])) - top = int(np.min(points[:, 1])) - bottom = int(np.max(points[:, 1])) - img_crop = img[top:bottom, left:right, :].copy() - points[:, 0] = points[:, 0] - left - points[:, 1] = points[:, 1] - top - ''' - img_crop_width = int( - max( - np.linalg.norm(points[0] - points[1]), - np.linalg.norm(points[2] - points[3]))) - img_crop_height = int( - max( - np.linalg.norm(points[0] 
- points[3]), - np.linalg.norm(points[1] - points[2]))) - pts_std = np.float32([[0, 0], [img_crop_width, 0], - [img_crop_width, img_crop_height], - [0, img_crop_height]]) - M = cv2.getPerspectiveTransform(points, pts_std) - dst_img = cv2.warpPerspective( - img, - M, (img_crop_width, img_crop_height), - borderMode=cv2.BORDER_REPLICATE, - flags=cv2.INTER_CUBIC) - dst_img_height, dst_img_width = dst_img.shape[0:2] - if dst_img_height * 1.0 / dst_img_width >= 1.5: - dst_img = np.rot90(dst_img) - return dst_img - - def resize_norm_img_rec(self, img, max_wh_ratio): - imgC, imgH, imgW = self.rec_image_shape - assert imgC == img.shape[2] - h, w = img.shape[:2] - ratio = w / float(h) - if math.ceil(imgH * ratio) > imgW: - resized_w = imgW - else: - resized_w = int(math.ceil(imgH * ratio)) - resized_image = cv2.resize(img, (resized_w, imgH)) - resized_image = resized_image.astype('float32') - resized_image = resized_image.transpose((2, 0, 1)) / 255 - resized_image -= 0.5 - resized_image /= 0.5 - padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32) - padding_im[:, :, 0:resized_w] = resized_image - return padding_im - - def resize_norm_img_cls(self, img): - cls_image_shape = [3, 48, 192] - imgC, imgH, imgW = cls_image_shape - h = img.shape[0] - w = img.shape[1] - ratio = w / float(h) - if math.ceil(imgH * ratio) > imgW: - resized_w = imgW - else: - resized_w = int(math.ceil(imgH * ratio)) - resized_image = cv2.resize(img, (resized_w, imgH)) - resized_image = resized_image.astype('float32') - if cls_image_shape[0] == 1: - resized_image = resized_image / 255 - resized_image = resized_image[np.newaxis, :] - else: - resized_image = resized_image.transpose((2, 0, 1)) / 255 - resized_image -= 0.5 - resized_image /= 0.5 - padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32) - padding_im[:, :, 0:resized_w] = resized_image - return padding_im - - def recognize_text(self, - images=[], - paths=[], - use_gpu=False, - output_dir='ocr_result', - visualization=False, - box_thresh=0.5, - text_thresh=0.5, - angle_classification_thresh=0.9): - """ - Get the chinese texts in the predicted images. + self.logger = get_logger() + self.model = hub.Module( + name="multi_languages_ocr_db_crnn", + lang="german", + det=det, + rec=rec, + use_angle_cls=use_angle_cls, + enable_mkldnn=enable_mkldnn, + use_gpu=use_gpu, + box_thresh=box_thresh, + angle_classification_thresh=angle_classification_thresh) + self.model.name = self.name + + def recognize_text(self, images=[], paths=[], output_dir='ocr_result', visualization=False): + """ + Get the text in the predicted images. Args: images (list(numpy.ndarray)): images data, shape of each is [H, W, C]. If images not paths paths (list[str]): The paths of images. If paths not images - use_gpu (bool): Whether to use gpu. - batch_size(int): the program deals once with one output_dir (str): The directory to store output images. visualization (bool): Whether to save image or not. - box_thresh(float): the threshold of the detected text box's confidence - text_thresh(float): the threshold of the chinese text recognition confidence - angle_classification_thresh(float): the threshold of the angle classification confidence - Returns: - res (list): The result of chinese texts and save path of images. + res (list): The result of text detection box and save path of images. """ - if use_gpu: - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - except: - raise RuntimeError( - "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. 
If you wanna use gpu, please set CUDA_VISIBLE_DEVICES via export CUDA_VISIBLE_DEVICES=cuda_device_id." - ) - - self.use_gpu = use_gpu - - if images != [] and isinstance(images, list) and paths == []: - predicted_data = images - elif images == [] and isinstance(paths, list) and paths != []: - predicted_data = self.read_images(paths) - else: - raise TypeError("The input data is inconsistent with expectations.") - - assert predicted_data != [], "There is not any image to be predicted. Please check the input data." - - detection_results = self.text_detector_module.detect_text( - images=predicted_data, use_gpu=self.use_gpu, box_thresh=box_thresh) - print('*'*10) - print(detection_results) - - boxes = [ - np.array(item['data']).astype(np.float32) - for item in detection_results - ] - all_results = [] - for index, img_boxes in enumerate(boxes): - original_image = predicted_data[index].copy() - result = {'save_path': ''} - if img_boxes.size == 0: - result['data'] = [] - else: - img_crop_list = [] - boxes = sorted_boxes(img_boxes) - for num_box in range(len(boxes)): - tmp_box = copy.deepcopy(boxes[num_box]) - img_crop = self.get_rotate_crop_image( - original_image, tmp_box) - img_crop_list.append(img_crop) - - if self.use_angle_classification: - img_crop_list, angle_list = self._classify_text( - img_crop_list, - angle_classification_thresh=angle_classification_thresh) - - rec_results = self._recognize_text(img_crop_list) - - # if the recognized text confidence score is lower than text_thresh, then drop it - rec_res_final = [] - for index, res in enumerate(rec_results): - text, score = res - if score >= text_thresh: - rec_res_final.append({ - 'text': - text, - 'confidence': - float(score), - 'text_box_position': - boxes[index].astype(np.int).tolist() - }) - result['data'] = rec_res_final - - if visualization and result['data']: - result['save_path'] = self.save_result_image( - original_image, boxes, rec_results, output_dir, - text_thresh) - all_results.append(result) - + all_results = self.model.recognize_text( + images=images, paths=paths, output_dir=output_dir, visualization=visualization) return all_results @serving @@ -310,282 +67,21 @@ class GermanOCRDBCRNNMobile(hub.Module): results = self.recognize_text(images_decode, **kwargs) return results - def save_result_image( - self, - original_image, - detection_boxes, - rec_results, - output_dir='ocr_result', - text_thresh=0.5, - ): - image = Image.fromarray(cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)) - txts = [item[0] for item in rec_results] - scores = [item[1] for item in rec_results] - draw_img = draw_ocr( - image, - detection_boxes, - txts, - scores, - font_file=self.font_file, - draw_txt=True, - drop_score=text_thresh) - - if not os.path.exists(output_dir): - os.makedirs(output_dir) - ext = get_image_ext(original_image) - saved_name = 'ndarray_{}{}'.format(time.time(), ext) - save_file_path = os.path.join(output_dir, saved_name) - cv2.imwrite(save_file_path, draw_img[:, :, ::-1]) - return save_file_path - - def _classify_text(self, image_list, angle_classification_thresh=0.9): - img_list = copy.deepcopy(image_list) - img_num = len(img_list) - # Calculate the aspect ratio of all text bars - width_list = [] - for img in img_list: - width_list.append(img.shape[1] / float(img.shape[0])) - # Sorting can speed up the cls process - indices = np.argsort(np.array(width_list)) - - cls_res = [['', 0.0]] * img_num - batch_num = 30 - for beg_img_no in range(0, img_num, batch_num): - end_img_no = min(img_num, beg_img_no + batch_num) - norm_img_batch = 
[] - max_wh_ratio = 0 - for ino in range(beg_img_no, end_img_no): - h, w = img_list[indices[ino]].shape[0:2] - wh_ratio = w * 1.0 / h - max_wh_ratio = max(max_wh_ratio, wh_ratio) - for ino in range(beg_img_no, end_img_no): - norm_img = self.resize_norm_img_cls(img_list[indices[ino]]) - norm_img = norm_img[np.newaxis, :] - norm_img_batch.append(norm_img) - norm_img_batch = np.concatenate(norm_img_batch) - norm_img_batch = norm_img_batch.copy() - - self.cls_input_tensor.copy_from_cpu(norm_img_batch) - self.cls_predictor.zero_copy_run() - - prob_out = self.cls_output_tensors[0].copy_to_cpu() - label_out = self.cls_output_tensors[1].copy_to_cpu() - if len(label_out.shape) != 1: - prob_out, label_out = label_out, prob_out - label_list = ['0', '180'] - for rno in range(len(label_out)): - label_idx = label_out[rno] - score = prob_out[rno][label_idx] - label = label_list[label_idx] - cls_res[indices[beg_img_no + rno]] = [label, score] - if '180' in label and score > angle_classification_thresh: - img_list[indices[beg_img_no + rno]] = cv2.rotate( - img_list[indices[beg_img_no + rno]], 1) - return img_list, cls_res - - def _recognize_text(self, img_list): - img_num = len(img_list) - # Calculate the aspect ratio of all text bars - width_list = [] - for img in img_list: - width_list.append(img.shape[1] / float(img.shape[0])) - # Sorting can speed up the recognition process - indices = np.argsort(np.array(width_list)) - - rec_res = [['', 0.0]] * img_num - batch_num = 30 - for beg_img_no in range(0, img_num, batch_num): - end_img_no = min(img_num, beg_img_no + batch_num) - norm_img_batch = [] - max_wh_ratio = 0 - for ino in range(beg_img_no, end_img_no): - h, w = img_list[indices[ino]].shape[0:2] - wh_ratio = w * 1.0 / h - max_wh_ratio = max(max_wh_ratio, wh_ratio) - for ino in range(beg_img_no, end_img_no): - norm_img = self.resize_norm_img_rec(img_list[indices[ino]], - max_wh_ratio) - norm_img = norm_img[np.newaxis, :] - norm_img_batch.append(norm_img) - - norm_img_batch = np.concatenate(norm_img_batch, axis=0) - norm_img_batch = norm_img_batch.copy() - - self.rec_input_tensor.copy_from_cpu(norm_img_batch) - self.rec_predictor.zero_copy_run() - - rec_idx_batch = self.rec_output_tensors[0].copy_to_cpu() - rec_idx_lod = self.rec_output_tensors[0].lod()[0] - predict_batch = self.rec_output_tensors[1].copy_to_cpu() - predict_lod = self.rec_output_tensors[1].lod()[0] - for rno in range(len(rec_idx_lod) - 1): - beg = rec_idx_lod[rno] - end = rec_idx_lod[rno + 1] - rec_idx_tmp = rec_idx_batch[beg:end, 0] - preds_text = self.char_ops.decode(rec_idx_tmp) - beg = predict_lod[rno] - end = predict_lod[rno + 1] - probs = predict_batch[beg:end, :] - ind = np.argmax(probs, axis=1) - blank = probs.shape[1] - valid_ind = np.where(ind != (blank - 1))[0] - if len(valid_ind) == 0: - continue - score = np.mean(probs[valid_ind, ind[valid_ind]]) - # rec_res.append([preds_text, score]) - rec_res[indices[beg_img_no + rno]] = [preds_text, score] - - return rec_res - - def save_inference_model(self, - dirname, - model_filename=None, - params_filename=None, - combined=True): - detector_dir = os.path.join(dirname, 'text_detector') - classifier_dir = os.path.join(dirname, 'angle_classifier') - recognizer_dir = os.path.join(dirname, 'text_recognizer') - self._save_detector_model(detector_dir, model_filename, params_filename, - combined) - if self.use_angle_classification: - self._save_classifier_model(classifier_dir, model_filename, - params_filename, combined) - - self._save_recognizer_model(recognizer_dir, model_filename, - 
params_filename, combined) - logger.info("The inference model has been saved in the path {}".format( - os.path.realpath(dirname))) - - def _save_detector_model(self, - dirname, - model_filename=None, - params_filename=None, - combined=True): - self.text_detector_module.save_inference_model( - dirname, model_filename, params_filename, combined) - - def _save_recognizer_model(self, - dirname, - model_filename=None, - params_filename=None, - combined=True): - if combined: - model_filename = "__model__" if not model_filename else model_filename - params_filename = "__params__" if not params_filename else params_filename - place = fluid.CPUPlace() - exe = fluid.Executor(place) - - model_file_path = os.path.join(self.rec_pretrained_model_path, 'model') - params_file_path = os.path.join(self.rec_pretrained_model_path, - 'params') - program, feeded_var_names, target_vars = fluid.io.load_inference_model( - dirname=self.rec_pretrained_model_path, - model_filename=model_file_path, - params_filename=params_file_path, - executor=exe) - - fluid.io.save_inference_model( - dirname=dirname, - main_program=program, - executor=exe, - feeded_var_names=feeded_var_names, - target_vars=target_vars, - model_filename=model_filename, - params_filename=params_filename) - - def _save_classifier_model(self, - dirname, - model_filename=None, - params_filename=None, - combined=True): - if combined: - model_filename = "__model__" if not model_filename else model_filename - params_filename = "__params__" if not params_filename else params_filename - place = fluid.CPUPlace() - exe = fluid.Executor(place) - - model_file_path = os.path.join(self.cls_pretrained_model_path, 'model') - params_file_path = os.path.join(self.cls_pretrained_model_path, - 'params') - program, feeded_var_names, target_vars = fluid.io.load_inference_model( - dirname=self.cls_pretrained_model_path, - model_filename=model_file_path, - params_filename=params_file_path, - executor=exe) - - fluid.io.save_inference_model( - dirname=dirname, - main_program=program, - executor=exe, - feeded_var_names=feeded_var_names, - target_vars=target_vars, - model_filename=model_filename, - params_filename=params_filename) - @runnable def run_cmd(self, argvs): """ Run as a command """ - self.parser = argparse.ArgumentParser( - description="Run the %s module." % self.name, - prog='hub run %s' % self.name, - usage='%(prog)s', - add_help=True) - - self.arg_input_group = self.parser.add_argument_group( - title="Input options", description="Input data. 
Required") - self.arg_config_group = self.parser.add_argument_group( - title="Config options", - description= - "Run configuration for controlling module behavior, not required.") - - self.add_module_config_arg() - self.add_module_input_arg() - - args = self.parser.parse_args(argvs) - results = self.recognize_text( - paths=[args.input_path], - use_gpu=args.use_gpu, - output_dir=args.output_dir, - visualization=args.visualization) + results = self.model.run_cmd(argvs) return results - def add_module_config_arg(self): - """ - Add the command config options - """ - self.arg_config_group.add_argument( - '--use_gpu', - type=ast.literal_eval, - default=False, - help="whether use GPU or not") - self.arg_config_group.add_argument( - '--output_dir', - type=str, - default='ocr_result', - help="The directory to save output images.") - self.arg_config_group.add_argument( - '--visualization', - type=ast.literal_eval, - default=False, - help="whether to save output as images.") - - def add_module_input_arg(self): - """ - Add the command input options - """ - self.arg_input_group.add_argument( - '--input_path', type=str, default=None, help="diretory to image") - + def export_onnx_model(self, dirname: str, input_shape_dict=None, opset_version=10): + ''' + Export the model to ONNX format. -if __name__ == '__main__': - ocr = GermanOCRDBCRNNMobile(enable_mkldnn=False, use_angle_classification=True) - image_path = [ - '/mnt/zhangxuefei/PaddleOCR/doc/imgs/ger_1.jpg', - '/mnt/zhangxuefei/PaddleOCR/doc/imgs/12.jpg', - '/mnt/zhangxuefei/PaddleOCR/doc/imgs/test_image.jpg' - ] - res = ocr.recognize_text(paths=image_path, visualization=True) - ocr.save_inference_model('save') - print(res) + Args: + dirname(str): The directory to save the onnx model. + input_shape_dict: dictionary ``{ input_name: input_value }, eg. 
{'x': [-1, 3, -1, -1]}`` + opset_version(int): operator set + ''' + self.model.export_onnx_model(dirname=dirname, input_shape_dict=input_shape_dict, opset_version=opset_version) diff --git a/modules/image/text_recognition/german_ocr_db_crnn_mobile/requirements.txt b/modules/image/text_recognition/german_ocr_db_crnn_mobile/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..527c6de7f643cb427013aaff2409365538fed2d3 --- /dev/null +++ b/modules/image/text_recognition/german_ocr_db_crnn_mobile/requirements.txt @@ -0,0 +1,4 @@ +paddleocr>=2.3.0.2 +paddle2onnx>=0.9.0 +shapely +pyclipper diff --git a/modules/image/text_recognition/german_ocr_db_crnn_mobile/utils.py b/modules/image/text_recognition/german_ocr_db_crnn_mobile/utils.py deleted file mode 100644 index 8c41af300cc91de369a473cb7327b794b6cf5715..0000000000000000000000000000000000000000 --- a/modules/image/text_recognition/german_ocr_db_crnn_mobile/utils.py +++ /dev/null @@ -1,190 +0,0 @@ -# -*- coding:utf-8 -*- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import math - -from PIL import Image, ImageDraw, ImageFont -import base64 -import cv2 -import numpy as np - - -def draw_ocr(image, - boxes, - txts, - scores, - font_file, - draw_txt=True, - drop_score=0.5): - """ - Visualize the results of OCR detection and recognition - args: - image(Image|array): RGB image - boxes(list): boxes with shape(N, 4, 2) - txts(list): the texts - scores(list): txxs corresponding scores - draw_txt(bool): whether draw text or not - drop_score(float): only scores greater than drop_threshold will be visualized - return(array): - the visualized img - """ - if scores is None: - scores = [1] * len(boxes) - for (box, score) in zip(boxes, scores): - if score < drop_score or math.isnan(score): - continue - box = np.reshape(np.array(box), [-1, 1, 2]).astype(np.int64) - image = cv2.polylines(np.array(image), [box], True, (255, 0, 0), 2) - - if draw_txt: - img = np.array(resize_img(image, input_size=600)) - txt_img = text_visual( - txts, - scores, - font_file, - img_h=img.shape[0], - img_w=600, - threshold=drop_score) - img = np.concatenate([np.array(img), np.array(txt_img)], axis=1) - return img - return image - - -def text_visual(texts, scores, font_file, img_h=400, img_w=600, threshold=0.): - """ - create new blank img and draw txt on it - args: - texts(list): the text will be draw - scores(list|None): corresponding score of each txt - img_h(int): the height of blank img - img_w(int): the width of blank img - return(array): - """ - if scores is not None: - assert len(texts) == len( - scores), "The number of txts and corresponding scores must match" - - def create_blank_img(): - blank_img = np.ones(shape=[img_h, img_w], dtype=np.int8) * 255 - blank_img[:, img_w - 1:] = 0 - blank_img = Image.fromarray(blank_img).convert("RGB") - draw_txt = ImageDraw.Draw(blank_img) - return blank_img, draw_txt - - blank_img, draw_txt = create_blank_img() - - font_size = 20 - txt_color = (0, 0, 0) - font = ImageFont.truetype(font_file, font_size, encoding="utf-8") - - gap = font_size + 5 - txt_img_list = [] - count, index = 1, 0 - for idx, txt in enumerate(texts): - index += 1 - if scores[idx] < threshold or math.isnan(scores[idx]): - index -= 1 - continue - first_line = True - while str_count(txt) >= img_w // font_size - 4: - tmp = txt - txt = tmp[:img_w // font_size - 4] - if first_line: - new_txt = str(index) + ': ' + txt - first_line = False - else: - new_txt = ' ' + txt - 
draw_txt.text((0, gap * count), new_txt, txt_color, font=font) - txt = tmp[img_w // font_size - 4:] - if count >= img_h // gap - 1: - txt_img_list.append(np.array(blank_img)) - blank_img, draw_txt = create_blank_img() - count = 0 - count += 1 - if first_line: - new_txt = str(index) + ': ' + txt + ' ' + '%.3f' % (scores[idx]) - else: - new_txt = " " + txt + " " + '%.3f' % (scores[idx]) - draw_txt.text((0, gap * count), new_txt, txt_color, font=font) - # whether add new blank img or not - if count >= img_h // gap - 1 and idx + 1 < len(texts): - txt_img_list.append(np.array(blank_img)) - blank_img, draw_txt = create_blank_img() - count = 0 - count += 1 - txt_img_list.append(np.array(blank_img)) - if len(txt_img_list) == 1: - blank_img = np.array(txt_img_list[0]) - else: - blank_img = np.concatenate(txt_img_list, axis=1) - return np.array(blank_img) - - -def str_count(s): - """ - Count the number of Chinese characters, - a single English character and a single number - equal to half the length of Chinese characters. - args: - s(string): the input of string - return(int): - the number of Chinese characters - """ - import string - count_zh = count_pu = 0 - s_len = len(s) - en_dg_count = 0 - for c in s: - if c in string.ascii_letters or c.isdigit() or c.isspace(): - en_dg_count += 1 - elif c.isalpha(): - count_zh += 1 - else: - count_pu += 1 - return s_len - math.ceil(en_dg_count / 2) - - -def resize_img(img, input_size=600): - img = np.array(img) - im_shape = img.shape - im_size_min = np.min(im_shape[0:2]) - im_size_max = np.max(im_shape[0:2]) - im_scale = float(input_size) / float(im_size_max) - im = cv2.resize(img, None, None, fx=im_scale, fy=im_scale) - return im - - -def get_image_ext(image): - if image.shape[2] == 4: - return ".png" - return ".jpg" - - -def sorted_boxes(dt_boxes): - """ - Sort text boxes in order from top to bottom, left to right - args: - dt_boxes(array):detected text boxes with shape [4, 2] - return: - sorted boxes(array) with shape [4, 2] - """ - num_boxes = dt_boxes.shape[0] - sorted_boxes = sorted(dt_boxes, key=lambda x: (x[0][1], x[0][0])) - _boxes = list(sorted_boxes) - - for i in range(num_boxes - 1): - if abs(_boxes[i + 1][0][1] - _boxes[i][0][1]) < 10 and \ - (_boxes[i + 1][0][0] < _boxes[i][0][0]): - tmp = _boxes[i] - _boxes[i] = _boxes[i + 1] - _boxes[i + 1] = tmp - return _boxes - - -def base64_to_cv2(b64str): - data = base64.b64decode(b64str.encode('utf8')) - data = np.fromstring(data, np.uint8) - data = cv2.imdecode(data, cv2.IMREAD_COLOR) - return data diff --git a/modules/image/text_recognition/japan_ocr_db_crnn_mobile/README.md b/modules/image/text_recognition/japan_ocr_db_crnn_mobile/README.md index 05f32a6621b4d81b5b14e1f1550449d22ad0f359..66a87dc54c14170c3ee8e9985c5d23e81fd03e91 100644 --- a/modules/image/text_recognition/japan_ocr_db_crnn_mobile/README.md +++ b/modules/image/text_recognition/japan_ocr_db_crnn_mobile/README.md @@ -27,18 +27,9 @@ - ### 1、环境依赖 - - paddlepaddle >= 1.8.0 + - paddlepaddle >= 2.0.2 - - paddlehub >= 1.8.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) - - - shapely - - - pyclipper - - - ```shell - $ pip install shapely pyclipper - ``` - - **该Module依赖于第三方库shapely和pyclipper,使用该Module之前,请先安装shapely和pyclipper。** + - paddlehub >= 2.0.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) - ### 2、安装 @@ -58,7 +49,7 @@ ``` - 通过命令行方式实现文字识别模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) -- ### 2、代码示例 +- ### 2、预测代码示例 - ```python import paddlehub as hub @@ -160,13 
+151,15 @@ print(r.json()["results"]) ``` - ## 五、更新历史 * 1.0.0 初始发布 +* 1.1.0 + + 优化模型 - ```shell - $ hub install japan_ocr_db_crnn_mobile==1.0.0 + $ hub install japan_ocr_db_crnn_mobile==1.1.0 ``` diff --git a/modules/image/text_recognition/japan_ocr_db_crnn_mobile/assets/japan.ttc b/modules/image/text_recognition/japan_ocr_db_crnn_mobile/assets/japan.ttc deleted file mode 100644 index ad68243b968fc87b207928594c585039859b75a9..0000000000000000000000000000000000000000 Binary files a/modules/image/text_recognition/japan_ocr_db_crnn_mobile/assets/japan.ttc and /dev/null differ diff --git a/modules/image/text_recognition/japan_ocr_db_crnn_mobile/assets/japan_dict.txt b/modules/image/text_recognition/japan_ocr_db_crnn_mobile/assets/japan_dict.txt deleted file mode 100644 index 339d4b89e5159a346636641a0814874faa59754a..0000000000000000000000000000000000000000 --- a/modules/image/text_recognition/japan_ocr_db_crnn_mobile/assets/japan_dict.txt +++ /dev/null @@ -1,4399 +0,0 @@ -! -" -# -$ -% -& -' -( -) -* -+ -, -- -. -/ -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -: -; -< -= -> -? -A -B -C -D -E -F -G -H -I -J -K -L -M -N -O -P -Q -R -S -T -U -V -W -X -Y -Z -[ -] -_ -` -a -b -c -d -e -f -g -h -i -j -k -l -m -n -o -p -q -r -s -t -u -v -w -x -y -z -© -° -² -´ -½ -Á -Ä -Å -Ç -È -É -Í -Ó -Ö -× -Ü -ß -à -á -â -ã -ä -å -æ -ç -è -é -ê -ë -í -ð -ñ -ò -ó -ô -õ -ö -ø -ú -û -ü -ý -ā -ă -ą -ć -Č -č -đ -ē -ė -ę -ğ -ī -ı -Ł -ł -ń -ň -ō -ř -Ş -ş -Š -š -ţ -ū -ż -Ž -ž -Ș -ș -ț -Δ -α -λ -μ -φ -Г -О -а -в -л -о -р -с -т -я -ồ -​ -— -― -’ -“ -” -… -℃ -→ -∇ -− -■ -☆ -  -、 -。 -々 -〆 -〈 -〉 -「 -」 -『 -』 -〔 -〕 -〜 -ぁ -あ -ぃ -い -う -ぇ -え -ぉ -お -か -が -き -ぎ -く -ぐ -け -げ -こ -ご -さ -ざ -し -じ -す -ず -せ -ぜ -そ -ぞ -た -だ -ち -ぢ -っ -つ -づ -て -で -と -ど -な -に -ぬ -ね -の -は -ば -ぱ -ひ -び -ぴ -ふ -ぶ -ぷ -へ -べ -ぺ -ほ -ぼ -ぽ -ま -み -む -め -も -ゃ -や -ゅ -ゆ -ょ -よ -ら -り -る -れ -ろ -わ -ゑ -を -ん -ゝ -ゞ -ァ -ア -ィ -イ -ゥ -ウ -ェ -エ -ォ -オ -カ -ガ -キ -ギ -ク -グ -ケ -ゲ -コ -ゴ -サ -ザ -シ -ジ -ス -ズ -セ -ゼ -ソ -ゾ -タ -ダ -チ -ヂ -ッ -ツ -ヅ -テ -デ -ト -ド -ナ -ニ -ヌ -ネ -ノ -ハ -バ -パ -ヒ -ビ -ピ -フ -ブ -プ -ヘ -ベ -ペ -ホ -ボ -ポ -マ -ミ -ム -メ -モ -ャ -ヤ -ュ -ユ -ョ -ヨ -ラ -リ -ル -レ -ロ -ワ -ヰ -ン -ヴ -ヵ -ヶ -・ -ー -㈱ -一 -丁 -七 -万 -丈 -三 -上 -下 -不 -与 -丑 -且 -世 -丘 -丙 -丞 -両 -並 -中 -串 -丸 -丹 -主 -丼 -丿 -乃 -久 -之 -乎 -乏 -乗 -乘 -乙 -九 -乞 -也 -乱 -乳 -乾 -亀 -了 -予 -争 -事 -二 -于 -互 -五 -井 -亘 -亙 -些 -亜 -亟 -亡 -交 -亥 -亦 -亨 -享 -京 -亭 -亮 -人 -什 -仁 -仇 -今 -介 -仍 -仏 -仔 -仕 -他 -仗 -付 -仙 -代 -令 -以 -仮 -仰 -仲 -件 -任 -企 -伊 -伍 -伎 -伏 -伐 -休 -会 -伝 -伯 -估 -伴 -伶 -伸 -伺 -似 -伽 -佃 -但 -位 -低 -住 -佐 -佑 -体 -何 -余 -佚 -佛 -作 -佩 -佳 -併 -佶 -使 -侈 -例 -侍 -侏 -侑 -侘 -供 -依 -侠 -価 -侮 -侯 -侵 -侶 -便 -係 -促 -俄 -俊 -俔 -俗 -俘 -保 -信 -俣 -俤 -修 -俯 -俳 -俵 -俸 -俺 -倉 -個 -倍 -倒 -候 -借 -倣 -値 -倫 -倭 -倶 -倹 -偃 -假 -偈 -偉 -偏 -偐 -偕 -停 -健 -側 -偵 -偶 -偽 -傀 -傅 -傍 -傑 -傘 -備 -催 -傭 -傲 -傳 -債 -傷 -傾 -僊 -働 -像 -僑 -僕 -僚 -僧 -僭 -僮 -儀 -億 -儇 -儒 -儛 -償 -儡 -優 -儲 -儺 -儼 -兀 -允 -元 -兄 -充 -兆 -先 -光 -克 -兌 -免 -兎 -児 -党 -兜 -入 -全 -八 -公 -六 -共 -兵 -其 -具 -典 -兼 -内 -円 -冊 -再 -冑 -冒 -冗 -写 -冠 -冤 -冥 -冨 -冬 -冲 -决 -冶 -冷 -准 -凉 -凋 -凌 -凍 -凛 -凝 -凞 -几 -凡 -処 -凪 -凰 -凱 -凶 -凸 -凹 -出 -函 -刀 -刃 -分 -切 -刈 -刊 -刎 -刑 -列 -初 -判 -別 -利 -刪 -到 -制 -刷 -券 -刹 -刺 -刻 -剃 -則 -削 -剋 -前 -剖 -剛 -剣 -剤 -剥 -剪 -副 -剰 -割 -創 -剽 -劇 -劉 -劔 -力 -功 -加 -劣 -助 -努 -劫 -劭 -励 -労 -効 -劾 -勃 -勅 -勇 -勉 -勒 -動 -勘 -務 -勝 -募 -勢 -勤 -勧 -勲 -勺 -勾 -勿 -匁 -匂 -包 -匏 -化 -北 -匙 -匝 -匠 -匡 -匣 -匯 -匲 -匹 -区 -医 -匿 -十 -千 -升 -午 -卉 -半 -卍 -卑 -卒 -卓 -協 -南 -単 -博 -卜 -占 -卦 -卯 -印 -危 -即 -却 -卵 -卸 -卿 -厄 -厚 -原 -厠 -厨 -厩 -厭 -厳 -去 -参 -又 -叉 -及 -友 -双 -反 -収 -叔 -取 -受 -叙 -叛 -叟 -叡 -叢 -口 -古 -句 -叩 -只 -叫 -召 -可 -台 -叱 -史 -右 -叶 -号 -司 -吃 -各 -合 -吉 -吊 -同 -名 -后 -吏 -吐 -向 -君 -吝 -吟 -吠 -否 -含 -吸 -吹 -吻 -吽 -吾 -呂 -呆 -呈 -呉 -告 -呑 -周 -呪 -呰 -味 -呼 -命 -咀 -咄 -咋 -和 -咒 -咫 -咲 -咳 -咸 -哀 -品 -哇 -哉 -員 -哨 -哩 -哭 -哲 -哺 -唄 -唆 -唇 -唐 -唖 -唯 -唱 -唳 -唸 -唾 
-啄 -商 -問 -啓 -啼 -善 -喋 -喚 -喜 -喝 -喧 -喩 -喪 -喫 -喬 -單 -喰 -営 -嗅 -嗇 -嗔 -嗚 -嗜 -嗣 -嘆 -嘉 -嘗 -嘘 -嘩 -嘯 -嘱 -嘲 -嘴 -噂 -噌 -噛 -器 -噴 -噺 -嚆 -嚢 -囀 -囃 -囉 -囚 -四 -回 -因 -団 -困 -囲 -図 -固 -国 -圀 -圃 -國 -圏 -園 -圓 -團 -圜 -土 -圧 -在 -圭 -地 -址 -坂 -均 -坊 -坐 -坑 -坡 -坤 -坦 -坪 -垂 -型 -垢 -垣 -埃 -埋 -城 -埒 -埔 -域 -埠 -埴 -埵 -執 -培 -基 -埼 -堀 -堂 -堅 -堆 -堕 -堤 -堪 -堯 -堰 -報 -場 -堵 -堺 -塀 -塁 -塊 -塑 -塔 -塗 -塘 -塙 -塚 -塞 -塩 -填 -塵 -塾 -境 -墉 -墓 -増 -墜 -墟 -墨 -墳 -墺 -墻 -墾 -壁 -壇 -壊 -壌 -壕 -士 -壬 -壮 -声 -壱 -売 -壷 -壹 -壺 -壽 -変 -夏 -夕 -外 -夙 -多 -夜 -夢 -夥 -大 -天 -太 -夫 -夬 -夭 -央 -失 -夷 -夾 -奄 -奇 -奈 -奉 -奎 -奏 -契 -奔 -奕 -套 -奘 -奠 -奢 -奥 -奨 -奪 -奮 -女 -奴 -奸 -好 -如 -妃 -妄 -妊 -妍 -妓 -妖 -妙 -妥 -妨 -妬 -妲 -妹 -妻 -妾 -姉 -始 -姐 -姓 -委 -姚 -姜 -姞 -姥 -姦 -姨 -姪 -姫 -姶 -姻 -姿 -威 -娑 -娘 -娟 -娠 -娩 -娯 -娼 -婆 -婉 -婚 -婢 -婦 -婬 -婿 -媄 -媒 -媓 -媚 -媛 -媞 -媽 -嫁 -嫄 -嫉 -嫌 -嫐 -嫗 -嫡 -嬉 -嬌 -嬢 -嬪 -嬬 -嬾 -孁 -子 -孔 -字 -存 -孚 -孝 -孟 -季 -孤 -学 -孫 -孵 -學 -宅 -宇 -守 -安 -宋 -完 -宍 -宏 -宕 -宗 -官 -宙 -定 -宛 -宜 -宝 -実 -客 -宣 -室 -宥 -宮 -宰 -害 -宴 -宵 -家 -宸 -容 -宿 -寂 -寄 -寅 -密 -寇 -富 -寒 -寓 -寔 -寛 -寝 -察 -寡 -實 -寧 -審 -寮 -寵 -寶 -寸 -寺 -対 -寿 -封 -専 -射 -将 -尉 -尊 -尋 -對 -導 -小 -少 -尖 -尚 -尤 -尪 -尭 -就 -尹 -尺 -尻 -尼 -尽 -尾 -尿 -局 -居 -屈 -届 -屋 -屍 -屎 -屏 -屑 -屓 -展 -属 -屠 -層 -履 -屯 -山 -岐 -岑 -岡 -岩 -岫 -岬 -岳 -岷 -岸 -峠 -峡 -峨 -峯 -峰 -島 -峻 -崇 -崋 -崎 -崑 -崖 -崗 -崛 -崩 -嵌 -嵐 -嵩 -嵯 -嶂 -嶋 -嶠 -嶺 -嶼 -嶽 -巀 -巌 -巒 -巖 -川 -州 -巡 -巣 -工 -左 -巧 -巨 -巫 -差 -己 -巳 -巴 -巷 -巻 -巽 -巾 -市 -布 -帆 -希 -帖 -帚 -帛 -帝 -帥 -師 -席 -帯 -帰 -帳 -帷 -常 -帽 -幄 -幅 -幇 -幌 -幔 -幕 -幟 -幡 -幢 -幣 -干 -平 -年 -并 -幸 -幹 -幻 -幼 -幽 -幾 -庁 -広 -庄 -庇 -床 -序 -底 -庖 -店 -庚 -府 -度 -座 -庫 -庭 -庵 -庶 -康 -庸 -廂 -廃 -廉 -廊 -廓 -廟 -廠 -廣 -廬 -延 -廷 -建 -廻 -廼 -廿 -弁 -弄 -弉 -弊 -弌 -式 -弐 -弓 -弔 -引 -弖 -弗 -弘 -弛 -弟 -弥 -弦 -弧 -弱 -張 -強 -弼 -弾 -彈 -彊 -彌 -彎 -当 -彗 -彙 -彝 -形 -彦 -彩 -彫 -彬 -彭 -彰 -影 -彷 -役 -彼 -往 -征 -徂 -径 -待 -律 -後 -徐 -徑 -徒 -従 -得 -徠 -御 -徧 -徨 -復 -循 -徭 -微 -徳 -徴 -德 -徹 -徽 -心 -必 -忉 -忌 -忍 -志 -忘 -忙 -応 -忠 -快 -忯 -念 -忻 -忽 -忿 -怒 -怖 -思 -怠 -怡 -急 -性 -怨 -怪 -怯 -恂 -恋 -恐 -恒 -恕 -恣 -恤 -恥 -恨 -恩 -恬 -恭 -息 -恵 -悉 -悌 -悍 -悔 -悟 -悠 -患 -悦 -悩 -悪 -悲 -悼 -情 -惇 -惑 -惚 -惜 -惟 -惠 -惣 -惧 -惨 -惰 -想 -惹 -惺 -愈 -愉 -愍 -意 -愔 -愚 -愛 -感 -愷 -愿 -慈 -態 -慌 -慎 -慕 -慢 -慣 -慧 -慨 -慮 -慰 -慶 -憂 -憎 -憐 -憑 -憙 -憤 -憧 -憩 -憬 -憲 -憶 -憾 -懇 -應 -懌 -懐 -懲 -懸 -懺 -懽 -懿 -戈 -戊 -戌 -戎 -成 -我 -戒 -戔 -或 -戚 -戟 -戦 -截 -戮 -戯 -戴 -戸 -戻 -房 -所 -扁 -扇 -扈 -扉 -手 -才 -打 -払 -托 -扮 -扱 -扶 -批 -承 -技 -抄 -把 -抑 -抓 -投 -抗 -折 -抜 -択 -披 -抱 -抵 -抹 -押 -抽 -担 -拇 -拈 -拉 -拍 -拏 -拐 -拒 -拓 -拘 -拙 -招 -拝 -拠 -拡 -括 -拭 -拳 -拵 -拶 -拾 -拿 -持 -挂 -指 -按 -挑 -挙 -挟 -挨 -振 -挺 -挽 -挿 -捉 -捕 -捗 -捜 -捧 -捨 -据 -捺 -捻 -掃 -掄 -授 -掌 -排 -掖 -掘 -掛 -掟 -採 -探 -掣 -接 -控 -推 -掩 -措 -掬 -掲 -掴 -掻 -掾 -揃 -揄 -揆 -揉 -描 -提 -揖 -揚 -換 -握 -揮 -援 -揶 -揺 -損 -搦 -搬 -搭 -携 -搾 -摂 -摘 -摩 -摸 -摺 -撃 -撒 -撞 -撤 -撥 -撫 -播 -撮 -撰 -撲 -撹 -擁 -操 -擔 -擦 -擬 -擾 -攘 -攝 -攣 -支 -收 -改 -攻 -放 -政 -故 -敏 -救 -敗 -教 -敢 -散 -敦 -敬 -数 -整 -敵 -敷 -斂 -文 -斉 -斎 -斐 -斑 -斗 -料 -斜 -斟 -斤 -斥 -斧 -斬 -断 -斯 -新 -方 -於 -施 -旁 -旅 -旋 -旌 -族 -旗 -旛 -无 -旡 -既 -日 -旦 -旧 -旨 -早 -旬 -旭 -旺 -旻 -昂 -昆 -昇 -昉 -昌 -明 -昏 -易 -昔 -星 -映 -春 -昧 -昨 -昪 -昭 -是 -昵 -昼 -晁 -時 -晃 -晋 -晏 -晒 -晟 -晦 -晧 -晩 -普 -景 -晴 -晶 -智 -暁 -暇 -暈 -暉 -暑 -暖 -暗 -暘 -暢 -暦 -暫 -暮 -暲 -暴 -暹 -暾 -曄 -曇 -曉 -曖 -曙 -曜 -曝 -曠 -曰 -曲 -曳 -更 -書 -曹 -曼 -曽 -曾 -替 -最 -會 -月 -有 -朋 -服 -朏 -朔 -朕 -朗 -望 -朝 -期 -朧 -木 -未 -末 -本 -札 -朱 -朴 -机 -朽 -杁 -杉 -李 -杏 -材 -村 -杓 -杖 -杜 -杞 -束 -条 -杢 -杣 -来 -杭 -杮 -杯 -東 -杲 -杵 -杷 -杼 -松 -板 -枅 -枇 -析 -枓 -枕 -林 -枚 -果 -枝 -枠 -枡 -枢 -枯 -枳 -架 -柄 -柊 -柏 -某 -柑 -染 -柔 -柘 -柚 -柯 -柱 -柳 -柴 -柵 -査 -柾 -柿 -栂 -栃 -栄 -栖 -栗 -校 -株 -栲 -栴 -核 -根 -栻 -格 -栽 -桁 -桂 -桃 -框 -案 -桐 -桑 -桓 -桔 -桜 -桝 -桟 -桧 -桴 -桶 -桾 -梁 -梅 -梆 -梓 -梔 -梗 -梛 -條 -梟 -梢 -梧 -梨 -械 -梱 -梲 -梵 -梶 -棄 -棋 -棒 -棗 -棘 -棚 -棟 -棠 -森 -棲 -棹 -棺 -椀 -椅 -椋 -植 -椎 -椏 -椒 -椙 -検 -椥 -椹 -椿 -楊 -楓 -楕 -楚 -楞 -楠 -楡 -楢 -楨 -楪 -楫 -業 -楮 -楯 -楳 -極 -楷 -楼 -楽 -概 -榊 -榎 -榕 -榛 -榜 -榮 -榱 -榴 -槃 -槇 -槊 -構 -槌 -槍 -槐 -様 -槙 -槻 -槽 -槿 -樂 -樋 -樓 -樗 -標 -樟 -模 -権 -横 -樫 -樵 -樹 -樺 -樽 -橇 -橋 -橘 -機 -橿 -檀 -檄 -檎 -檐 -檗 -檜 -檣 -檥 -檬 -檮 -檸 -檻 -櫃 -櫓 -櫛 -櫟 -櫨 -櫻 -欄 -欅 -欠 -次 -欣 -欧 -欲 -欺 
-欽 -款 -歌 -歎 -歓 -止 -正 -此 -武 -歩 -歪 -歯 -歳 -歴 -死 -殆 -殉 -殊 -残 -殖 -殯 -殴 -段 -殷 -殺 -殻 -殿 -毀 -毅 -母 -毎 -毒 -比 -毘 -毛 -毫 -毬 -氈 -氏 -民 -気 -水 -氷 -永 -氾 -汀 -汁 -求 -汎 -汐 -汗 -汚 -汝 -江 -池 -汪 -汰 -汲 -決 -汽 -沂 -沃 -沅 -沆 -沈 -沌 -沐 -沓 -沖 -沙 -没 -沢 -沱 -河 -沸 -油 -治 -沼 -沽 -沿 -況 -泉 -泊 -泌 -法 -泗 -泡 -波 -泣 -泥 -注 -泯 -泰 -泳 -洋 -洒 -洗 -洛 -洞 -津 -洩 -洪 -洲 -洸 -洹 -活 -洽 -派 -流 -浄 -浅 -浙 -浚 -浜 -浣 -浦 -浩 -浪 -浮 -浴 -海 -浸 -涅 -消 -涌 -涙 -涛 -涯 -液 -涵 -涼 -淀 -淄 -淆 -淇 -淋 -淑 -淘 -淡 -淤 -淨 -淫 -深 -淳 -淵 -混 -淹 -添 -清 -済 -渉 -渋 -渓 -渕 -渚 -減 -渟 -渠 -渡 -渤 -渥 -渦 -温 -渫 -測 -港 -游 -渾 -湊 -湖 -湘 -湛 -湧 -湫 -湯 -湾 -湿 -満 -源 -準 -溜 -溝 -溢 -溥 -溪 -溶 -溺 -滄 -滅 -滋 -滌 -滑 -滕 -滝 -滞 -滴 -滸 -滹 -滿 -漁 -漂 -漆 -漉 -漏 -漑 -演 -漕 -漠 -漢 -漣 -漫 -漬 -漱 -漸 -漿 -潅 -潔 -潙 -潜 -潟 -潤 -潭 -潮 -潰 -潴 -澁 -澂 -澄 -澎 -澗 -澤 -澪 -澱 -澳 -激 -濁 -濃 -濟 -濠 -濡 -濤 -濫 -濯 -濱 -濾 -瀉 -瀋 -瀑 -瀕 -瀞 -瀟 -瀧 -瀬 -瀾 -灌 -灑 -灘 -火 -灯 -灰 -灸 -災 -炉 -炊 -炎 -炒 -炭 -炮 -炷 -点 -為 -烈 -烏 -烙 -烝 -烹 -焔 -焙 -焚 -無 -焦 -然 -焼 -煇 -煉 -煌 -煎 -煕 -煙 -煤 -煥 -照 -煩 -煬 -煮 -煽 -熈 -熊 -熙 -熟 -熨 -熱 -熹 -熾 -燃 -燈 -燎 -燔 -燕 -燗 -燥 -燭 -燻 -爆 -爐 -爪 -爬 -爲 -爵 -父 -爺 -爼 -爽 -爾 -片 -版 -牌 -牒 -牘 -牙 -牛 -牝 -牟 -牡 -牢 -牧 -物 -牲 -特 -牽 -犂 -犠 -犬 -犯 -状 -狂 -狄 -狐 -狗 -狙 -狛 -狡 -狩 -独 -狭 -狷 -狸 -狼 -猊 -猛 -猟 -猥 -猨 -猩 -猪 -猫 -献 -猴 -猶 -猷 -猾 -猿 -獄 -獅 -獏 -獣 -獲 -玄 -玅 -率 -玉 -王 -玖 -玩 -玲 -珀 -珂 -珈 -珉 -珊 -珍 -珎 -珞 -珠 -珣 -珥 -珪 -班 -現 -球 -理 -琉 -琢 -琥 -琦 -琮 -琲 -琳 -琴 -琵 -琶 -瑁 -瑋 -瑙 -瑚 -瑛 -瑜 -瑞 -瑠 -瑤 -瑩 -瑪 -瑳 -瑾 -璃 -璋 -璜 -璞 -璧 -璨 -環 -璵 -璽 -璿 -瓊 -瓔 -瓜 -瓢 -瓦 -瓶 -甍 -甑 -甕 -甘 -甚 -甞 -生 -産 -甥 -用 -甫 -田 -由 -甲 -申 -男 -町 -画 -界 -畏 -畑 -畔 -留 -畜 -畝 -畠 -畢 -略 -番 -異 -畳 -當 -畷 -畸 -畺 -畿 -疆 -疇 -疋 -疎 -疏 -疑 -疫 -疱 -疲 -疹 -疼 -疾 -病 -症 -痒 -痔 -痕 -痘 -痙 -痛 -痢 -痩 -痴 -痺 -瘍 -瘡 -瘧 -療 -癇 -癌 -癒 -癖 -癡 -癪 -発 -登 -白 -百 -的 -皆 -皇 -皋 -皐 -皓 -皮 -皺 -皿 -盂 -盃 -盆 -盈 -益 -盒 -盗 -盛 -盞 -盟 -盡 -監 -盤 -盥 -盧 -目 -盲 -直 -相 -盾 -省 -眉 -看 -県 -眞 -真 -眠 -眷 -眺 -眼 -着 -睡 -督 -睦 -睨 -睿 -瞋 -瞑 -瞞 -瞬 -瞭 -瞰 -瞳 -瞻 -瞼 -瞿 -矍 -矛 -矜 -矢 -知 -矧 -矩 -短 -矮 -矯 -石 -砂 -砌 -研 -砕 -砥 -砦 -砧 -砲 -破 -砺 -硝 -硫 -硬 -硯 -碁 -碇 -碌 -碑 -碓 -碕 -碗 -碣 -碧 -碩 -確 -碾 -磁 -磐 -磔 -磧 -磨 -磬 -磯 -礁 -礎 -礒 -礙 -礫 -礬 -示 -礼 -社 -祀 -祁 -祇 -祈 -祉 -祐 -祓 -祕 -祖 -祗 -祚 -祝 -神 -祟 -祠 -祢 -祥 -票 -祭 -祷 -祺 -禁 -禄 -禅 -禊 -禍 -禎 -福 -禔 -禖 -禛 -禦 -禧 -禮 -禰 -禹 -禽 -禿 -秀 -私 -秋 -科 -秒 -秘 -租 -秤 -秦 -秩 -称 -移 -稀 -程 -税 -稔 -稗 -稙 -稚 -稜 -稠 -種 -稱 -稲 -稷 -稻 -稼 -稽 -稿 -穀 -穂 -穆 -積 -穎 -穏 -穗 -穜 -穢 -穣 -穫 -穴 -究 -空 -突 -窃 -窄 -窒 -窓 -窟 -窠 -窩 -窪 -窮 -窯 -竃 -竄 -竈 -立 -站 -竜 -竝 -竟 -章 -童 -竪 -竭 -端 -竴 -競 -竹 -竺 -竽 -竿 -笄 -笈 -笏 -笑 -笙 -笛 -笞 -笠 -笥 -符 -第 -笹 -筅 -筆 -筇 -筈 -等 -筋 -筌 -筍 -筏 -筐 -筑 -筒 -答 -策 -筝 -筥 -筧 -筬 -筮 -筯 -筰 -筵 -箆 -箇 -箋 -箏 -箒 -箔 -箕 -算 -箙 -箜 -管 -箪 -箭 -箱 -箸 -節 -篁 -範 -篆 -篇 -築 -篋 -篌 -篝 -篠 -篤 -篥 -篦 -篩 -篭 -篳 -篷 -簀 -簒 -簡 -簧 -簪 -簫 -簺 -簾 -簿 -籀 -籃 -籌 -籍 -籐 -籟 -籠 -籤 -籬 -米 -籾 -粂 -粉 -粋 -粒 -粕 -粗 -粘 -粛 -粟 -粥 -粧 -粮 -粳 -精 -糊 -糖 -糜 -糞 -糟 -糠 -糧 -糯 -糸 -糺 -系 -糾 -紀 -約 -紅 -紋 -納 -紐 -純 -紗 -紘 -紙 -級 -紛 -素 -紡 -索 -紫 -紬 -累 -細 -紳 -紵 -紹 -紺 -絁 -終 -絃 -組 -絅 -経 -結 -絖 -絞 -絡 -絣 -給 -統 -絲 -絵 -絶 -絹 -絽 -綏 -經 -継 -続 -綜 -綟 -綬 -維 -綱 -網 -綴 -綸 -綺 -綽 -綾 -綿 -緊 -緋 -総 -緑 -緒 -線 -締 -緥 -編 -緩 -緬 -緯 -練 -緻 -縁 -縄 -縅 -縒 -縛 -縞 -縢 -縣 -縦 -縫 -縮 -縹 -總 -績 -繁 -繊 -繋 -繍 -織 -繕 -繝 -繦 -繧 -繰 -繹 -繼 -纂 -纈 -纏 -纐 -纒 -纛 -缶 -罔 -罠 -罧 -罪 -置 -罰 -署 -罵 -罷 -罹 -羂 -羅 -羆 -羇 -羈 -羊 -羌 -美 -群 -羨 -義 -羯 -羲 -羹 -羽 -翁 -翅 -翌 -習 -翔 -翛 -翠 -翡 -翫 -翰 -翺 -翻 -翼 -耀 -老 -考 -者 -耆 -而 -耐 -耕 -耗 -耨 -耳 -耶 -耽 -聊 -聖 -聘 -聚 -聞 -聟 -聡 -聨 -聯 -聰 -聲 -聴 -職 -聾 -肄 -肆 -肇 -肉 -肋 -肌 -肖 -肘 -肛 -肝 -股 -肢 -肥 -肩 -肪 -肯 -肱 -育 -肴 -肺 -胃 -胆 -背 -胎 -胖 -胚 -胝 -胞 -胡 -胤 -胱 -胴 -胸 -能 -脂 -脅 -脆 -脇 -脈 -脊 -脚 -脛 -脩 -脱 -脳 -腋 -腎 -腐 -腑 -腔 -腕 -腫 -腰 -腱 -腸 -腹 -腺 -腿 -膀 -膏 -膚 -膜 -膝 -膠 -膣 -膨 -膩 -膳 -膵 -膾 -膿 -臂 -臆 -臈 -臍 -臓 -臘 -臚 -臣 -臥 -臨 -自 -臭 -至 -致 -臺 -臼 -舂 -舅 -與 -興 -舌 -舍 -舎 -舒 -舖 -舗 -舘 -舜 -舞 -舟 -舩 -航 -般 -舳 -舶 -船 -艇 -艘 -艦 -艮 -良 -色 -艶 -芋 -芒 -芙 -芝 -芥 -芦 -芬 -芭 -芯 -花 -芳 -芸 -芹 -芻 -芽 -芿 -苅 -苑 -苔 -苗 -苛 -苞 -苡 -若 -苦 -苧 -苫 -英 -苴 -苻 -茂 -范 -茄 -茅 -茎 -茗 -茘 -茜 -茨 -茲 -茵 -茶 -茸 -茹 -草 -荊 -荏 -荒 -荘 -荷 -荻 -荼 -莞 -莪 -莫 -莬 -莱 
-莵 -莽 -菅 -菊 -菌 -菓 -菖 -菘 -菜 -菟 -菩 -菫 -華 -菱 -菴 -萄 -萊 -萌 -萍 -萎 -萠 -萩 -萬 -萱 -落 -葉 -著 -葛 -葡 -董 -葦 -葩 -葬 -葭 -葱 -葵 -葺 -蒋 -蒐 -蒔 -蒙 -蒟 -蒡 -蒲 -蒸 -蒻 -蒼 -蒿 -蓄 -蓆 -蓉 -蓋 -蓑 -蓬 -蓮 -蓼 -蔀 -蔑 -蔓 -蔚 -蔡 -蔦 -蔬 -蔭 -蔵 -蔽 -蕃 -蕉 -蕊 -蕎 -蕨 -蕩 -蕪 -蕭 -蕾 -薄 -薇 -薊 -薔 -薗 -薙 -薛 -薦 -薨 -薩 -薪 -薫 -薬 -薭 -薮 -藁 -藉 -藍 -藏 -藐 -藝 -藤 -藩 -藪 -藷 -藹 -藺 -藻 -蘂 -蘆 -蘇 -蘊 -蘭 -虎 -虐 -虔 -虚 -虜 -虞 -號 -虫 -虹 -虻 -蚊 -蚕 -蛇 -蛉 -蛍 -蛎 -蛙 -蛛 -蛟 -蛤 -蛭 -蛮 -蛸 -蛹 -蛾 -蜀 -蜂 -蜃 -蜆 -蜊 -蜘 -蜜 -蜷 -蜻 -蝉 -蝋 -蝕 -蝙 -蝠 -蝦 -蝶 -蝿 -螂 -融 -螣 -螺 -蟄 -蟇 -蟠 -蟷 -蟹 -蟻 -蠢 -蠣 -血 -衆 -行 -衍 -衒 -術 -街 -衙 -衛 -衝 -衞 -衡 -衢 -衣 -表 -衫 -衰 -衵 -衷 -衽 -衾 -衿 -袁 -袈 -袋 -袍 -袒 -袖 -袙 -袞 -袢 -被 -袰 -袱 -袴 -袷 -袿 -裁 -裂 -裃 -装 -裏 -裔 -裕 -裘 -裙 -補 -裟 -裡 -裲 -裳 -裴 -裸 -裹 -製 -裾 -褂 -褄 -複 -褌 -褐 -褒 -褥 -褪 -褶 -褻 -襄 -襖 -襞 -襟 -襠 -襦 -襪 -襲 -襴 -襷 -西 -要 -覆 -覇 -覈 -見 -規 -視 -覗 -覚 -覧 -親 -覲 -観 -覺 -觀 -角 -解 -触 -言 -訂 -計 -討 -訓 -託 -記 -訛 -訟 -訢 -訥 -訪 -設 -許 -訳 -訴 -訶 -診 -註 -証 -詐 -詔 -評 -詛 -詞 -詠 -詢 -詣 -試 -詩 -詫 -詮 -詰 -話 -該 -詳 -誄 -誅 -誇 -誉 -誌 -認 -誓 -誕 -誘 -語 -誠 -誡 -誣 -誤 -誥 -誦 -説 -読 -誰 -課 -誼 -誾 -調 -談 -請 -諌 -諍 -諏 -諒 -論 -諚 -諜 -諟 -諡 -諦 -諧 -諫 -諭 -諮 -諱 -諶 -諷 -諸 -諺 -諾 -謀 -謄 -謌 -謎 -謗 -謙 -謚 -講 -謝 -謡 -謫 -謬 -謹 -證 -識 -譚 -譛 -譜 -警 -譬 -譯 -議 -譲 -譴 -護 -讀 -讃 -讐 -讒 -谷 -谿 -豅 -豆 -豊 -豎 -豐 -豚 -象 -豪 -豫 -豹 -貌 -貝 -貞 -負 -財 -貢 -貧 -貨 -販 -貪 -貫 -責 -貯 -貰 -貴 -買 -貸 -費 -貼 -貿 -賀 -賁 -賂 -賃 -賄 -資 -賈 -賊 -賎 -賑 -賓 -賛 -賜 -賞 -賠 -賢 -賣 -賤 -賦 -質 -賭 -購 -賽 -贄 -贅 -贈 -贋 -贔 -贖 -赤 -赦 -走 -赴 -起 -超 -越 -趙 -趣 -足 -趺 -趾 -跋 -跏 -距 -跡 -跨 -跪 -路 -跳 -践 -踊 -踏 -踐 -踞 -踪 -踵 -蹄 -蹉 -蹊 -蹟 -蹲 -蹴 -躅 -躇 -躊 -躍 -躑 -躙 -躪 -身 -躬 -躯 -躰 -車 -軋 -軌 -軍 -軒 -軟 -転 -軸 -軻 -軽 -軾 -較 -載 -輌 -輔 -輜 -輝 -輦 -輩 -輪 -輯 -輸 -輿 -轄 -轍 -轟 -轢 -辛 -辞 -辟 -辥 -辦 -辨 -辰 -辱 -農 -辺 -辻 -込 -迂 -迅 -迎 -近 -返 -迢 -迦 -迪 -迫 -迭 -述 -迷 -迹 -追 -退 -送 -逃 -逅 -逆 -逍 -透 -逐 -逓 -途 -逕 -逗 -這 -通 -逝 -逞 -速 -造 -逢 -連 -逮 -週 -進 -逸 -逼 -遁 -遂 -遅 -遇 -遊 -運 -遍 -過 -遐 -道 -達 -違 -遙 -遜 -遠 -遡 -遣 -遥 -適 -遭 -遮 -遯 -遵 -遷 -選 -遺 -遼 -避 -邀 -邁 -邂 -邃 -還 -邇 -邉 -邊 -邑 -那 -邦 -邨 -邪 -邯 -邵 -邸 -郁 -郊 -郎 -郡 -郢 -部 -郭 -郴 -郵 -郷 -都 -鄂 -鄙 -鄭 -鄰 -鄲 -酉 -酋 -酌 -配 -酎 -酒 -酔 -酢 -酥 -酪 -酬 -酵 -酷 -酸 -醍 -醐 -醒 -醗 -醜 -醤 -醪 -醵 -醸 -采 -釈 -釉 -釋 -里 -重 -野 -量 -釐 -金 -釘 -釜 -針 -釣 -釧 -釿 -鈍 -鈎 -鈐 -鈔 -鈞 -鈦 -鈴 -鈷 -鈸 -鈿 -鉄 -鉇 -鉉 -鉋 -鉛 -鉢 -鉤 -鉦 -鉱 -鉾 -銀 -銃 -銅 -銈 -銑 -銕 -銘 -銚 -銜 -銭 -鋏 -鋒 -鋤 -鋭 -鋲 -鋳 -鋸 -鋺 -鋼 -錆 -錍 -錐 -錘 -錠 -錣 -錦 -錫 -錬 -錯 -録 -錵 -鍋 -鍍 -鍑 -鍔 -鍛 -鍬 -鍮 -鍵 -鍼 -鍾 -鎌 -鎖 -鎗 -鎚 -鎧 -鎬 -鎮 -鎰 -鎹 -鏃 -鏑 -鏡 -鐃 -鐇 -鐐 -鐔 -鐘 -鐙 -鐚 -鐡 -鐵 -鐸 -鑁 -鑊 -鑑 -鑒 -鑚 -鑠 -鑢 -鑰 -鑵 -鑷 -鑼 -鑽 -鑿 -長 -門 -閃 -閇 -閉 -開 -閏 -閑 -間 -閔 -閘 -関 -閣 -閤 -閥 -閦 -閨 -閬 -閲 -閻 -閼 -閾 -闇 -闍 -闔 -闕 -闘 -關 -闡 -闢 -闥 -阜 -阪 -阮 -阯 -防 -阻 -阿 -陀 -陂 -附 -陌 -降 -限 -陛 -陞 -院 -陣 -除 -陥 -陪 -陬 -陰 -陳 -陵 -陶 -陸 -険 -陽 -隅 -隆 -隈 -隊 -隋 -階 -随 -隔 -際 -障 -隠 -隣 -隧 -隷 -隻 -隼 -雀 -雁 -雄 -雅 -集 -雇 -雉 -雊 -雋 -雌 -雍 -雑 -雖 -雙 -雛 -離 -難 -雨 -雪 -雫 -雰 -雲 -零 -雷 -雹 -電 -需 -震 -霊 -霍 -霖 -霜 -霞 -霧 -霰 -露 -靈 -青 -靖 -静 -靜 -非 -面 -革 -靫 -靭 -靱 -靴 -靺 -鞁 -鞄 -鞆 -鞋 -鞍 -鞏 -鞘 -鞠 -鞨 -鞭 -韋 -韓 -韜 -韮 -音 -韶 -韻 -響 -頁 -頂 -頃 -項 -順 -須 -頌 -預 -頑 -頒 -頓 -領 -頚 -頬 -頭 -頴 -頸 -頻 -頼 -顆 -題 -額 -顎 -顔 -顕 -顗 -願 -顛 -類 -顧 -顯 -風 -飛 -食 -飢 -飩 -飫 -飯 -飲 -飴 -飼 -飽 -飾 -餃 -餅 -餉 -養 -餌 -餐 -餓 -餘 -餝 -餡 -館 -饂 -饅 -饉 -饋 -饌 -饒 -饗 -首 -馗 -香 -馨 -馬 -馳 -馴 -駄 -駅 -駆 -駈 -駐 -駒 -駕 -駝 -駿 -騁 -騎 -騏 -騒 -験 -騙 -騨 -騰 -驕 -驚 -驛 -驢 -骨 -骸 -髄 -體 -高 -髙 -髢 -髪 -髭 -髮 -髷 -髻 -鬘 -鬚 -鬢 -鬨 -鬯 -鬱 -鬼 -魁 -魂 -魄 -魅 -魏 -魔 -魚 -魯 -鮎 -鮑 -鮒 -鮪 -鮫 -鮭 -鮮 -鯉 -鯔 -鯖 -鯛 -鯨 -鯰 -鯱 -鰐 -鰒 -鰭 -鰯 -鰰 -鰹 -鰻 -鱈 -鱒 -鱗 -鱧 -鳥 -鳩 -鳰 -鳳 -鳴 -鳶 -鴈 -鴉 -鴎 -鴛 -鴟 -鴦 -鴨 -鴫 -鴻 -鵄 -鵜 -鵞 -鵡 -鵬 -鵲 -鵺 -鶉 -鶏 -鶯 -鶴 -鷄 -鷙 -鷲 -鷹 -鷺 -鸚 -鸞 -鹸 -鹽 -鹿 -麁 -麒 -麓 -麗 -麝 -麞 -麟 -麦 -麩 -麹 -麺 -麻 -麾 -麿 -黄 -黌 -黍 -黒 -黙 -黛 -黠 -鼈 -鼉 -鼎 -鼓 -鼠 -鼻 -齊 -齋 -齟 -齢 -齬 -龍 -龕 -龗 -! -# -% -& -( -) -+ -, -- -. -/ -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -: -; -= -? 
-@ -A -B -C -D -E -F -G -H -I -J -K -L -M -N -O -P -R -S -T -U -V -W -X -Z -a -c -d -e -f -h -i -j -k -l -m -n -o -p -r -s -t -u -y -z -~ -・ - diff --git a/modules/image/text_recognition/japan_ocr_db_crnn_mobile/character.py b/modules/image/text_recognition/japan_ocr_db_crnn_mobile/character.py deleted file mode 100644 index 21dbbd9dc790e3d009f45c1ef1b68c001e9f0e0b..0000000000000000000000000000000000000000 --- a/modules/image/text_recognition/japan_ocr_db_crnn_mobile/character.py +++ /dev/null @@ -1,213 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import numpy as np -import string - -class CharacterOps(object): - """ Convert between text-label and text-index """ - - def __init__(self, config): - self.character_type = config['character_type'] - self.loss_type = config['loss_type'] - self.max_text_len = config['max_text_length'] - if self.character_type == "en": - self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz" - dict_character = list(self.character_str) - elif self.character_type in [ - "ch", 'japan', 'korean', 'french', 'german' - ]: - character_dict_path = config['character_dict_path'] - add_space = False - if 'use_space_char' in config: - add_space = config['use_space_char'] - self.character_str = "" - with open(character_dict_path, "rb") as fin: - lines = fin.readlines() - for line in lines: - line = line.decode('utf-8').strip("\n").strip("\r\n") - self.character_str += line - if add_space: - self.character_str += " " - dict_character = list(self.character_str) - elif self.character_type == "en_sensitive": - # same with ASTER setting (use 94 char). - self.character_str = string.printable[:-6] - dict_character = list(self.character_str) - else: - self.character_str = None - assert self.character_str is not None, \ - "Nonsupport type of the character: {}".format(self.character_str) - self.beg_str = "sos" - self.end_str = "eos" - if self.loss_type == "attention": - dict_character = [self.beg_str, self.end_str] + dict_character - elif self.loss_type == "srn": - dict_character = dict_character + [self.beg_str, self.end_str] - self.dict = {} - for i, char in enumerate(dict_character): - self.dict[char] = i - self.character = dict_character - - def encode(self, text): - """convert text-label into text-index. - input: - text: text labels of each image. [batch_size] - - output: - text: concatenated text index for CTCLoss. - [sum(text_lengths)] = [text_index_0 + text_index_1 + ... + text_index_(n - 1)] - length: length of each text. [batch_size] - """ - if self.character_type == "en": - text = text.lower() - - text_list = [] - for char in text: - if char not in self.dict: - continue - text_list.append(self.dict[char]) - text = np.array(text_list) - return text - - def decode(self, text_index, is_remove_duplicate=False): - """ convert text-index into text-label. 
""" - char_list = [] - char_num = self.get_char_num() - - if self.loss_type == "attention": - beg_idx = self.get_beg_end_flag_idx("beg") - end_idx = self.get_beg_end_flag_idx("end") - ignored_tokens = [beg_idx, end_idx] - else: - ignored_tokens = [char_num] - - for idx in range(len(text_index)): - if text_index[idx] in ignored_tokens: - continue - if is_remove_duplicate: - if idx > 0 and text_index[idx - 1] == text_index[idx]: - continue - char_list.append(self.character[int(text_index[idx])]) - text = ''.join(char_list) - return text - - def get_char_num(self): - return len(self.character) - - def get_beg_end_flag_idx(self, beg_or_end): - if self.loss_type == "attention": - if beg_or_end == "beg": - idx = np.array(self.dict[self.beg_str]) - elif beg_or_end == "end": - idx = np.array(self.dict[self.end_str]) - else: - assert False, "Unsupport type %s in get_beg_end_flag_idx"\ - % beg_or_end - return idx - else: - err = "error in get_beg_end_flag_idx when using the loss %s"\ - % (self.loss_type) - assert False, err - - -def cal_predicts_accuracy(char_ops, - preds, - preds_lod, - labels, - labels_lod, - is_remove_duplicate=False): - acc_num = 0 - img_num = 0 - for ino in range(len(labels_lod) - 1): - beg_no = preds_lod[ino] - end_no = preds_lod[ino + 1] - preds_text = preds[beg_no:end_no].reshape(-1) - preds_text = char_ops.decode(preds_text, is_remove_duplicate) - - beg_no = labels_lod[ino] - end_no = labels_lod[ino + 1] - labels_text = labels[beg_no:end_no].reshape(-1) - labels_text = char_ops.decode(labels_text, is_remove_duplicate) - img_num += 1 - - if preds_text == labels_text: - acc_num += 1 - acc = acc_num * 1.0 / img_num - return acc, acc_num, img_num - - -def cal_predicts_accuracy_srn(char_ops, - preds, - labels, - max_text_len, - is_debug=False): - acc_num = 0 - img_num = 0 - - char_num = char_ops.get_char_num() - - total_len = preds.shape[0] - img_num = int(total_len / max_text_len) - for i in range(img_num): - cur_label = [] - cur_pred = [] - for j in range(max_text_len): - if labels[j + i * max_text_len] != int(char_num - 1): #0 - cur_label.append(labels[j + i * max_text_len][0]) - else: - break - - for j in range(max_text_len + 1): - if j < len(cur_label) and preds[j + i * max_text_len][ - 0] != cur_label[j]: - break - elif j == len(cur_label) and j == max_text_len: - acc_num += 1 - break - elif j == len(cur_label) and preds[j + i * max_text_len][0] == int( - char_num - 1): - acc_num += 1 - break - acc = acc_num * 1.0 / img_num - return acc, acc_num, img_num - - -def convert_rec_attention_infer_res(preds): - img_num = preds.shape[0] - target_lod = [0] - convert_ids = [] - for ino in range(img_num): - end_pos = np.where(preds[ino, :] == 1)[0] - if len(end_pos) <= 1: - text_list = preds[ino, 1:] - else: - text_list = preds[ino, 1:end_pos[1]] - target_lod.append(target_lod[ino] + len(text_list)) - convert_ids = convert_ids + list(text_list) - convert_ids = np.array(convert_ids) - convert_ids = convert_ids.reshape((-1, 1)) - return convert_ids, target_lod - - -def convert_rec_label_to_lod(ori_labels): - img_num = len(ori_labels) - target_lod = [0] - convert_ids = [] - for ino in range(img_num): - target_lod.append(target_lod[ino] + len(ori_labels[ino])) - convert_ids = convert_ids + list(ori_labels[ino]) - convert_ids = np.array(convert_ids) - convert_ids = convert_ids.reshape((-1, 1)) - return convert_ids, target_lod diff --git a/modules/image/text_recognition/japan_ocr_db_crnn_mobile/module.py b/modules/image/text_recognition/japan_ocr_db_crnn_mobile/module.py index 
cd04f063496af4a93459ec19a7a46b93f2dab51b..890d9d56be4edd7f2cba2bdd45daa7248067044b 100644 --- a/modules/image/text_recognition/japan_ocr_db_crnn_mobile/module.py +++ b/modules/image/text_recognition/japan_ocr_db_crnn_mobile/module.py @@ -1,304 +1,61 @@ -# -*- coding:utf-8 -*- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import argparse -import ast -import copy -import math -import os -import time - -from paddle.fluid.core import AnalysisConfig, create_paddle_predictor, PaddleTensor -from paddlehub.common.logger import logger -from paddlehub.module.module import moduleinfo, runnable, serving -from PIL import Image -import cv2 -import numpy as np -import paddle.fluid as fluid import paddlehub as hub - -from japan_ocr_db_crnn_mobile.character import CharacterOps -from japan_ocr_db_crnn_mobile.utils import base64_to_cv2, draw_ocr, get_image_ext, sorted_boxes +from paddleocr.ppocr.utils.logging import get_logger +from paddleocr.tools.infer.utility import base64_to_cv2 +from paddlehub.module.module import moduleinfo, runnable, serving @moduleinfo( name="japan_ocr_db_crnn_mobile", - version="1.0.0", - summary= - "The module can recognize the japan texts in an image. Firstly, it will detect the text box positions based on the differentiable_binarization module. Then it recognizes the german texts. ", - author="paddle-dev", - author_email="paddle-dev@baidu.com", + version="1.1.0", + summary="ocr service", + author="PaddlePaddle", type="cv/text_recognition") -class JapanOCRDBCRNNMobile(hub.Module): - def _initialize(self, text_detector_module=None, enable_mkldnn=False, use_angle_classification=False): +class JapanOCRDBCRNNMobile: + def __init__(self, + det=True, + rec=True, + use_angle_cls=False, + enable_mkldnn=False, + use_gpu=False, + box_thresh=0.6, + angle_classification_thresh=0.9): """ initialize with the necessary elements + Args: + det(bool): Whether to use text detector. + rec(bool): Whether to use text recognizer. + use_angle_cls(bool): Whether to use text orientation classifier. + enable_mkldnn(bool): Whether to enable mkldnn. + use_gpu (bool): Whether to use gpu. 
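Both language wrappers in this patch expose the same thin API around multi_languages_ocr_db_crnn (recognize_text, the serving endpoint, run_cmd, and export_onnx_model). A minimal usage sketch follows, assuming the module has been installed with `hub install japan_ocr_db_crnn_mobile==1.1.0` together with the requirements listed in this patch, and that `doc_img.jpg` and `./onnx_model` are hypothetical local paths:

```python
# Minimal sketch, not part of the patch: exercises the wrapper API added above.
# Assumes the module and its dependencies (paddleocr, paddle2onnx, shapely,
# pyclipper) are installed; 'doc_img.jpg' is a hypothetical local test image.
import cv2
import paddlehub as hub

ocr = hub.Module(name="japan_ocr_db_crnn_mobile")

# Either in-memory ndarrays or file paths can be passed; both are forwarded to
# multi_languages_ocr_db_crnn.recognize_text by the wrapper.
results = ocr.recognize_text(
    images=[cv2.imread('doc_img.jpg')],  # or: paths=['doc_img.jpg']
    output_dir='ocr_result',             # directory for visualized images
    visualization=True)                  # save annotated images alongside results
print(results)

# Optional: export the underlying inference model to ONNX (opset 10 by default);
# './onnx_model' is a hypothetical output directory.
ocr.export_onnx_model(dirname='./onnx_model')
```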
+ box_thresh(float): the threshold of the detected text box's confidence + angle_classification_thresh(float): the threshold of the angle classification confidence """ - self.character_dict_path = os.path.join(self.directory, 'assets', - 'japan_dict.txt') - char_ops_params = { - 'character_type': 'japan', - 'character_dict_path': self.character_dict_path, - 'loss_type': 'ctc', - 'max_text_length': 25, - 'use_space_char': True - } - self.char_ops = CharacterOps(char_ops_params) - self.rec_image_shape = [3, 32, 320] - self._text_detector_module = text_detector_module - self.font_file = os.path.join(self.directory, 'assets', 'japan.ttc') - self.enable_mkldnn = enable_mkldnn - self.use_angle_classification = use_angle_classification - - self.rec_pretrained_model_path = os.path.join( - self.directory, 'inference_model', 'character_rec') - self.rec_predictor, self.rec_input_tensor, self.rec_output_tensors = self._set_config( - self.rec_pretrained_model_path) - - if self.use_angle_classification: - self.cls_pretrained_model_path = os.path.join( - self.directory, 'inference_model', 'angle_cls') - - self.cls_predictor, self.cls_input_tensor, self.cls_output_tensors = self._set_config( - self.cls_pretrained_model_path) - - def _set_config(self, pretrained_model_path): - """ - predictor config path - """ - model_file_path = os.path.join(pretrained_model_path, 'model') - params_file_path = os.path.join(pretrained_model_path, 'params') - - config = AnalysisConfig(model_file_path, params_file_path) - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - use_gpu = True - except: - use_gpu = False - - if use_gpu: - config.enable_use_gpu(8000, 0) - else: - config.disable_gpu() - if self.enable_mkldnn: - # cache 10 different shapes for mkldnn to avoid memory leak - config.set_mkldnn_cache_capacity(10) - config.enable_mkldnn() - - config.disable_glog_info() - config.delete_pass("conv_transpose_eltwiseadd_bn_fuse_pass") - config.switch_use_feed_fetch_ops(False) - - predictor = create_paddle_predictor(config) - - input_names = predictor.get_input_names() - input_tensor = predictor.get_input_tensor(input_names[0]) - output_names = predictor.get_output_names() - output_tensors = [] - for output_name in output_names: - output_tensor = predictor.get_output_tensor(output_name) - output_tensors.append(output_tensor) - - return predictor, input_tensor, output_tensors - - @property - def text_detector_module(self): - """ - text detect module - """ - if not self._text_detector_module: - self._text_detector_module = hub.Module( - name='chinese_text_detection_db_mobile', - enable_mkldnn=self.enable_mkldnn, - version='1.0.4') - return self._text_detector_module - - def read_images(self, paths=[]): - images = [] - for img_path in paths: - assert os.path.isfile( - img_path), "The {} isn't a valid file.".format(img_path) - img = cv2.imread(img_path) - if img is None: - logger.info("error in loading image:{}".format(img_path)) - continue - images.append(img) - return images - - def get_rotate_crop_image(self, img, points): - ''' - img_height, img_width = img.shape[0:2] - left = int(np.min(points[:, 0])) - right = int(np.max(points[:, 0])) - top = int(np.min(points[:, 1])) - bottom = int(np.max(points[:, 1])) - img_crop = img[top:bottom, left:right, :].copy() - points[:, 0] = points[:, 0] - left - points[:, 1] = points[:, 1] - top - ''' - img_crop_width = int( - max( - np.linalg.norm(points[0] - points[1]), - np.linalg.norm(points[2] - points[3]))) - img_crop_height = int( - max( - np.linalg.norm(points[0] - 
points[3]), - np.linalg.norm(points[1] - points[2]))) - pts_std = np.float32([[0, 0], [img_crop_width, 0], - [img_crop_width, img_crop_height], - [0, img_crop_height]]) - M = cv2.getPerspectiveTransform(points, pts_std) - dst_img = cv2.warpPerspective( - img, - M, (img_crop_width, img_crop_height), - borderMode=cv2.BORDER_REPLICATE, - flags=cv2.INTER_CUBIC) - dst_img_height, dst_img_width = dst_img.shape[0:2] - if dst_img_height * 1.0 / dst_img_width >= 1.5: - dst_img = np.rot90(dst_img) - return dst_img - - def resize_norm_img_rec(self, img, max_wh_ratio): - imgC, imgH, imgW = self.rec_image_shape - assert imgC == img.shape[2] - h, w = img.shape[:2] - ratio = w / float(h) - if math.ceil(imgH * ratio) > imgW: - resized_w = imgW - else: - resized_w = int(math.ceil(imgH * ratio)) - resized_image = cv2.resize(img, (resized_w, imgH)) - resized_image = resized_image.astype('float32') - resized_image = resized_image.transpose((2, 0, 1)) / 255 - resized_image -= 0.5 - resized_image /= 0.5 - padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32) - padding_im[:, :, 0:resized_w] = resized_image - return padding_im - - def resize_norm_img_cls(self, img): - cls_image_shape = [3, 48, 192] - imgC, imgH, imgW = cls_image_shape - h = img.shape[0] - w = img.shape[1] - ratio = w / float(h) - if math.ceil(imgH * ratio) > imgW: - resized_w = imgW - else: - resized_w = int(math.ceil(imgH * ratio)) - resized_image = cv2.resize(img, (resized_w, imgH)) - resized_image = resized_image.astype('float32') - if cls_image_shape[0] == 1: - resized_image = resized_image / 255 - resized_image = resized_image[np.newaxis, :] - else: - resized_image = resized_image.transpose((2, 0, 1)) / 255 - resized_image -= 0.5 - resized_image /= 0.5 - padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32) - padding_im[:, :, 0:resized_w] = resized_image - return padding_im - - def recognize_text(self, - images=[], - paths=[], - use_gpu=False, - output_dir='ocr_result', - visualization=False, - box_thresh=0.5, - text_thresh=0.5, - angle_classification_thresh=0.9): - """ - Get the chinese texts in the predicted images. + self.logger = get_logger() + self.model = hub.Module( + name="multi_languages_ocr_db_crnn", + lang="japan", + det=det, + rec=rec, + use_angle_cls=use_angle_cls, + enable_mkldnn=enable_mkldnn, + use_gpu=use_gpu, + box_thresh=box_thresh, + angle_classification_thresh=angle_classification_thresh) + self.model.name = self.name + + def recognize_text(self, images=[], paths=[], output_dir='ocr_result', visualization=False): + """ + Get the text in the predicted images. Args: images (list(numpy.ndarray)): images data, shape of each is [H, W, C]. If images not paths paths (list[str]): The paths of images. If paths not images - use_gpu (bool): Whether to use gpu. - batch_size(int): the program deals once with one output_dir (str): The directory to store output images. visualization (bool): Whether to save image or not. - box_thresh(float): the threshold of the detected text box's confidence - text_thresh(float): the threshold of the chinese text recognition confidence - angle_classification_thresh(float): the threshold of the angle classification confidence - Returns: - res (list): The result of chinese texts and save path of images. + res (list): The result of text detection box and save path of images. """ - if use_gpu: - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - except: - raise RuntimeError( - "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. 
If you wanna use gpu, please set CUDA_VISIBLE_DEVICES via export CUDA_VISIBLE_DEVICES=cuda_device_id." - ) - - self.use_gpu = use_gpu - - if images != [] and isinstance(images, list) and paths == []: - predicted_data = images - elif images == [] and isinstance(paths, list) and paths != []: - predicted_data = self.read_images(paths) - else: - raise TypeError("The input data is inconsistent with expectations.") - - assert predicted_data != [], "There is not any image to be predicted. Please check the input data." - - detection_results = self.text_detector_module.detect_text( - images=predicted_data, use_gpu=self.use_gpu, box_thresh=box_thresh) - print('*'*10) - print(detection_results) - - boxes = [ - np.array(item['data']).astype(np.float32) - for item in detection_results - ] - all_results = [] - for index, img_boxes in enumerate(boxes): - original_image = predicted_data[index].copy() - result = {'save_path': ''} - if img_boxes.size == 0: - result['data'] = [] - else: - img_crop_list = [] - boxes = sorted_boxes(img_boxes) - for num_box in range(len(boxes)): - tmp_box = copy.deepcopy(boxes[num_box]) - img_crop = self.get_rotate_crop_image( - original_image, tmp_box) - img_crop_list.append(img_crop) - - if self.use_angle_classification: - img_crop_list, angle_list = self._classify_text( - img_crop_list, - angle_classification_thresh=angle_classification_thresh) - - rec_results = self._recognize_text(img_crop_list) - - # if the recognized text confidence score is lower than text_thresh, then drop it - rec_res_final = [] - for index, res in enumerate(rec_results): - text, score = res - if score >= text_thresh: - rec_res_final.append({ - 'text': - text, - 'confidence': - float(score), - 'text_box_position': - boxes[index].astype(np.int).tolist() - }) - result['data'] = rec_res_final - - if visualization and result['data']: - result['save_path'] = self.save_result_image( - original_image, boxes, rec_results, output_dir, - text_thresh) - all_results.append(result) - + all_results = self.model.recognize_text( + images=images, paths=paths, output_dir=output_dir, visualization=visualization) return all_results @serving @@ -310,282 +67,21 @@ class JapanOCRDBCRNNMobile(hub.Module): results = self.recognize_text(images_decode, **kwargs) return results - def save_result_image( - self, - original_image, - detection_boxes, - rec_results, - output_dir='ocr_result', - text_thresh=0.5, - ): - image = Image.fromarray(cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)) - txts = [item[0] for item in rec_results] - scores = [item[1] for item in rec_results] - draw_img = draw_ocr( - image, - detection_boxes, - txts, - scores, - font_file=self.font_file, - draw_txt=True, - drop_score=text_thresh) - - if not os.path.exists(output_dir): - os.makedirs(output_dir) - ext = get_image_ext(original_image) - saved_name = 'ndarray_{}{}'.format(time.time(), ext) - save_file_path = os.path.join(output_dir, saved_name) - cv2.imwrite(save_file_path, draw_img[:, :, ::-1]) - return save_file_path - - def _classify_text(self, image_list, angle_classification_thresh=0.9): - img_list = copy.deepcopy(image_list) - img_num = len(img_list) - # Calculate the aspect ratio of all text bars - width_list = [] - for img in img_list: - width_list.append(img.shape[1] / float(img.shape[0])) - # Sorting can speed up the cls process - indices = np.argsort(np.array(width_list)) - - cls_res = [['', 0.0]] * img_num - batch_num = 30 - for beg_img_no in range(0, img_num, batch_num): - end_img_no = min(img_num, beg_img_no + batch_num) - norm_img_batch = 
[] - max_wh_ratio = 0 - for ino in range(beg_img_no, end_img_no): - h, w = img_list[indices[ino]].shape[0:2] - wh_ratio = w * 1.0 / h - max_wh_ratio = max(max_wh_ratio, wh_ratio) - for ino in range(beg_img_no, end_img_no): - norm_img = self.resize_norm_img_cls(img_list[indices[ino]]) - norm_img = norm_img[np.newaxis, :] - norm_img_batch.append(norm_img) - norm_img_batch = np.concatenate(norm_img_batch) - norm_img_batch = norm_img_batch.copy() - - self.cls_input_tensor.copy_from_cpu(norm_img_batch) - self.cls_predictor.zero_copy_run() - - prob_out = self.cls_output_tensors[0].copy_to_cpu() - label_out = self.cls_output_tensors[1].copy_to_cpu() - if len(label_out.shape) != 1: - prob_out, label_out = label_out, prob_out - label_list = ['0', '180'] - for rno in range(len(label_out)): - label_idx = label_out[rno] - score = prob_out[rno][label_idx] - label = label_list[label_idx] - cls_res[indices[beg_img_no + rno]] = [label, score] - if '180' in label and score > angle_classification_thresh: - img_list[indices[beg_img_no + rno]] = cv2.rotate( - img_list[indices[beg_img_no + rno]], 1) - return img_list, cls_res - - def _recognize_text(self, img_list): - img_num = len(img_list) - # Calculate the aspect ratio of all text bars - width_list = [] - for img in img_list: - width_list.append(img.shape[1] / float(img.shape[0])) - # Sorting can speed up the recognition process - indices = np.argsort(np.array(width_list)) - - rec_res = [['', 0.0]] * img_num - batch_num = 30 - for beg_img_no in range(0, img_num, batch_num): - end_img_no = min(img_num, beg_img_no + batch_num) - norm_img_batch = [] - max_wh_ratio = 0 - for ino in range(beg_img_no, end_img_no): - h, w = img_list[indices[ino]].shape[0:2] - wh_ratio = w * 1.0 / h - max_wh_ratio = max(max_wh_ratio, wh_ratio) - for ino in range(beg_img_no, end_img_no): - norm_img = self.resize_norm_img_rec(img_list[indices[ino]], - max_wh_ratio) - norm_img = norm_img[np.newaxis, :] - norm_img_batch.append(norm_img) - - norm_img_batch = np.concatenate(norm_img_batch, axis=0) - norm_img_batch = norm_img_batch.copy() - - self.rec_input_tensor.copy_from_cpu(norm_img_batch) - self.rec_predictor.zero_copy_run() - - rec_idx_batch = self.rec_output_tensors[0].copy_to_cpu() - rec_idx_lod = self.rec_output_tensors[0].lod()[0] - predict_batch = self.rec_output_tensors[1].copy_to_cpu() - predict_lod = self.rec_output_tensors[1].lod()[0] - for rno in range(len(rec_idx_lod) - 1): - beg = rec_idx_lod[rno] - end = rec_idx_lod[rno + 1] - rec_idx_tmp = rec_idx_batch[beg:end, 0] - preds_text = self.char_ops.decode(rec_idx_tmp) - beg = predict_lod[rno] - end = predict_lod[rno + 1] - probs = predict_batch[beg:end, :] - ind = np.argmax(probs, axis=1) - blank = probs.shape[1] - valid_ind = np.where(ind != (blank - 1))[0] - if len(valid_ind) == 0: - continue - score = np.mean(probs[valid_ind, ind[valid_ind]]) - # rec_res.append([preds_text, score]) - rec_res[indices[beg_img_no + rno]] = [preds_text, score] - - return rec_res - - def save_inference_model(self, - dirname, - model_filename=None, - params_filename=None, - combined=True): - detector_dir = os.path.join(dirname, 'text_detector') - classifier_dir = os.path.join(dirname, 'angle_classifier') - recognizer_dir = os.path.join(dirname, 'text_recognizer') - self._save_detector_model(detector_dir, model_filename, params_filename, - combined) - if self.use_angle_classification: - self._save_classifier_model(classifier_dir, model_filename, - params_filename, combined) - - self._save_recognizer_model(recognizer_dir, model_filename, - 
params_filename, combined) - logger.info("The inference model has been saved in the path {}".format( - os.path.realpath(dirname))) - - def _save_detector_model(self, - dirname, - model_filename=None, - params_filename=None, - combined=True): - self.text_detector_module.save_inference_model( - dirname, model_filename, params_filename, combined) - - def _save_recognizer_model(self, - dirname, - model_filename=None, - params_filename=None, - combined=True): - if combined: - model_filename = "__model__" if not model_filename else model_filename - params_filename = "__params__" if not params_filename else params_filename - place = fluid.CPUPlace() - exe = fluid.Executor(place) - - model_file_path = os.path.join(self.rec_pretrained_model_path, 'model') - params_file_path = os.path.join(self.rec_pretrained_model_path, - 'params') - program, feeded_var_names, target_vars = fluid.io.load_inference_model( - dirname=self.rec_pretrained_model_path, - model_filename=model_file_path, - params_filename=params_file_path, - executor=exe) - - fluid.io.save_inference_model( - dirname=dirname, - main_program=program, - executor=exe, - feeded_var_names=feeded_var_names, - target_vars=target_vars, - model_filename=model_filename, - params_filename=params_filename) - - def _save_classifier_model(self, - dirname, - model_filename=None, - params_filename=None, - combined=True): - if combined: - model_filename = "__model__" if not model_filename else model_filename - params_filename = "__params__" if not params_filename else params_filename - place = fluid.CPUPlace() - exe = fluid.Executor(place) - - model_file_path = os.path.join(self.cls_pretrained_model_path, 'model') - params_file_path = os.path.join(self.cls_pretrained_model_path, - 'params') - program, feeded_var_names, target_vars = fluid.io.load_inference_model( - dirname=self.cls_pretrained_model_path, - model_filename=model_file_path, - params_filename=params_file_path, - executor=exe) - - fluid.io.save_inference_model( - dirname=dirname, - main_program=program, - executor=exe, - feeded_var_names=feeded_var_names, - target_vars=target_vars, - model_filename=model_filename, - params_filename=params_filename) - @runnable def run_cmd(self, argvs): """ Run as a command """ - self.parser = argparse.ArgumentParser( - description="Run the %s module." % self.name, - prog='hub run %s' % self.name, - usage='%(prog)s', - add_help=True) - - self.arg_input_group = self.parser.add_argument_group( - title="Input options", description="Input data. 
Required") - self.arg_config_group = self.parser.add_argument_group( - title="Config options", - description= - "Run configuration for controlling module behavior, not required.") - - self.add_module_config_arg() - self.add_module_input_arg() - - args = self.parser.parse_args(argvs) - results = self.recognize_text( - paths=[args.input_path], - use_gpu=args.use_gpu, - output_dir=args.output_dir, - visualization=args.visualization) + results = self.model.run_cmd(argvs) return results - def add_module_config_arg(self): - """ - Add the command config options - """ - self.arg_config_group.add_argument( - '--use_gpu', - type=ast.literal_eval, - default=False, - help="whether use GPU or not") - self.arg_config_group.add_argument( - '--output_dir', - type=str, - default='ocr_result', - help="The directory to save output images.") - self.arg_config_group.add_argument( - '--visualization', - type=ast.literal_eval, - default=False, - help="whether to save output as images.") - - def add_module_input_arg(self): - """ - Add the command input options - """ - self.arg_input_group.add_argument( - '--input_path', type=str, default=None, help="diretory to image") - + def export_onnx_model(self, dirname: str, input_shape_dict=None, opset_version=10): + ''' + Export the model to ONNX format. -if __name__ == '__main__': - ocr = JapanOCRDBCRNNMobile(enable_mkldnn=False, use_angle_classification=True) - image_path = [ - '/mnt/zhangxuefei/PaddleOCR/doc/imgs/ger_1.jpg', - '/mnt/zhangxuefei/PaddleOCR/doc/imgs/12.jpg', - '/mnt/zhangxuefei/PaddleOCR/doc/imgs/test_image.jpg' - ] - res = ocr.recognize_text(paths=image_path, visualization=True) - ocr.save_inference_model('save') - print(res) + Args: + dirname(str): The directory to save the onnx model. + input_shape_dict: dictionary ``{ input_name: input_value }, eg. 
{'x': [-1, 3, -1, -1]}`` + opset_version(int): operator set + ''' + self.model.export_onnx_model(dirname=dirname, input_shape_dict=input_shape_dict, opset_version=opset_version) diff --git a/modules/image/text_recognition/japan_ocr_db_crnn_mobile/requirements.txt b/modules/image/text_recognition/japan_ocr_db_crnn_mobile/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..527c6de7f643cb427013aaff2409365538fed2d3 --- /dev/null +++ b/modules/image/text_recognition/japan_ocr_db_crnn_mobile/requirements.txt @@ -0,0 +1,4 @@ +paddleocr>=2.3.0.2 +paddle2onnx>=0.9.0 +shapely +pyclipper diff --git a/modules/image/text_recognition/japan_ocr_db_crnn_mobile/utils.py b/modules/image/text_recognition/japan_ocr_db_crnn_mobile/utils.py deleted file mode 100644 index 8c41af300cc91de369a473cb7327b794b6cf5715..0000000000000000000000000000000000000000 --- a/modules/image/text_recognition/japan_ocr_db_crnn_mobile/utils.py +++ /dev/null @@ -1,190 +0,0 @@ -# -*- coding:utf-8 -*- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import math - -from PIL import Image, ImageDraw, ImageFont -import base64 -import cv2 -import numpy as np - - -def draw_ocr(image, - boxes, - txts, - scores, - font_file, - draw_txt=True, - drop_score=0.5): - """ - Visualize the results of OCR detection and recognition - args: - image(Image|array): RGB image - boxes(list): boxes with shape(N, 4, 2) - txts(list): the texts - scores(list): txxs corresponding scores - draw_txt(bool): whether draw text or not - drop_score(float): only scores greater than drop_threshold will be visualized - return(array): - the visualized img - """ - if scores is None: - scores = [1] * len(boxes) - for (box, score) in zip(boxes, scores): - if score < drop_score or math.isnan(score): - continue - box = np.reshape(np.array(box), [-1, 1, 2]).astype(np.int64) - image = cv2.polylines(np.array(image), [box], True, (255, 0, 0), 2) - - if draw_txt: - img = np.array(resize_img(image, input_size=600)) - txt_img = text_visual( - txts, - scores, - font_file, - img_h=img.shape[0], - img_w=600, - threshold=drop_score) - img = np.concatenate([np.array(img), np.array(txt_img)], axis=1) - return img - return image - - -def text_visual(texts, scores, font_file, img_h=400, img_w=600, threshold=0.): - """ - create new blank img and draw txt on it - args: - texts(list): the text will be draw - scores(list|None): corresponding score of each txt - img_h(int): the height of blank img - img_w(int): the width of blank img - return(array): - """ - if scores is not None: - assert len(texts) == len( - scores), "The number of txts and corresponding scores must match" - - def create_blank_img(): - blank_img = np.ones(shape=[img_h, img_w], dtype=np.int8) * 255 - blank_img[:, img_w - 1:] = 0 - blank_img = Image.fromarray(blank_img).convert("RGB") - draw_txt = ImageDraw.Draw(blank_img) - return blank_img, draw_txt - - blank_img, draw_txt = create_blank_img() - - font_size = 20 - txt_color = (0, 0, 0) - font = ImageFont.truetype(font_file, font_size, encoding="utf-8") - - gap = font_size + 5 - txt_img_list = [] - count, index = 1, 0 - for idx, txt in enumerate(texts): - index += 1 - if scores[idx] < threshold or math.isnan(scores[idx]): - index -= 1 - continue - first_line = True - while str_count(txt) >= img_w // font_size - 4: - tmp = txt - txt = tmp[:img_w // font_size - 4] - if first_line: - new_txt = str(index) + ': ' + txt - first_line = False - else: - new_txt = ' ' + txt - draw_txt.text((0, 
gap * count), new_txt, txt_color, font=font) - txt = tmp[img_w // font_size - 4:] - if count >= img_h // gap - 1: - txt_img_list.append(np.array(blank_img)) - blank_img, draw_txt = create_blank_img() - count = 0 - count += 1 - if first_line: - new_txt = str(index) + ': ' + txt + ' ' + '%.3f' % (scores[idx]) - else: - new_txt = " " + txt + " " + '%.3f' % (scores[idx]) - draw_txt.text((0, gap * count), new_txt, txt_color, font=font) - # whether add new blank img or not - if count >= img_h // gap - 1 and idx + 1 < len(texts): - txt_img_list.append(np.array(blank_img)) - blank_img, draw_txt = create_blank_img() - count = 0 - count += 1 - txt_img_list.append(np.array(blank_img)) - if len(txt_img_list) == 1: - blank_img = np.array(txt_img_list[0]) - else: - blank_img = np.concatenate(txt_img_list, axis=1) - return np.array(blank_img) - - -def str_count(s): - """ - Count the number of Chinese characters, - a single English character and a single number - equal to half the length of Chinese characters. - args: - s(string): the input of string - return(int): - the number of Chinese characters - """ - import string - count_zh = count_pu = 0 - s_len = len(s) - en_dg_count = 0 - for c in s: - if c in string.ascii_letters or c.isdigit() or c.isspace(): - en_dg_count += 1 - elif c.isalpha(): - count_zh += 1 - else: - count_pu += 1 - return s_len - math.ceil(en_dg_count / 2) - - -def resize_img(img, input_size=600): - img = np.array(img) - im_shape = img.shape - im_size_min = np.min(im_shape[0:2]) - im_size_max = np.max(im_shape[0:2]) - im_scale = float(input_size) / float(im_size_max) - im = cv2.resize(img, None, None, fx=im_scale, fy=im_scale) - return im - - -def get_image_ext(image): - if image.shape[2] == 4: - return ".png" - return ".jpg" - - -def sorted_boxes(dt_boxes): - """ - Sort text boxes in order from top to bottom, left to right - args: - dt_boxes(array):detected text boxes with shape [4, 2] - return: - sorted boxes(array) with shape [4, 2] - """ - num_boxes = dt_boxes.shape[0] - sorted_boxes = sorted(dt_boxes, key=lambda x: (x[0][1], x[0][0])) - _boxes = list(sorted_boxes) - - for i in range(num_boxes - 1): - if abs(_boxes[i + 1][0][1] - _boxes[i][0][1]) < 10 and \ - (_boxes[i + 1][0][0] < _boxes[i][0][0]): - tmp = _boxes[i] - _boxes[i] = _boxes[i + 1] - _boxes[i + 1] = tmp - return _boxes - - -def base64_to_cv2(b64str): - data = base64.b64decode(b64str.encode('utf8')) - data = np.fromstring(data, np.uint8) - data = cv2.imdecode(data, cv2.IMREAD_COLOR) - return data diff --git a/modules/image/text_recognition/kannada_ocr_db_crnn_mobile/README.md b/modules/image/text_recognition/kannada_ocr_db_crnn_mobile/README.md new file mode 100644 index 0000000000000000000000000000000000000000..19f2b1852e4e343241a40bd21b26820928f7506d --- /dev/null +++ b/modules/image/text_recognition/kannada_ocr_db_crnn_mobile/README.md @@ -0,0 +1,165 @@ +# kannada_ocr_db_crnn_mobile + +|模型名称|kannada_ocr_db_crnn_mobile| +| :--- | :---: | +|类别|图像-文字识别| +|网络|Differentiable Binarization+CRNN| +|数据集|icdar2015数据集| +|是否支持Fine-tuning|否| +|最新更新日期|2021-12-2| +|数据指标|-| + + +## 一、模型基本信息 + +- ### 模型介绍 + + - kannada_ocr_db_crnn_mobile Module用于识别图片当中的卡纳达文。其基于multi_languages_ocr_db_crnn检测得到的文本框,继续识别文本框中的卡纳达文文字。最终识别文字算法采用CRNN(Convolutional Recurrent Neural Network)即卷积递归神经网络。其是DCNN和RNN的组合,专门用于识别图像中的序列式对象。与CTC loss配合使用,进行文字识别,可以直接从文本词级或行级的标注中学习,不需要详细的字符级的标注。该Module是一个识别卡纳达文的轻量级OCR模型,支持直接预测。 + + - 更多详情参考: + - [Real-time Scene Text Detection with Differentiable Binarization](https://arxiv.org/pdf/1911.08947.pdf) + - [An end-to-end 
trainable neural network for image-based sequence recognition and its application to scene text recognition](https://arxiv.org/pdf/1507.05717.pdf) + + + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.0.2 + + - paddlehub >= 2.0.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + +- ### 2、安装 + + - ```shell + $ hub install kannada_ocr_db_crnn_mobile + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + + + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run kannada_ocr_db_crnn_mobile --input_path "/PATH/TO/IMAGE" + $ hub run kannada_ocr_db_crnn_mobile --input_path "/PATH/TO/IMAGE" --det True --rec True --use_angle_cls True --box_thresh 0.7 --angle_classification_thresh 0.8 --visualization True + ``` + - 通过命令行方式实现文字识别模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + ocr = hub.Module(name="kannada_ocr_db_crnn_mobile", enable_mkldnn=True) # mkldnn加速仅在CPU下有效 + result = ocr.recognize_text(images=[cv2.imread('/PATH/TO/IMAGE')]) + + # or + # result = ocr.recognize_text(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 3、API + + - ```python + def __init__(self, + det=True, + rec=True, + use_angle_cls=False, + enable_mkldnn=False, + use_gpu=False, + box_thresh=0.6, + angle_classification_thresh=0.9) + ``` + + - 构造KannadaOCRDBCRNNMobile对象 + + - **参数** + - det(bool): 是否开启文字检测。默认为True。 + - rec(bool): 是否开启文字识别。默认为True。 + - use_angle_cls(bool): 是否开启方向分类, 用于设置使用方向分类器识别180度旋转文字。默认为False。 + - enable_mkldnn(bool): 是否开启mkldnn加速CPU计算。该参数仅在CPU运行下设置有效。默认为False。 + - use\_gpu (bool): 是否使用 GPU;**若使用GPU,请先设置CUDA_VISIBLE_DEVICES环境变量** + - box\_thresh (float): 检测文本框置信度的阈值; + - angle_classification_thresh(float): 文本方向分类置信度的阈值 + + + - ```python + def recognize_text(images=[], + paths=[], + output_dir='ocr_result', + visualization=False) + ``` + + - 预测API,检测输入图片中的所有文本的位置和识别文本结果。 + + - **参数** + + - paths (list\[str\]): 图片的路径; + - images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\],BGR格式; + - output\_dir (str): 图片的保存路径,默认设为 ocr\_result; + - visualization (bool): 是否将识别结果保存为图片文件, 仅有检测开启时有效, 默认为False; + + - **返回** + + - res (list\[dict\]): 识别结果的列表,列表中每一个元素为 dict,各字段为: + - data (list\[dict\]): 识别文本结果,列表中每一个元素为 dict,各字段为: + - text(str): 识别得到的文本 + - confidence(float): 识别文本结果置信度 + - text_box_position(list): 文本框在原图中的像素坐标,4*2的矩阵,依次表示文本框左下、右下、右上、左上顶点的坐标,如果无识别结果则data为\[\] + - orientation(str): 分类的方向,仅在只有方向分类开启时输出 + - score(float): 分类的得分,仅在只有方向分类开启时输出 + - save_path (str, optional): 识别结果的保存路径,如不保存图片则save_path为'' + + +## 四、服务部署 + +- PaddleHub Serving 可以部署一个目标检测的在线服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + - ```shell + $ hub serving start -m kannada_ocr_db_crnn_mobile + ``` + + - 这样就完成了一个目标检测的服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + import cv2 + import base64 + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # 发送HTTP请求 + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/kannada_ocr_db_crnn_mobile" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # 
打印预测结果 + print(r.json()["results"]) + ``` + +## 五、更新历史 + +* 1.0.0 + + 初始发布 + - ```shell + $ hub install kannada_ocr_db_crnn_mobile==1.0.0 + ``` diff --git a/modules/image/text_recognition/kannada_ocr_db_crnn_mobile/__init__.py b/modules/image/text_recognition/kannada_ocr_db_crnn_mobile/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/modules/image/text_recognition/kannada_ocr_db_crnn_mobile/module.py b/modules/image/text_recognition/kannada_ocr_db_crnn_mobile/module.py new file mode 100644 index 0000000000000000000000000000000000000000..a3825167a9de0d76eef57769ed8ee4606a8fa08a --- /dev/null +++ b/modules/image/text_recognition/kannada_ocr_db_crnn_mobile/module.py @@ -0,0 +1,87 @@ +import paddlehub as hub +from paddleocr.ppocr.utils.logging import get_logger +from paddleocr.tools.infer.utility import base64_to_cv2 +from paddlehub.module.module import moduleinfo, runnable, serving + + +@moduleinfo( + name="kannada_ocr_db_crnn_mobile", + version="1.0.0", + summary="ocr service", + author="PaddlePaddle", + type="cv/text_recognition") +class KannadaOCRDBCRNNMobile: + def __init__(self, + det=True, + rec=True, + use_angle_cls=False, + enable_mkldnn=False, + use_gpu=False, + box_thresh=0.6, + angle_classification_thresh=0.9): + """ + initialize with the necessary elements + Args: + det(bool): Whether to use text detector. + rec(bool): Whether to use text recognizer. + use_angle_cls(bool): Whether to use text orientation classifier. + enable_mkldnn(bool): Whether to enable mkldnn. + use_gpu (bool): Whether to use gpu. + box_thresh(float): the threshold of the detected text box's confidence + angle_classification_thresh(float): the threshold of the angle classification confidence + """ + self.logger = get_logger() + self.model = hub.Module( + name="multi_languages_ocr_db_crnn", + lang="ka", + det=det, + rec=rec, + use_angle_cls=use_angle_cls, + enable_mkldnn=enable_mkldnn, + use_gpu=use_gpu, + box_thresh=box_thresh, + angle_classification_thresh=angle_classification_thresh) + self.model.name = self.name + + def recognize_text(self, images=[], paths=[], output_dir='ocr_result', visualization=False): + """ + Get the text in the predicted images. + Args: + images (list(numpy.ndarray)): images data, shape of each is [H, W, C]. If images not paths + paths (list[str]): The paths of images. If paths not images + output_dir (str): The directory to store output images. + visualization (bool): Whether to save image or not. + Returns: + res (list): The result of text detection box and save path of images. + """ + all_results = self.model.recognize_text( + images=images, paths=paths, output_dir=output_dir, visualization=visualization) + return all_results + + @serving + def serving_method(self, images, **kwargs): + """ + Run as a service. + """ + images_decode = [base64_to_cv2(image) for image in images] + results = self.recognize_text(images_decode, **kwargs) + return results + + @runnable + def run_cmd(self, argvs): + """ + Run as a command + """ + results = self.model.run_cmd(argvs) + return results + + def export_onnx_model(self, dirname: str, input_shape_dict=None, opset_version=10): + ''' + Export the model to ONNX format. + + Args: + dirname(str): The directory to save the onnx model. + input_shape_dict: dictionary ``{ input_name: input_value }, eg. 
{'x': [-1, 3, -1, -1]}`` + opset_version(int): operator set + ''' + self.model.export_onnx_model(dirname=dirname, input_shape_dict=input_shape_dict, opset_version=opset_version) diff --git a/modules/image/text_recognition/kannada_ocr_db_crnn_mobile/requirements.txt b/modules/image/text_recognition/kannada_ocr_db_crnn_mobile/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..527c6de7f643cb427013aaff2409365538fed2d3 --- /dev/null +++ b/modules/image/text_recognition/kannada_ocr_db_crnn_mobile/requirements.txt @@ -0,0 +1,4 @@ +paddleocr>=2.3.0.2 +paddle2onnx>=0.9.0 +shapely +pyclipper diff --git a/modules/image/text_recognition/korean_ocr_db_crnn_mobile/README.md b/modules/image/text_recognition/korean_ocr_db_crnn_mobile/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b11f41b59692912660d613c5a72795993357eda3 --- /dev/null +++ b/modules/image/text_recognition/korean_ocr_db_crnn_mobile/README.md @@ -0,0 +1,169 @@ +# korean_ocr_db_crnn_mobile + +|模型名称|korean_ocr_db_crnn_mobile| +| :--- | :---: | +|类别|图像-文字识别| +|网络|Differentiable Binarization+CRNN| +|数据集|icdar2015数据集| +|是否支持Fine-tuning|否| +|最新更新日期|2021-12-2| +|数据指标|-| + + +## 一、模型基本信息 + +- ### 模型介绍 + + - korean_ocr_db_crnn_mobile Module用于识别图片当中的韩文。其基于multi_languages_ocr_db_crnn检测得到的文本框,继续识别文本框中的韩文文字。最终识别文字算法采用CRNN(Convolutional Recurrent Neural Network)即卷积递归神经网络。其是DCNN和RNN的组合,专门用于识别图像中的序列式对象。与CTC loss配合使用,进行文字识别,可以直接从文本词级或行级的标注中学习,不需要详细的字符级的标注。该Module是一个识别韩文的轻量级OCR模型,支持直接预测。 + + - 更多详情参考: + - [Real-time Scene Text Detection with Differentiable Binarization](https://arxiv.org/pdf/1911.08947.pdf) + - [An end-to-end trainable neural network for image-based sequence recognition and its application to scene text recognition](https://arxiv.org/pdf/1507.05717.pdf) + + + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.0.2 + + - paddlehub >= 2.0.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + +- ### 2、安装 + + - ```shell + $ hub install korean_ocr_db_crnn_mobile + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + + + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run korean_ocr_db_crnn_mobile --input_path "/PATH/TO/IMAGE" + $ hub run korean_ocr_db_crnn_mobile --input_path "/PATH/TO/IMAGE" --det True --rec True --use_angle_cls True --box_thresh 0.7 --angle_classification_thresh 0.8 --visualization True + ``` + - 通过命令行方式实现文字识别模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + ocr = hub.Module(name="korean_ocr_db_crnn_mobile", enable_mkldnn=True) # mkldnn加速仅在CPU下有效 + result = ocr.recognize_text(images=[cv2.imread('/PATH/TO/IMAGE')]) + + # or + # result = ocr.recognize_text(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 3、API + + - ```python + def __init__(self, + det=True, + rec=True, + use_angle_cls=False, + enable_mkldnn=False, + use_gpu=False, + box_thresh=0.6, + angle_classification_thresh=0.9) + ``` + + - 构造KoreanOCRDBCRNNMobile对象 + + - **参数** + - det(bool): 是否开启文字检测。默认为True。 + - rec(bool): 是否开启文字识别。默认为True。 + - use_angle_cls(bool): 是否开启方向分类, 用于设置使用方向分类器识别180度旋转文字。默认为False。 + - enable_mkldnn(bool): 是否开启mkldnn加速CPU计算。该参数仅在CPU运行下设置有效。默认为False。 + - use\_gpu (bool): 是否使用 GPU;**若使用GPU,请先设置CUDA_VISIBLE_DEVICES环境变量** + - box\_thresh (float): 检测文本框置信度的阈值; + - 
angle_classification_thresh(float): 文本方向分类置信度的阈值 + + + - ```python + def recognize_text(images=[], + paths=[], + output_dir='ocr_result', + visualization=False) + ``` + + - 预测API,检测输入图片中的所有文本的位置和识别文本结果。 + + - **参数** + + - paths (list\[str\]): 图片的路径; + - images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\],BGR格式; + - output\_dir (str): 图片的保存路径,默认设为 ocr\_result; + - visualization (bool): 是否将识别结果保存为图片文件, 仅有检测开启时有效, 默认为False; + + - **返回** + + - res (list\[dict\]): 识别结果的列表,列表中每一个元素为 dict,各字段为: + - data (list\[dict\]): 识别文本结果,列表中每一个元素为 dict,各字段为: + - text(str): 识别得到的文本 + - confidence(float): 识别文本结果置信度 + - text_box_position(list): 文本框在原图中的像素坐标,4*2的矩阵,依次表示文本框左下、右下、右上、左上顶点的坐标,如果无识别结果则data为\[\] + - orientation(str): 分类的方向,仅在只有方向分类开启时输出 + - score(float): 分类的得分,仅在只有方向分类开启时输出 + - save_path (str, optional): 识别结果的保存路径,如不保存图片则save_path为'' + + +## 四、服务部署 + +- PaddleHub Serving 可以部署一个目标检测的在线服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + - ```shell + $ hub serving start -m korean_ocr_db_crnn_mobile + ``` + + - 这样就完成了一个目标检测的服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + import cv2 + import base64 + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # 发送HTTP请求 + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/korean_ocr_db_crnn_mobile" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # 打印预测结果 + print(r.json()["results"]) + ``` + +## 五、更新历史 + +* 1.0.0 + + 初始发布 + +* 1.1.0 + + 优化模型 + - ```shell + $ hub install korean_ocr_db_crnn_mobile==1.1.0 + ``` diff --git a/modules/image/text_recognition/korean_ocr_db_crnn_mobile/__init__.py b/modules/image/text_recognition/korean_ocr_db_crnn_mobile/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/modules/image/text_recognition/korean_ocr_db_crnn_mobile/module.py b/modules/image/text_recognition/korean_ocr_db_crnn_mobile/module.py new file mode 100644 index 0000000000000000000000000000000000000000..916906af68160ccb46c513076ca25ef8853c81c6 --- /dev/null +++ b/modules/image/text_recognition/korean_ocr_db_crnn_mobile/module.py @@ -0,0 +1,87 @@ +import paddlehub as hub +from paddleocr.ppocr.utils.logging import get_logger +from paddleocr.tools.infer.utility import base64_to_cv2 +from paddlehub.module.module import moduleinfo, runnable, serving + + +@moduleinfo( + name="korean_ocr_db_crnn_mobile", + version="1.1.0", + summary="ocr service", + author="PaddlePaddle", + type="cv/text_recognition") +class KoreanOCRDBCRNNMobile: + def __init__(self, + det=True, + rec=True, + use_angle_cls=False, + enable_mkldnn=False, + use_gpu=False, + box_thresh=0.6, + angle_classification_thresh=0.9): + """ + initialize with the necessary elements + Args: + det(bool): Whether to use text detector. + rec(bool): Whether to use text recognizer. + use_angle_cls(bool): Whether to use text orientation classifier. + enable_mkldnn(bool): Whether to enable mkldnn. + use_gpu (bool): Whether to use gpu. 
+ box_thresh(float): the threshold of the detected text box's confidence + angle_classification_thresh(float): the threshold of the angle classification confidence + """ + self.logger = get_logger() + self.model = hub.Module( + name="multi_languages_ocr_db_crnn", + lang="korean", + det=det, + rec=rec, + use_angle_cls=use_angle_cls, + enable_mkldnn=enable_mkldnn, + use_gpu=use_gpu, + box_thresh=box_thresh, + angle_classification_thresh=angle_classification_thresh) + self.model.name = self.name + + def recognize_text(self, images=[], paths=[], output_dir='ocr_result', visualization=False): + """ + Get the text in the predicted images. + Args: + images (list(numpy.ndarray)): images data, shape of each is [H, W, C]. If images not paths + paths (list[str]): The paths of images. If paths not images + output_dir (str): The directory to store output images. + visualization (bool): Whether to save image or not. + Returns: + res (list): The result of text detection box and save path of images. + """ + all_results = self.model.recognize_text( + images=images, paths=paths, output_dir=output_dir, visualization=visualization) + return all_results + + @serving + def serving_method(self, images, **kwargs): + """ + Run as a service. + """ + images_decode = [base64_to_cv2(image) for image in images] + results = self.recognize_text(images_decode, **kwargs) + return results + + @runnable + def run_cmd(self, argvs): + """ + Run as a command + """ + results = self.model.run_cmd(argvs) + return results + + def export_onnx_model(self, dirname: str, input_shape_dict=None, opset_version=10): + ''' + Export the model to ONNX format. + + Args: + dirname(str): The directory to save the onnx model. + input_shape_dict: dictionary ``{ input_name: input_value }, eg. {'x': [-1, 3, -1, -1]}`` + opset_version(int): operator set + ''' + self.model.export_onnx_model(dirname=dirname, input_shape_dict=input_shape_dict, opset_version=opset_version) diff --git a/modules/image/text_recognition/korean_ocr_db_crnn_mobile/requirements.txt b/modules/image/text_recognition/korean_ocr_db_crnn_mobile/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..527c6de7f643cb427013aaff2409365538fed2d3 --- /dev/null +++ b/modules/image/text_recognition/korean_ocr_db_crnn_mobile/requirements.txt @@ -0,0 +1,4 @@ +paddleocr>=2.3.0.2 +paddle2onnx>=0.9.0 +shapely +pyclipper diff --git a/modules/image/text_recognition/latin_ocr_db_crnn_mobile/README.md b/modules/image/text_recognition/latin_ocr_db_crnn_mobile/README.md new file mode 100644 index 0000000000000000000000000000000000000000..c81d839c61daaf6a6ec1a7649b62d7f6698a452e --- /dev/null +++ b/modules/image/text_recognition/latin_ocr_db_crnn_mobile/README.md @@ -0,0 +1,166 @@ +# latin_ocr_db_crnn_mobile + + +|模型名称|latin_ocr_db_crnn_mobile| +| :--- | :---: | +|类别|图像-文字识别| +|网络|Differentiable Binarization+CRNN| +|数据集|icdar2015数据集| +|是否支持Fine-tuning|否| +|最新更新日期|2021-12-2| +|数据指标|-| + + +## 一、模型基本信息 + +- ### 模型介绍 + + - latin_ocr_db_crnn_mobile Module用于识别图片当中的拉丁文。其基于multi_languages_ocr_db_crnn检测得到的文本框,继续识别文本框中的拉丁文文字。最终识别文字算法采用CRNN(Convolutional Recurrent Neural Network)即卷积递归神经网络。其是DCNN和RNN的组合,专门用于识别图像中的序列式对象。与CTC loss配合使用,进行文字识别,可以直接从文本词级或行级的标注中学习,不需要详细的字符级的标注。该Module是一个识别拉丁文的轻量级OCR模型,支持直接预测。 + + - 更多详情参考: + - [Real-time Scene Text Detection with Differentiable Binarization](https://arxiv.org/pdf/1911.08947.pdf) + - [An end-to-end trainable neural network for image-based sequence recognition and its application to scene text 
recognition](https://arxiv.org/pdf/1507.05717.pdf) + + + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.0.2 + + - paddlehub >= 2.0.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + +- ### 2、安装 + + - ```shell + $ hub install latin_ocr_db_crnn_mobile + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + + + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run latin_ocr_db_crnn_mobile --input_path "/PATH/TO/IMAGE" + $ hub run latin_ocr_db_crnn_mobile --input_path "/PATH/TO/IMAGE" --det True --rec True --use_angle_cls True --box_thresh 0.7 --angle_classification_thresh 0.8 --visualization True + ``` + - 通过命令行方式实现文字识别模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + ocr = hub.Module(name="latin_ocr_db_crnn_mobile", enable_mkldnn=True) # mkldnn加速仅在CPU下有效 + result = ocr.recognize_text(images=[cv2.imread('/PATH/TO/IMAGE')]) + + # or + # result = ocr.recognize_text(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 3、API + + - ```python + def __init__(self, + det=True, + rec=True, + use_angle_cls=False, + enable_mkldnn=False, + use_gpu=False, + box_thresh=0.6, + angle_classification_thresh=0.9) + ``` + + - 构造LatinOCRDBCRNNMobile对象 + + - **参数** + - det(bool): 是否开启文字检测。默认为True。 + - rec(bool): 是否开启文字识别。默认为True。 + - use_angle_cls(bool): 是否开启方向分类, 用于设置使用方向分类器识别180度旋转文字。默认为False。 + - enable_mkldnn(bool): 是否开启mkldnn加速CPU计算。该参数仅在CPU运行下设置有效。默认为False。 + - use\_gpu (bool): 是否使用 GPU;**若使用GPU,请先设置CUDA_VISIBLE_DEVICES环境变量** + - box\_thresh (float): 检测文本框置信度的阈值; + - angle_classification_thresh(float): 文本方向分类置信度的阈值 + + + - ```python + def recognize_text(images=[], + paths=[], + output_dir='ocr_result', + visualization=False) + ``` + + - 预测API,检测输入图片中的所有文本的位置和识别文本结果。 + + - **参数** + + - paths (list\[str\]): 图片的路径; + - images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\],BGR格式; + - output\_dir (str): 图片的保存路径,默认设为 ocr\_result; + - visualization (bool): 是否将识别结果保存为图片文件, 仅有检测开启时有效, 默认为False; + + - **返回** + + - res (list\[dict\]): 识别结果的列表,列表中每一个元素为 dict,各字段为: + - data (list\[dict\]): 识别文本结果,列表中每一个元素为 dict,各字段为: + - text(str): 识别得到的文本 + - confidence(float): 识别文本结果置信度 + - text_box_position(list): 文本框在原图中的像素坐标,4*2的矩阵,依次表示文本框左下、右下、右上、左上顶点的坐标,如果无识别结果则data为\[\] + - orientation(str): 分类的方向,仅在只有方向分类开启时输出 + - score(float): 分类的得分,仅在只有方向分类开启时输出 + - save_path (str, optional): 识别结果的保存路径,如不保存图片则save_path为'' + + +## 四、服务部署 + +- PaddleHub Serving 可以部署一个目标检测的在线服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + - ```shell + $ hub serving start -m latin_ocr_db_crnn_mobile + ``` + + - 这样就完成了一个目标检测的服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + import cv2 + import base64 + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # 发送HTTP请求 + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/latin_ocr_db_crnn_mobile" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # 打印预测结果 + print(r.json()["results"]) + ``` + +## 五、更新历史 + +* 1.0.0 + + 初始发布 + - ```shell + $ hub install 
latin_ocr_db_crnn_mobile==1.0.0 + ``` diff --git a/modules/image/text_recognition/latin_ocr_db_crnn_mobile/__init__.py b/modules/image/text_recognition/latin_ocr_db_crnn_mobile/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/modules/image/text_recognition/latin_ocr_db_crnn_mobile/module.py b/modules/image/text_recognition/latin_ocr_db_crnn_mobile/module.py new file mode 100644 index 0000000000000000000000000000000000000000..40ca5bee4acfd5059cee6c8163e90aee6cbc19ee --- /dev/null +++ b/modules/image/text_recognition/latin_ocr_db_crnn_mobile/module.py @@ -0,0 +1,87 @@ +import paddlehub as hub +from paddleocr.ppocr.utils.logging import get_logger +from paddleocr.tools.infer.utility import base64_to_cv2 +from paddlehub.module.module import moduleinfo, runnable, serving + + +@moduleinfo( + name="latin_ocr_db_crnn_mobile", + version="1.0.0", + summary="ocr service", + author="PaddlePaddle", + type="cv/text_recognition") +class LatinOCRDBCRNNMobile: + def __init__(self, + det=True, + rec=True, + use_angle_cls=False, + enable_mkldnn=False, + use_gpu=False, + box_thresh=0.6, + angle_classification_thresh=0.9): + """ + initialize with the necessary elements + Args: + det(bool): Whether to use text detector. + rec(bool): Whether to use text recognizer. + use_angle_cls(bool): Whether to use text orientation classifier. + enable_mkldnn(bool): Whether to enable mkldnn. + use_gpu (bool): Whether to use gpu. + box_thresh(float): the threshold of the detected text box's confidence + angle_classification_thresh(float): the threshold of the angle classification confidence + """ + self.logger = get_logger() + self.model = hub.Module( + name="multi_languages_ocr_db_crnn", + lang="latin", + det=det, + rec=rec, + use_angle_cls=use_angle_cls, + enable_mkldnn=enable_mkldnn, + use_gpu=use_gpu, + box_thresh=box_thresh, + angle_classification_thresh=angle_classification_thresh) + self.model.name = self.name + + def recognize_text(self, images=[], paths=[], output_dir='ocr_result', visualization=False): + """ + Get the text in the predicted images. + Args: + images (list(numpy.ndarray)): images data, shape of each is [H, W, C]. If images not paths + paths (list[str]): The paths of images. If paths not images + output_dir (str): The directory to store output images. + visualization (bool): Whether to save image or not. + Returns: + res (list): The result of text detection box and save path of images. + """ + all_results = self.model.recognize_text( + images=images, paths=paths, output_dir=output_dir, visualization=visualization) + return all_results + + @serving + def serving_method(self, images, **kwargs): + """ + Run as a service. + """ + images_decode = [base64_to_cv2(image) for image in images] + results = self.recognize_text(images_decode, **kwargs) + return results + + @runnable + def run_cmd(self, argvs): + """ + Run as a command + """ + results = self.model.run_cmd(argvs) + return results + + def export_onnx_model(self, dirname: str, input_shape_dict=None, opset_version=10): + ''' + Export the model to ONNX format. + + Args: + dirname(str): The directory to save the onnx model. + input_shape_dict: dictionary ``{ input_name: input_value }, eg. 
{'x': [-1, 3, -1, -1]}`` + opset_version(int): operator set + ''' + self.model.export_onnx_model(dirname=dirname, input_shape_dict=input_shape_dict, opset_version=opset_version) diff --git a/modules/image/text_recognition/latin_ocr_db_crnn_mobile/requirements.txt b/modules/image/text_recognition/latin_ocr_db_crnn_mobile/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..527c6de7f643cb427013aaff2409365538fed2d3 --- /dev/null +++ b/modules/image/text_recognition/latin_ocr_db_crnn_mobile/requirements.txt @@ -0,0 +1,4 @@ +paddleocr>=2.3.0.2 +paddle2onnx>=0.9.0 +shapely +pyclipper diff --git a/modules/image/text_recognition/multi_languages_ocr_db_crnn/README.md b/modules/image/text_recognition/multi_languages_ocr_db_crnn/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b503731a85f602f6eb4bd6eed9ecf6fa7aa8a0ac --- /dev/null +++ b/modules/image/text_recognition/multi_languages_ocr_db_crnn/README.md @@ -0,0 +1,223 @@ +# multi_languages_ocr_db_crnn + +|模型名称|multi_languages_ocr_db_crnn| +| :--- | :---: | +|类别|图像-文字识别| +|网络|Differentiable Binarization+CRNN| +|数据集|icdar2015数据集| +|是否支持Fine-tuning|否| +|最新更新日期|2021-11-24| +|数据指标|-| + + +## 一、模型基本信息 + +- ### 应用效果展示 + - 样例结果示例: +
+ +- ### 模型介绍 + + - multi_languages_ocr_db_crnn Module用于识别图片当中的文字。其基于PaddleOCR模块,检测得到文本框,识别文本框中的文字,再对检测文本框进行角度分类。最终检测算法采用DB(Differentiable Binarization),而识别文字算法则采用CRNN(Convolutional Recurrent Neural Network)即卷积递归神经网络。 + 该Module不仅提供了通用场景下的中英文模型,也提供了[80个语言](#语种缩写)的小语种模型。 + + +
+ + - 更多详情参考: + - [Real-time Scene Text Detection with Differentiable Binarization](https://arxiv.org/pdf/1911.08947.pdf) + - [An end-to-end trainable neural network for image-based sequence recognition and its application to scene text recognition](https://arxiv.org/pdf/1507.05717.pdf) + + + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.0.2 + + - paddlehub >= 2.0.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + +- ### 2、安装 + + - ```shell + $ hub install multi_languages_ocr_db_crnn + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + + + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run multi_languages_ocr_db_crnn --input_path "/PATH/TO/IMAGE" + $ hub run multi_languages_ocr_db_crnn --input_path "/PATH/TO/IMAGE" --lang "ch" --det True --rec True --use_angle_cls True --box_thresh 0.7 --angle_classification_thresh 0.8 --visualization True + ``` + - 通过命令行方式实现文字识别模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + ocr = hub.Module(name="multi_languages_ocr_db_crnn", lang='en', enable_mkldnn=True) # mkldnn加速仅在CPU下有效 + result = ocr.recognize_text(images=[cv2.imread('/PATH/TO/IMAGE')]) + + # or + # result = ocr.recognize_text(paths=['/PATH/TO/IMAGE']) + ``` + - multi_languages_ocr_db_crnn目前支持80个语种,可以通过修改lang参数进行切换,对于英文模型,指定lang=en,具体支持的[语种](#语种缩写)可查看表格。 + +- ### 3、API + + - ```python + def __init__(self, + lang="ch", + det=True, rec=True, + use_angle_cls=False, + enable_mkldnn=False, + use_gpu=False, + box_thresh=0.6, + angle_classification_thresh=0.9) + ``` + + - 构造MultiLangOCR对象 + + - **参数** + - lang(str): 多语言模型选择。默认为中文模型,即lang="ch"。 + - det(bool): 是否开启文字检测。默认为True。 + - rec(bool): 是否开启文字识别。默认为True。 + - use_angle_cls(bool): 是否开启方向分类, 用于设置使用方向分类器识别180度旋转文字。默认为False。 + - enable_mkldnn(bool): 是否开启mkldnn加速CPU计算。该参数仅在CPU运行下设置有效。默认为False。 + - use\_gpu (bool): 是否使用 GPU;**若使用GPU,请先设置CUDA_VISIBLE_DEVICES环境变量** + - box\_thresh (float): 检测文本框置信度的阈值; + - angle_classification_thresh(float): 文本方向分类置信度的阈值 + + + - ```python + def recognize_text(images=[], + paths=[], + output_dir='ocr_result', + visualization=False) + ``` + + - 预测API,检测输入图片中的所有文本的位置和识别文本结果。 + + - **参数** + + - paths (list\[str\]): 图片的路径; + - images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\],BGR格式; + - output\_dir (str): 图片的保存路径,默认设为 ocr\_result; + - visualization (bool): 是否将识别结果保存为图片文件, 仅有检测开启时有效, 默认为False; + + - **返回** + + - res (list\[dict\]): 识别结果的列表,列表中每一个元素为 dict,各字段为: + - data (list\[dict\]): 识别文本结果,列表中每一个元素为 dict,各字段为: + - text(str): 识别得到的文本 + - confidence(float): 识别文本结果置信度 + - text_box_position(list): 文本框在原图中的像素坐标,4*2的矩阵,依次表示文本框左下、右下、右上、左上顶点的坐标,如果无识别结果则data为\[\] + - orientation(str): 分类的方向,仅在只有方向分类开启时输出 + - score(float): 分类的得分,仅在只有方向分类开启时输出 + - save_path (str, optional): 识别结果的保存路径,如不保存图片则save_path为'' + + +## 四、服务部署 + +- PaddleHub Serving 可以部署一个目标检测的在线服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + - ```shell + $ hub serving start -m multi_languages_ocr_db_crnn + ``` + + - 这样就完成了一个目标检测的服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + import cv2 + import base64 + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return 
base64.b64encode(data.tostring()).decode('utf8') + + # 发送HTTP请求 + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/multi_languages_ocr_db_crnn" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # 打印预测结果 + print(r.json()["results"]) + ``` + + +## 五、支持语种及缩写 + +| 语种 | 描述 | 缩写 | | 语种 | 描述 | 缩写 | +| --- | --- | --- | ---|--- | --- | --- | +|中文|chinese and english|ch| |保加利亚文|Bulgarian |bg| +|英文|english|en| |乌克兰文|Ukranian|uk| +|法文|french|fr| |白俄罗斯文|Belarusian|be| +|德文|german|german| |泰卢固文|Telugu |te| +|日文|japan|japan| | 阿巴扎文 | Abaza | abq | +|韩文|korean|korean| |泰米尔文|Tamil |ta| +|中文繁体|chinese traditional |chinese_cht| |南非荷兰文 |Afrikaans |af| +|意大利文| Italian |it| |阿塞拜疆文 |Azerbaijani |az| +|西班牙文|Spanish |es| |波斯尼亚文|Bosnian|bs| +|葡萄牙文| Portuguese|pt| |捷克文|Czech|cs| +|俄罗斯文|Russia|ru| |威尔士文 |Welsh |cy| +|阿拉伯文|Arabic|ar| |丹麦文 |Danish|da| +|印地文|Hindi|hi| |爱沙尼亚文 |Estonian |et| +|维吾尔|Uyghur|ug| |爱尔兰文 |Irish |ga| +|波斯文|Persian|fa| |克罗地亚文|Croatian |hr| +|乌尔都文|Urdu|ur| |匈牙利文|Hungarian |hu| +|塞尔维亚文(latin)| Serbian(latin) |rs_latin| |印尼文|Indonesian|id| +|欧西坦文|Occitan |oc| |冰岛文 |Icelandic|is| +|马拉地文|Marathi|mr| |库尔德文 |Kurdish|ku| +|尼泊尔文|Nepali|ne| |立陶宛文|Lithuanian |lt| +|塞尔维亚文(cyrillic)|Serbian(cyrillic)|rs_cyrillic| |拉脱维亚文 |Latvian |lv| +|毛利文|Maori|mi| | 达尔瓦文|Dargwa |dar| +|马来文 |Malay|ms| | 因古什文|Ingush |inh| +|马耳他文 |Maltese |mt| | 拉克文|Lak |lbe| +|荷兰文 |Dutch |nl| | 莱兹甘文|Lezghian |lez| +|挪威文 |Norwegian |no| |塔巴萨兰文 |Tabassaran |tab| +|波兰文|Polish |pl| | 比尔哈文|Bihari |bh| +| 罗马尼亚文|Romanian |ro| | 迈蒂利文|Maithili |mai| +| 斯洛伐克文|Slovak |sk| | 昂加文|Angika |ang| +| 斯洛文尼亚文|Slovenian |sl| | 孟加拉文|Bhojpuri |bho| +| 阿尔巴尼亚文|Albanian |sq| | 摩揭陀文 |Magahi |mah| +| 瑞典文|Swedish |sv| | 那格浦尔文|Nagpur |sck| +| 西瓦希里文|Swahili |sw| | 尼瓦尔文|Newari |new| +| 塔加洛文|Tagalog |tl| | 保加利亚文 |Goan Konkani|gom| +| 土耳其文|Turkish |tr| | 沙特阿拉伯文|Saudi Arabia|sa| +| 乌兹别克文|Uzbek |uz| | 阿瓦尔文|Avar |ava| +| 越南文|Vietnamese |vi| | 阿瓦尔文|Avar |ava| +| 蒙古文|Mongolian |mn| | 阿迪赫文|Adyghe |ady| + +## 五、更新历史 + +* 1.0.0 + + 初始发布 + - ```shell + $ hub install multi_languages_ocr_db_crnn==1.0.0 + ``` diff --git a/modules/image/text_recognition/multi_languages_ocr_db_crnn/__init__.py b/modules/image/text_recognition/multi_languages_ocr_db_crnn/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/modules/image/text_recognition/multi_languages_ocr_db_crnn/assets/fonts/arabic.ttf b/modules/image/text_recognition/multi_languages_ocr_db_crnn/assets/fonts/arabic.ttf new file mode 100644 index 0000000000000000000000000000000000000000..064b6041ee32814d852e084f639dae75d044d357 Binary files /dev/null and b/modules/image/text_recognition/multi_languages_ocr_db_crnn/assets/fonts/arabic.ttf differ diff --git a/modules/image/text_recognition/multi_languages_ocr_db_crnn/assets/fonts/cyrillic.ttf b/modules/image/text_recognition/multi_languages_ocr_db_crnn/assets/fonts/cyrillic.ttf new file mode 100644 index 0000000000000000000000000000000000000000..be4bf6605808d15ab25c9cbbe1fda2a1d190ac8b Binary files /dev/null and b/modules/image/text_recognition/multi_languages_ocr_db_crnn/assets/fonts/cyrillic.ttf differ diff --git a/modules/image/text_recognition/german_ocr_db_crnn_mobile/assets/german.ttf b/modules/image/text_recognition/multi_languages_ocr_db_crnn/assets/fonts/french.ttf similarity index 100% rename from modules/image/text_recognition/german_ocr_db_crnn_mobile/assets/german.ttf rename 
to modules/image/text_recognition/multi_languages_ocr_db_crnn/assets/fonts/french.ttf diff --git a/modules/image/text_recognition/multi_languages_ocr_db_crnn/assets/fonts/german.ttf b/modules/image/text_recognition/multi_languages_ocr_db_crnn/assets/fonts/german.ttf new file mode 100644 index 0000000000000000000000000000000000000000..ab68fb197d4479b3b6dec6e85bd5cbaf433a87c5 Binary files /dev/null and b/modules/image/text_recognition/multi_languages_ocr_db_crnn/assets/fonts/german.ttf differ diff --git a/modules/image/text_recognition/multi_languages_ocr_db_crnn/assets/fonts/hindi.ttf b/modules/image/text_recognition/multi_languages_ocr_db_crnn/assets/fonts/hindi.ttf new file mode 100644 index 0000000000000000000000000000000000000000..8b0c36f5868b935464f30883094b9556c3e41009 Binary files /dev/null and b/modules/image/text_recognition/multi_languages_ocr_db_crnn/assets/fonts/hindi.ttf differ diff --git a/modules/image/text_recognition/multi_languages_ocr_db_crnn/assets/fonts/kannada.ttf b/modules/image/text_recognition/multi_languages_ocr_db_crnn/assets/fonts/kannada.ttf new file mode 100644 index 0000000000000000000000000000000000000000..43b60d423ad5ea5f5528c9c9e5d6f013f87fa1d7 Binary files /dev/null and b/modules/image/text_recognition/multi_languages_ocr_db_crnn/assets/fonts/kannada.ttf differ diff --git a/modules/image/text_recognition/multi_languages_ocr_db_crnn/assets/fonts/korean.ttf b/modules/image/text_recognition/multi_languages_ocr_db_crnn/assets/fonts/korean.ttf new file mode 100644 index 0000000000000000000000000000000000000000..e638ce37f67ff1cd9babf73387786eaeb5c52968 Binary files /dev/null and b/modules/image/text_recognition/multi_languages_ocr_db_crnn/assets/fonts/korean.ttf differ diff --git a/modules/image/text_recognition/multi_languages_ocr_db_crnn/assets/fonts/latin.ttf b/modules/image/text_recognition/multi_languages_ocr_db_crnn/assets/fonts/latin.ttf new file mode 100644 index 0000000000000000000000000000000000000000..e392413ac2f82905b3c07073669c3e2058d20235 Binary files /dev/null and b/modules/image/text_recognition/multi_languages_ocr_db_crnn/assets/fonts/latin.ttf differ diff --git a/modules/image/text_recognition/multi_languages_ocr_db_crnn/assets/fonts/marathi.ttf b/modules/image/text_recognition/multi_languages_ocr_db_crnn/assets/fonts/marathi.ttf new file mode 100644 index 0000000000000000000000000000000000000000..a796d3edc6a4cc140a9360d0fc502a9d99352db0 Binary files /dev/null and b/modules/image/text_recognition/multi_languages_ocr_db_crnn/assets/fonts/marathi.ttf differ diff --git a/modules/image/text_recognition/multi_languages_ocr_db_crnn/assets/fonts/nepali.ttf b/modules/image/text_recognition/multi_languages_ocr_db_crnn/assets/fonts/nepali.ttf new file mode 100644 index 0000000000000000000000000000000000000000..8b0c36f5868b935464f30883094b9556c3e41009 Binary files /dev/null and b/modules/image/text_recognition/multi_languages_ocr_db_crnn/assets/fonts/nepali.ttf differ diff --git a/modules/image/text_recognition/multi_languages_ocr_db_crnn/assets/fonts/persian.ttf b/modules/image/text_recognition/multi_languages_ocr_db_crnn/assets/fonts/persian.ttf new file mode 100644 index 0000000000000000000000000000000000000000..bdb1c8d7402148127b7633c6b4cd1586e23745ab Binary files /dev/null and b/modules/image/text_recognition/multi_languages_ocr_db_crnn/assets/fonts/persian.ttf differ diff --git a/modules/image/text_recognition/multi_languages_ocr_db_crnn/assets/fonts/simfang.ttf b/modules/image/text_recognition/multi_languages_ocr_db_crnn/assets/fonts/simfang.ttf 
new file mode 100644 index 0000000000000000000000000000000000000000..2b59eae4195d1cdbea375503c0cc34d5631cb0f9 Binary files /dev/null and b/modules/image/text_recognition/multi_languages_ocr_db_crnn/assets/fonts/simfang.ttf differ diff --git a/modules/image/text_recognition/multi_languages_ocr_db_crnn/assets/fonts/spanish.ttf b/modules/image/text_recognition/multi_languages_ocr_db_crnn/assets/fonts/spanish.ttf new file mode 100644 index 0000000000000000000000000000000000000000..532353d2778cd2bb37a5baf06f5daeea32729168 Binary files /dev/null and b/modules/image/text_recognition/multi_languages_ocr_db_crnn/assets/fonts/spanish.ttf differ diff --git a/modules/image/text_recognition/multi_languages_ocr_db_crnn/assets/fonts/tamil.ttf b/modules/image/text_recognition/multi_languages_ocr_db_crnn/assets/fonts/tamil.ttf new file mode 100644 index 0000000000000000000000000000000000000000..2e9998e8d8218f1e868f06ba0db3e13b4620eed1 Binary files /dev/null and b/modules/image/text_recognition/multi_languages_ocr_db_crnn/assets/fonts/tamil.ttf differ diff --git a/modules/image/text_recognition/multi_languages_ocr_db_crnn/assets/fonts/telugu.ttf b/modules/image/text_recognition/multi_languages_ocr_db_crnn/assets/fonts/telugu.ttf new file mode 100644 index 0000000000000000000000000000000000000000..12c91e41973a4704f52984e2089fdb2eaf1ed4a5 Binary files /dev/null and b/modules/image/text_recognition/multi_languages_ocr_db_crnn/assets/fonts/telugu.ttf differ diff --git a/modules/image/text_recognition/multi_languages_ocr_db_crnn/assets/fonts/urdu.ttf b/modules/image/text_recognition/multi_languages_ocr_db_crnn/assets/fonts/urdu.ttf new file mode 100644 index 0000000000000000000000000000000000000000..625feee2e9616809c13e17eeb7da1aec58988b65 Binary files /dev/null and b/modules/image/text_recognition/multi_languages_ocr_db_crnn/assets/fonts/urdu.ttf differ diff --git a/modules/image/text_recognition/multi_languages_ocr_db_crnn/assets/fonts/uyghur.ttf b/modules/image/text_recognition/multi_languages_ocr_db_crnn/assets/fonts/uyghur.ttf new file mode 100644 index 0000000000000000000000000000000000000000..625feee2e9616809c13e17eeb7da1aec58988b65 Binary files /dev/null and b/modules/image/text_recognition/multi_languages_ocr_db_crnn/assets/fonts/uyghur.ttf differ diff --git a/modules/image/text_recognition/multi_languages_ocr_db_crnn/module.py b/modules/image/text_recognition/multi_languages_ocr_db_crnn/module.py new file mode 100644 index 0000000000000000000000000000000000000000..2e598f80634e282e66a374f035df24a7f8201769 --- /dev/null +++ b/modules/image/text_recognition/multi_languages_ocr_db_crnn/module.py @@ -0,0 +1,220 @@ +import argparse +import sys +import os +import ast + +import paddle +import paddle2onnx +import paddle2onnx as p2o +import paddle.fluid as fluid +from paddleocr import PaddleOCR +from paddleocr.ppocr.utils.logging import get_logger +from paddleocr.tools.infer.utility import base64_to_cv2 +from paddlehub.module.module import moduleinfo, runnable, serving + +from .utils import read_images, save_result_image, mkdir + + +@moduleinfo( + name="multi_languages_ocr_db_crnn", + version="1.0.0", + summary="ocr service", + author="PaddlePaddle", + type="cv/text_recognition") +class MultiLangOCR: + def __init__(self, + lang="ch", + det=True, + rec=True, + use_angle_cls=False, + enable_mkldnn=False, + use_gpu=False, + box_thresh=0.6, + angle_classification_thresh=0.9): + """ + initialize with the necessary elements + Args: + lang(str): the selection of languages + det(bool): Whether to use text detector. 
+ rec(bool): Whether to use text recognizer. + use_angle_cls(bool): Whether to use text orientation classifier. + enable_mkldnn(bool): Whether to enable mkldnn. + use_gpu (bool): Whether to use gpu. + box_thresh(float): the threshold of the detected text box's confidence + angle_classification_thresh(float): the threshold of the angle classification confidence + """ + self.lang = lang + self.logger = get_logger() + argc = len(sys.argv) + if argc == 1 or argc > 1 and sys.argv[1] == 'serving': + self.det = det + self.rec = rec + self.use_angle_cls = use_angle_cls + self.engine = PaddleOCR( + lang=lang, + det=det, + rec=rec, + use_angle_cls=use_angle_cls, + enable_mkldnn=enable_mkldnn, + use_gpu=use_gpu, + det_db_box_thresh=box_thresh, + cls_thresh=angle_classification_thresh) + self.det_model_dir = self.engine.text_detector.args.det_model_dir + self.rec_model_dir = self.engine.text_detector.args.rec_model_dir + self.cls_model_dir = self.engine.text_detector.args.cls_model_dir + + def recognize_text(self, images=[], paths=[], output_dir='ocr_result', visualization=False): + """ + Get the text in the predicted images. + Args: + images (list(numpy.ndarray)): images data, shape of each is [H, W, C]. If images not paths + paths (list[str]): The paths of images. If paths not images + output_dir (str): The directory to store output images. + visualization (bool): Whether to save image or not. + Returns: + res (list): The result of text detection box and save path of images. + """ + + if images != [] and isinstance(images, list) and paths == []: + predicted_data = images + elif images == [] and isinstance(paths, list) and paths != []: + predicted_data = read_images(paths) + else: + raise TypeError("The input data is inconsistent with expectations.") + + assert predicted_data != [], "There is not any image to be predicted. Please check the input data." + all_results = [] + for img in predicted_data: + result = {'save_path': ''} + if img is None: + result['data'] = [] + all_results.append(result) + continue + original_image = img.copy() + rec_results = self.engine.ocr(img, det=self.det, rec=self.rec, cls=self.use_angle_cls) + rec_res_final = [] + for line in rec_results: + if self.det and self.rec: + boxes = line[0] + text, score = line[1] + rec_res_final.append({'text': text, 'confidence': float(score), 'text_box_position': boxes}) + elif self.det and not self.rec: + boxes = line + rec_res_final.append({'text_box_position': boxes}) + else: + if self.use_angle_cls and not self.rec: + orientation, score = line + rec_res_final.append({'orientation': orientation, 'score': float(score)}) + else: + text, score = line + rec_res_final.append({'text': text, 'confidence': float(score)}) + + result['data'] = rec_res_final + if visualization and result['data']: + result['save_path'] = save_result_image(original_image, rec_results, output_dir, self.directory, + self.lang, self.det, self.rec, self.logger) + + all_results.append(result) + return all_results + + @serving + def serving_method(self, images, **kwargs): + """ + Run as a service. 
+        """
+        images_decode = [base64_to_cv2(image) for image in images]
+        results = self.recognize_text(images_decode, **kwargs)
+        return results
+
+    @runnable
+    def run_cmd(self, argvs):
+        """
+        Run as a command.
+        """
+        parser = self.arg_parser()
+        args = parser.parse_args(argvs)
+        if args.lang is not None:
+            self.lang = args.lang
+        self.det = args.det
+        self.rec = args.rec
+        self.use_angle_cls = args.use_angle_cls
+        self.engine = PaddleOCR(
+            lang=self.lang,
+            det=args.det,
+            rec=args.rec,
+            use_angle_cls=args.use_angle_cls,
+            enable_mkldnn=args.enable_mkldnn,
+            use_gpu=args.use_gpu,
+            det_db_box_thresh=args.box_thresh,
+            cls_thresh=args.angle_classification_thresh)
+        results = self.recognize_text(
+            paths=[args.input_path], output_dir=args.output_dir, visualization=args.visualization)
+        return results
+
+    def arg_parser(self):
+        parser = argparse.ArgumentParser(
+            description="Run the %s module." % self.name,
+            prog='hub run %s' % self.name,
+            usage='%(prog)s',
+            add_help=True)
+
+        parser.add_argument('--input_path', type=str, default=None, help="path to the input image. Required.", required=True)
+        parser.add_argument('--use_gpu', type=ast.literal_eval, default=False, help="whether to use GPU or not")
+        parser.add_argument('--output_dir', type=str, default='ocr_result', help="The directory to save output images.")
+        parser.add_argument(
+            '--visualization', type=ast.literal_eval, default=False, help="whether to save output as images.")
+        parser.add_argument('--lang', type=str, default=None, help="the language to be recognized")
+        parser.add_argument('--det', type=ast.literal_eval, default=True, help="whether to use the text detector or not")
+        parser.add_argument('--rec', type=ast.literal_eval, default=True, help="whether to use the text recognizer or not")
+        parser.add_argument(
+            '--use_angle_cls', type=ast.literal_eval, default=False, help="whether to use the text orientation classifier or not")
+        parser.add_argument('--enable_mkldnn', type=ast.literal_eval, default=False, help="whether to use mkldnn or not")
+        parser.add_argument(
+            "--box_thresh", type=float, default=0.6, help="set the confidence threshold of the detected text boxes")
+        parser.add_argument(
+            "--angle_classification_thresh",
+            type=float,
+            default=0.9,
+            help="set the confidence threshold of the angle classification")
+
+        return parser
+
+    def export_onnx_model(self, dirname: str, input_shape_dict=None, opset_version=10):
+        '''
+        Export the model to ONNX format.
+
+        Args:
+            dirname(str): The directory to save the onnx model.
+            input_shape_dict: dictionary ``{ input_name: input_value }``, e.g. ``{'x': [-1, 3, -1, -1]}``
+            opset_version(int): ONNX operator set version; must be >= 10.
+        '''
+        # paddle2onnx >= 0.9.0 is required; compare (major, minor) instead of the minor version alone.
+        v0, v1, v2 = paddle2onnx.__version__.split('.')
+        if (int(v0), int(v1)) < (0, 9):
+            raise ImportError("paddle2onnx>=0.9.0 is required")
+
+        if input_shape_dict is not None and not isinstance(input_shape_dict, dict):
+            raise Exception("input_shape_dict should be dict, e.g. {'x': [-1, 3, -1, -1]}.")
+
+        if opset_version <= 9:
+            raise Exception("opset_version <= 9 is not supported, please try with a higher opset_version >= 10.")
+
+        path_dict = {"det": self.det_model_dir, "rec": self.rec_model_dir, "cls": self.cls_model_dir}
+        for (key, path) in path_dict.items():
+            model_filename = 'inference.pdmodel'
+            params_filename = 'inference.pdiparams'
+            save_file = os.path.join(dirname, '{}_{}.onnx'.format(self.name, key))
+
+            # Load the inference model saved with 'paddle.fluid.io.save_inference_model' and convert it to ONNX.
+            if hasattr(paddle, 'enable_static'):
+                paddle.enable_static()
+            exe = fluid.Executor(fluid.CPUPlace())
+            if model_filename is None and params_filename is None:
+                [program, feed_var_names, fetch_vars] = fluid.io.load_inference_model(path, exe)
+            else:
+                [program, feed_var_names, fetch_vars] = fluid.io.load_inference_model(
+                    path, exe, model_filename=model_filename, params_filename=params_filename)
+
+            onnx_proto = p2o.run_convert(program, input_shape_dict=input_shape_dict, opset_version=opset_version)
+            mkdir(save_file)
+            with open(save_file, "wb") as f:
+                f.write(onnx_proto.SerializeToString())
diff --git a/modules/image/text_recognition/multi_languages_ocr_db_crnn/requirements.txt b/modules/image/text_recognition/multi_languages_ocr_db_crnn/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..527c6de7f643cb427013aaff2409365538fed2d3
--- /dev/null
+++ b/modules/image/text_recognition/multi_languages_ocr_db_crnn/requirements.txt
@@ -0,0 +1,4 @@
+paddleocr>=2.3.0.2
+paddle2onnx>=0.9.0
+shapely
+pyclipper
diff --git a/modules/image/text_recognition/multi_languages_ocr_db_crnn/utils.py b/modules/image/text_recognition/multi_languages_ocr_db_crnn/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..e64e791e5e4e62bc90f73ad0698403028bd9bf9b
--- /dev/null
+++ b/modules/image/text_recognition/multi_languages_ocr_db_crnn/utils.py
@@ -0,0 +1,100 @@
+import os
+import time
+
+import cv2
+import numpy as np
+from PIL import Image, ImageDraw
+
+from paddleocr import draw_ocr
+
+
+def save_result_image(original_image,
+                      rec_results,
+                      output_dir='ocr_result',
+                      directory=None,
+                      lang='ch',
+                      det=True,
+                      rec=True,
+                      logger=None):
+    image = Image.fromarray(cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB))
+    if det and rec:
+        boxes = [line[0] for line in rec_results]
+        txts = [line[1][0] for line in rec_results]
+        scores = [line[1][1] for line in rec_results]
+        fonts_lang = 'fonts/simfang.ttf'
+        lang_fonts = {
+            'korean': 'korean',
+            'fr': 'french',
+            'german': 'german',
+            'hi': 'hindi',
+            'ne': 'nepali',
+            'fa': 'persian',
+            'es': 'spanish',
+            'ta': 'tamil',
+            'te': 'telugu',
+            'ur': 'urdu',
+            'ug': 'uyghur',
+        }
+        if lang in lang_fonts.keys():
+            fonts_lang = 'fonts/' + lang_fonts[lang] + '.ttf'
+        font_file = os.path.join(directory, 'assets', fonts_lang)
+        im_show = draw_ocr(image, boxes, txts, scores, font_path=font_file)
+    elif det and not rec:
+        boxes = rec_results
+        im_show = draw_boxes(image, boxes)
+        im_show = np.array(im_show)
+    else:
+        logger.warning("Visualization is not supported when text detection is disabled.")
+        return ""
+
+    if not os.path.exists(output_dir):
+        os.makedirs(output_dir)
+
+    ext = get_image_ext(original_image)
+    saved_name = 'ndarray_{}{}'.format(time.time(), ext)
+    save_file_path = os.path.join(output_dir, saved_name)
+    im_show = Image.fromarray(im_show)
+    im_show.save(save_file_path)
+    return save_file_path
+
+
+def read_images(paths=[]):
+    images = []
+    for img_path in paths:
+        assert os.path.isfile(img_path), "The {} isn't a 
valid file.".format(img_path) + img = cv2.imread(img_path) + if img is None: + continue + images.append(img) + return images + + +def draw_boxes(image, boxes, scores=None, drop_score=0.5): + img = image.copy() + draw = ImageDraw.Draw(img) + if scores is None: + scores = [1] * len(boxes) + for (box, score) in zip(boxes, scores): + if score < drop_score: + continue + draw.line([(box[0][0], box[0][1]), (box[1][0], box[1][1])], fill='red') + draw.line([(box[1][0], box[1][1]), (box[2][0], box[2][1])], fill='red') + draw.line([(box[2][0], box[2][1]), (box[3][0], box[3][1])], fill='red') + draw.line([(box[3][0], box[3][1]), (box[0][0], box[0][1])], fill='red') + draw.line([(box[0][0] - 1, box[0][1] + 1), (box[1][0] - 1, box[1][1] + 1)], fill='red') + draw.line([(box[1][0] - 1, box[1][1] + 1), (box[2][0] - 1, box[2][1] + 1)], fill='red') + draw.line([(box[2][0] - 1, box[2][1] + 1), (box[3][0] - 1, box[3][1] + 1)], fill='red') + draw.line([(box[3][0] - 1, box[3][1] + 1), (box[0][0] - 1, box[0][1] + 1)], fill='red') + return img + + +def get_image_ext(image): + if image.shape[2] == 4: + return ".png" + return ".jpg" + + +def mkdir(path): + sub_dir = os.path.dirname(path) + if not os.path.exists(sub_dir): + os.makedirs(sub_dir) diff --git a/modules/image/text_recognition/tamil_ocr_db_crnn_mobile/README.md b/modules/image/text_recognition/tamil_ocr_db_crnn_mobile/README.md new file mode 100644 index 0000000000000000000000000000000000000000..218bfaadff6fd5b43de3a9a79d8bab8b407a6237 --- /dev/null +++ b/modules/image/text_recognition/tamil_ocr_db_crnn_mobile/README.md @@ -0,0 +1,165 @@ +# tamil_ocr_db_crnn_mobile + +|模型名称|tamil_ocr_db_crnn_mobile| +| :--- | :---: | +|类别|图像-文字识别| +|网络|Differentiable Binarization+CRNN| +|数据集|icdar2015数据集| +|是否支持Fine-tuning|否| +|最新更新日期|2021-12-2| +|数据指标|-| + + +## 一、模型基本信息 + +- ### 模型介绍 + + - tamil_ocr_db_crnn_mobile Module用于识别图片当中的泰米尔文。其基于multi_languages_ocr_db_crnn检测得到的文本框,继续识别文本框中的泰米尔文文字。最终识别文字算法采用CRNN(Convolutional Recurrent Neural Network)即卷积递归神经网络。其是DCNN和RNN的组合,专门用于识别图像中的序列式对象。与CTC loss配合使用,进行文字识别,可以直接从文本词级或行级的标注中学习,不需要详细的字符级的标注。该Module是一个识别泰米尔文的轻量级OCR模型,支持直接预测。 + + - 更多详情参考: + - [Real-time Scene Text Detection with Differentiable Binarization](https://arxiv.org/pdf/1911.08947.pdf) + - [An end-to-end trainable neural network for image-based sequence recognition and its application to scene text recognition](https://arxiv.org/pdf/1507.05717.pdf) + + + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.0.2 + + - paddlehub >= 2.0.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + +- ### 2、安装 + + - ```shell + $ hub install tamil_ocr_db_crnn_mobile + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + + + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run tamil_ocr_db_crnn_mobile --input_path "/PATH/TO/IMAGE" + $ hub run tamil_ocr_db_crnn_mobile --input_path "/PATH/TO/IMAGE" --det True --rec True --use_angle_cls True --box_thresh 0.7 --angle_classification_thresh 0.8 --visualization True + ``` + - 通过命令行方式实现文字识别模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + ocr = hub.Module(name="tamil_ocr_db_crnn_mobile", enable_mkldnn=True) # mkldnn加速仅在CPU下有效 + result = ocr.recognize_text(images=[cv2.imread('/PATH/TO/IMAGE')]) + + # or + # result = 
ocr.recognize_text(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 3、API + + - ```python + def __init__(self, + det=True, + rec=True, + use_angle_cls=False, + enable_mkldnn=False, + use_gpu=False, + box_thresh=0.6, + angle_classification_thresh=0.9) + ``` + + - 构造TamilOCRDBCRNNMobile对象 + + - **参数** + - det(bool): 是否开启文字检测。默认为True。 + - rec(bool): 是否开启文字识别。默认为True。 + - use_angle_cls(bool): 是否开启方向分类, 用于设置使用方向分类器识别180度旋转文字。默认为False。 + - enable_mkldnn(bool): 是否开启mkldnn加速CPU计算。该参数仅在CPU运行下设置有效。默认为False。 + - use\_gpu (bool): 是否使用 GPU;**若使用GPU,请先设置CUDA_VISIBLE_DEVICES环境变量** + - box\_thresh (float): 检测文本框置信度的阈值; + - angle_classification_thresh(float): 文本方向分类置信度的阈值 + + + - ```python + def recognize_text(images=[], + paths=[], + output_dir='ocr_result', + visualization=False) + ``` + + - 预测API,检测输入图片中的所有文本的位置和识别文本结果。 + + - **参数** + + - paths (list\[str\]): 图片的路径; + - images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\],BGR格式; + - output\_dir (str): 图片的保存路径,默认设为 ocr\_result; + - visualization (bool): 是否将识别结果保存为图片文件, 仅有检测开启时有效, 默认为False; + + - **返回** + + - res (list\[dict\]): 识别结果的列表,列表中每一个元素为 dict,各字段为: + - data (list\[dict\]): 识别文本结果,列表中每一个元素为 dict,各字段为: + - text(str): 识别得到的文本 + - confidence(float): 识别文本结果置信度 + - text_box_position(list): 文本框在原图中的像素坐标,4*2的矩阵,依次表示文本框左下、右下、右上、左上顶点的坐标,如果无识别结果则data为\[\] + - orientation(str): 分类的方向,仅在只有方向分类开启时输出 + - score(float): 分类的得分,仅在只有方向分类开启时输出 + - save_path (str, optional): 识别结果的保存路径,如不保存图片则save_path为'' + + +## 四、服务部署 + +- PaddleHub Serving 可以部署一个目标检测的在线服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + - ```shell + $ hub serving start -m tamil_ocr_db_crnn_mobile + ``` + + - 这样就完成了一个目标检测的服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + import cv2 + import base64 + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # 发送HTTP请求 + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/tamil_ocr_db_crnn_mobile" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # 打印预测结果 + print(r.json()["results"]) + ``` + +## 五、更新历史 + +* 1.0.0 + + 初始发布 + - ```shell + $ hub install tamil_ocr_db_crnn_mobile==1.0.0 + ``` diff --git a/modules/image/text_recognition/tamil_ocr_db_crnn_mobile/__init__.py b/modules/image/text_recognition/tamil_ocr_db_crnn_mobile/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/modules/image/text_recognition/tamil_ocr_db_crnn_mobile/module.py b/modules/image/text_recognition/tamil_ocr_db_crnn_mobile/module.py new file mode 100644 index 0000000000000000000000000000000000000000..22321babd3812e3f39f9670b6aa6ce2a180a5a3f --- /dev/null +++ b/modules/image/text_recognition/tamil_ocr_db_crnn_mobile/module.py @@ -0,0 +1,87 @@ +import paddlehub as hub +from paddleocr.ppocr.utils.logging import get_logger +from paddleocr.tools.infer.utility import base64_to_cv2 +from paddlehub.module.module import moduleinfo, runnable, serving + + +@moduleinfo( + name="tamil_ocr_db_crnn_mobile", + version="1.0.0", + summary="ocr service", + author="PaddlePaddle", + type="cv/text_recognition") +class TamilOCRDBCRNNMobile: + def __init__(self, + det=True, + rec=True, + use_angle_cls=False, + enable_mkldnn=False, + use_gpu=False, + box_thresh=0.6, + 
angle_classification_thresh=0.9): + """ + initialize with the necessary elements + Args: + det(bool): Whether to use text detector. + rec(bool): Whether to use text recognizer. + use_angle_cls(bool): Whether to use text orientation classifier. + enable_mkldnn(bool): Whether to enable mkldnn. + use_gpu (bool): Whether to use gpu. + box_thresh(float): the threshold of the detected text box's confidence + angle_classification_thresh(float): the threshold of the angle classification confidence + """ + self.logger = get_logger() + self.model = hub.Module( + name="multi_languages_ocr_db_crnn", + lang="ta", + det=det, + rec=rec, + use_angle_cls=use_angle_cls, + enable_mkldnn=enable_mkldnn, + use_gpu=use_gpu, + box_thresh=box_thresh, + angle_classification_thresh=angle_classification_thresh) + self.model.name = self.name + + def recognize_text(self, images=[], paths=[], output_dir='ocr_result', visualization=False): + """ + Get the text in the predicted images. + Args: + images (list(numpy.ndarray)): images data, shape of each is [H, W, C]. If images not paths + paths (list[str]): The paths of images. If paths not images + output_dir (str): The directory to store output images. + visualization (bool): Whether to save image or not. + Returns: + res (list): The result of text detection box and save path of images. + """ + all_results = self.model.recognize_text( + images=images, paths=paths, output_dir=output_dir, visualization=visualization) + return all_results + + @serving + def serving_method(self, images, **kwargs): + """ + Run as a service. + """ + images_decode = [base64_to_cv2(image) for image in images] + results = self.recognize_text(images_decode, **kwargs) + return results + + @runnable + def run_cmd(self, argvs): + """ + Run as a command + """ + results = self.model.run_cmd(argvs) + return results + + def export_onnx_model(self, dirname: str, input_shape_dict=None, opset_version=10): + ''' + Export the model to ONNX format. + + Args: + dirname(str): The directory to save the onnx model. + input_shape_dict: dictionary ``{ input_name: input_value }, eg. 
{'x': [-1, 3, -1, -1]}`` + opset_version(int): operator set + ''' + self.model.export_onnx_model(dirname=dirname, input_shape_dict=input_shape_dict, opset_version=opset_version) diff --git a/modules/image/text_recognition/tamil_ocr_db_crnn_mobile/requirements.txt b/modules/image/text_recognition/tamil_ocr_db_crnn_mobile/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..527c6de7f643cb427013aaff2409365538fed2d3 --- /dev/null +++ b/modules/image/text_recognition/tamil_ocr_db_crnn_mobile/requirements.txt @@ -0,0 +1,4 @@ +paddleocr>=2.3.0.2 +paddle2onnx>=0.9.0 +shapely +pyclipper diff --git a/modules/image/text_recognition/telugu_ocr_db_crnn_mobile/README.md b/modules/image/text_recognition/telugu_ocr_db_crnn_mobile/README.md new file mode 100644 index 0000000000000000000000000000000000000000..bcf56dfb90cabf06060bf972ccefabd062552973 --- /dev/null +++ b/modules/image/text_recognition/telugu_ocr_db_crnn_mobile/README.md @@ -0,0 +1,165 @@ +# telugu_ocr_db_crnn_mobile + +|模型名称|telugu_ocr_db_crnn_mobile| +| :--- | :---: | +|类别|图像-文字识别| +|网络|Differentiable Binarization+CRNN| +|数据集|icdar2015数据集| +|是否支持Fine-tuning|否| +|最新更新日期|2021-12-2| +|数据指标|-| + + +## 一、模型基本信息 + +- ### 模型介绍 + + - telugu_ocr_db_crnn_mobile Module用于识别图片当中的泰卢固文。其基于multi_languages_ocr_db_crnn检测得到的文本框,继续识别文本框中的泰卢固文文字。最终识别文字算法采用CRNN(Convolutional Recurrent Neural Network)即卷积递归神经网络。其是DCNN和RNN的组合,专门用于识别图像中的序列式对象。与CTC loss配合使用,进行文字识别,可以直接从文本词级或行级的标注中学习,不需要详细的字符级的标注。该Module是一个识别泰卢固文的轻量级OCR模型,支持直接预测。 + + - 更多详情参考: + - [Real-time Scene Text Detection with Differentiable Binarization](https://arxiv.org/pdf/1911.08947.pdf) + - [An end-to-end trainable neural network for image-based sequence recognition and its application to scene text recognition](https://arxiv.org/pdf/1507.05717.pdf) + + + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.0.2 + + - paddlehub >= 2.0.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + +- ### 2、安装 + + - ```shell + $ hub install telugu_ocr_db_crnn_mobile + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + + + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run telugu_ocr_db_crnn_mobile --input_path "/PATH/TO/IMAGE" + $ hub run telugu_ocr_db_crnn_mobile --input_path "/PATH/TO/IMAGE" --det True --rec True --use_angle_cls True --box_thresh 0.7 --angle_classification_thresh 0.8 --visualization True + ``` + - 通过命令行方式实现文字识别模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + import cv2 + + ocr = hub.Module(name="telugu_ocr_db_crnn_mobile", enable_mkldnn=True) # mkldnn加速仅在CPU下有效 + result = ocr.recognize_text(images=[cv2.imread('/PATH/TO/IMAGE')]) + + # or + # result = ocr.recognize_text(paths=['/PATH/TO/IMAGE']) + ``` + +- ### 3、API + + - ```python + def __init__(self, + det=True, + rec=True, + use_angle_cls=False, + enable_mkldnn=False, + use_gpu=False, + box_thresh=0.6, + angle_classification_thresh=0.9) + ``` + + - 构造TeluguOCRDBCRNNMobile对象 + + - **参数** + - det(bool): 是否开启文字检测。默认为True。 + - rec(bool): 是否开启文字识别。默认为True。 + - use_angle_cls(bool): 是否开启方向分类, 用于设置使用方向分类器识别180度旋转文字。默认为False。 + - enable_mkldnn(bool): 是否开启mkldnn加速CPU计算。该参数仅在CPU运行下设置有效。默认为False。 + - use\_gpu (bool): 是否使用 GPU;**若使用GPU,请先设置CUDA_VISIBLE_DEVICES环境变量** + - box\_thresh (float): 检测文本框置信度的阈值; + - 
angle_classification_thresh(float): 文本方向分类置信度的阈值 + + + - ```python + def recognize_text(images=[], + paths=[], + output_dir='ocr_result', + visualization=False) + ``` + + - 预测API,检测输入图片中的所有文本的位置和识别文本结果。 + + - **参数** + + - paths (list\[str\]): 图片的路径; + - images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\],BGR格式; + - output\_dir (str): 图片的保存路径,默认设为 ocr\_result; + - visualization (bool): 是否将识别结果保存为图片文件, 仅有检测开启时有效, 默认为False; + + - **返回** + + - res (list\[dict\]): 识别结果的列表,列表中每一个元素为 dict,各字段为: + - data (list\[dict\]): 识别文本结果,列表中每一个元素为 dict,各字段为: + - text(str): 识别得到的文本 + - confidence(float): 识别文本结果置信度 + - text_box_position(list): 文本框在原图中的像素坐标,4*2的矩阵,依次表示文本框左下、右下、右上、左上顶点的坐标,如果无识别结果则data为\[\] + - orientation(str): 分类的方向,仅在只有方向分类开启时输出 + - score(float): 分类的得分,仅在只有方向分类开启时输出 + - save_path (str, optional): 识别结果的保存路径,如不保存图片则save_path为'' + + +## 四、服务部署 + +- PaddleHub Serving 可以部署一个目标检测的在线服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + - ```shell + $ hub serving start -m telugu_ocr_db_crnn_mobile + ``` + + - 这样就完成了一个目标检测的服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + import cv2 + import base64 + + def cv2_to_base64(image): + data = cv2.imencode('.jpg', image)[1] + return base64.b64encode(data.tostring()).decode('utf8') + + # 发送HTTP请求 + data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/telugu_ocr_db_crnn_mobile" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # 打印预测结果 + print(r.json()["results"]) + ``` + +## 五、更新历史 + +* 1.0.0 + + 初始发布 + - ```shell + $ hub install telugu_ocr_db_crnn_mobile==1.0.0 + ``` diff --git a/modules/image/text_recognition/telugu_ocr_db_crnn_mobile/__init__.py b/modules/image/text_recognition/telugu_ocr_db_crnn_mobile/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/modules/image/text_recognition/telugu_ocr_db_crnn_mobile/module.py b/modules/image/text_recognition/telugu_ocr_db_crnn_mobile/module.py new file mode 100644 index 0000000000000000000000000000000000000000..7cfd283a93c300daa080077cb8369323364ee20a --- /dev/null +++ b/modules/image/text_recognition/telugu_ocr_db_crnn_mobile/module.py @@ -0,0 +1,87 @@ +import paddlehub as hub +from paddleocr.ppocr.utils.logging import get_logger +from paddleocr.tools.infer.utility import base64_to_cv2 +from paddlehub.module.module import moduleinfo, runnable, serving + + +@moduleinfo( + name="telugu_ocr_db_crnn_mobile", + version="1.0.0", + summary="ocr service", + author="PaddlePaddle", + type="cv/text_recognition") +class TeluguOCRDBCRNNMobile: + def __init__(self, + det=True, + rec=True, + use_angle_cls=False, + enable_mkldnn=False, + use_gpu=False, + box_thresh=0.6, + angle_classification_thresh=0.9): + """ + initialize with the necessary elements + Args: + det(bool): Whether to use text detector. + rec(bool): Whether to use text recognizer. + use_angle_cls(bool): Whether to use text orientation classifier. + enable_mkldnn(bool): Whether to enable mkldnn. + use_gpu (bool): Whether to use gpu. 
+ box_thresh(float): the threshold of the detected text box's confidence + angle_classification_thresh(float): the threshold of the angle classification confidence + """ + self.logger = get_logger() + self.model = hub.Module( + name="multi_languages_ocr_db_crnn", + lang="te", + det=det, + rec=rec, + use_angle_cls=use_angle_cls, + enable_mkldnn=enable_mkldnn, + use_gpu=use_gpu, + box_thresh=box_thresh, + angle_classification_thresh=angle_classification_thresh) + self.model.name = self.name + + def recognize_text(self, images=[], paths=[], output_dir='ocr_result', visualization=False): + """ + Get the text in the predicted images. + Args: + images (list(numpy.ndarray)): images data, shape of each is [H, W, C]. If images not paths + paths (list[str]): The paths of images. If paths not images + output_dir (str): The directory to store output images. + visualization (bool): Whether to save image or not. + Returns: + res (list): The result of text detection box and save path of images. + """ + all_results = self.model.recognize_text( + images=images, paths=paths, output_dir=output_dir, visualization=visualization) + return all_results + + @serving + def serving_method(self, images, **kwargs): + """ + Run as a service. + """ + images_decode = [base64_to_cv2(image) for image in images] + results = self.recognize_text(images_decode, **kwargs) + return results + + @runnable + def run_cmd(self, argvs): + """ + Run as a command + """ + results = self.model.run_cmd(argvs) + return results + + def export_onnx_model(self, dirname: str, input_shape_dict=None, opset_version=10): + ''' + Export the model to ONNX format. + + Args: + dirname(str): The directory to save the onnx model. + input_shape_dict: dictionary ``{ input_name: input_value }, eg. {'x': [-1, 3, -1, -1]}`` + opset_version(int): operator set + ''' + self.model.export_onnx_model(dirname=dirname, input_shape_dict=input_shape_dict, opset_version=opset_version) diff --git a/modules/image/text_recognition/telugu_ocr_db_crnn_mobile/requirements.txt b/modules/image/text_recognition/telugu_ocr_db_crnn_mobile/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..527c6de7f643cb427013aaff2409365538fed2d3 --- /dev/null +++ b/modules/image/text_recognition/telugu_ocr_db_crnn_mobile/requirements.txt @@ -0,0 +1,4 @@ +paddleocr>=2.3.0.2 +paddle2onnx>=0.9.0 +shapely +pyclipper diff --git a/modules/text/embedding/fasttext_crawl_target_word-word_dim300_en/README.md b/modules/text/embedding/fasttext_crawl_target_word-word_dim300_en/README.md index 0dd8bd27346fbc3f636f1a4f189a001fa48edbdc..4f8d0c293c895b3c4ec66eb32c0d4a4053bb0ab0 100644 --- a/modules/text/embedding/fasttext_crawl_target_word-word_dim300_en/README.md +++ b/modules/text/embedding/fasttext_crawl_target_word-word_dim300_en/README.md @@ -1,144 +1,172 @@ -## 概述 -PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) +# fasttext_crawl_target_word-word_dim300_en +|模型名称|fasttext_crawl_target_word-word_dim300_en| +| :--- | :---: | +|类别|文本-词嵌入| +|网络|fasttext| +|数据集|crawl| +|是否支持Fine-tuning|否| +|文件大小|1.19GB| +|词表大小|2000002| +|最新更新日期|2021-02-26| +|数据指标|-| -## API +## 一、模型基本信息 -```python -def __init__( - *args, - **kwargs -) -``` +- ### 模型介绍 -创建一个Embedding Module对象,默认无需参数。 + - 
PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) -**参数** -* `*args`: 用户额外指定的列表类型的参数。 -* `**kwargs`:用户额外指定的关键字字典类型的参数。 +## 二、安装 -关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) +- ### 1、环境依赖 + - paddlepaddle >= 2.0.0 -```python -def search( - words: Union[List[str], str, int], -) -``` + - paddlehub >= 2.0.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) -获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 +- ### 2、安装 -**参数** -* `words`: 需要获取的词向量的词、词列表或者词编号。 + - ```shell + $ hub install fasttext_crawl_target_word-word_dim300_en + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) +## 三、模型API -```python -def cosine_sim( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 +- ### 1、预测代码示例 -**参数** -* `word_a`: 需要计算余弦相似度的单词a。 -* `word_b`: 需要计算余弦相似度的单词b。 + - ```python + import paddlehub as hub + embedding = hub.Module(name='fasttext_crawl_target_word-word_dim300_en') + # 获取单词的embedding + embedding.search("中国") + # 计算两个词向量的余弦相似度 + embedding.cosine_sim("中国", "美国") + # 计算两个词向量的内积 + embedding.dot("中国", "美国") + ``` -```python -def dot( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 +- ### 2、API -**参数** -* `word_a`: 需要计算内积的单词a。 -* `word_b`: 需要计算内积的单词b。 + - ```python + def __init__( + *args, + **kwargs + ) + ``` + - 创建一个Embedding Module对象,默认无需参数。 -```python -def get_vocab_path() -``` -获取本地词表文件的路径信息。 + - **参数** + - `*args`: 用户额外指定的列表类型的参数。 + - `**kwargs`:用户额外指定的关键字字典类型的参数。 + - 关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -```python -def get_tokenizer(*args, **kwargs) -``` -获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -**参数** -* `*args`: 额外传递的列表形式的参数。 -* `**kwargs`: 额外传递的字典形式的参数。 + - ```python + def search( + words: Union[List[str], str, int], + ) + ``` -关于额外参数的详情,可查看[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/develop/PaddleNLP/paddlenlp/data/tokenizer.py) + - 获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 + - **参数** + - `words`: 需要获取的词向量的词、词列表或者词编号。 -更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -## 代码示例 + - ```python + def cosine_sim( + word_a: str, + word_b: str, + ) + ``` -```python -import paddlehub as hub -embedding = hub.Module(name='fasttext_crawl_target_word-word_dim300_en') + - 计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 -# 获取单词的embedding -embedding.search("中国") -# 计算两个词向量的余弦相似度 -embedding.cosine_sim("中国", "美国") -# 计算两个词向量的内积 -embedding.dot("中国", "美国") -``` + - **参数** + - `word_a`: 需要计算余弦相似度的单词a。 + - `word_b`: 需要计算余弦相似度的单词b。 -## 部署服务 -通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 + - ```python + def dot( + word_a: str, + word_b: str, + ) + ``` -### Step1: 启动PaddleHub Serving + - 计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 -运行启动命令: + - 
**参数** + - `word_a`: 需要计算内积的单词a。 + - `word_b`: 需要计算内积的单词b。 -```shell -$ hub serving start -m fasttext_crawl_target_word-word_dim300_en -``` -这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + - ```python + def get_vocab_path() + ``` -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + - 获取本地词表文件的路径信息。 -### Step2: 发送预测请求 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + - ```python + def get_tokenizer(*args, **kwargs) + ``` -```python -import requests -import json + - 获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -# 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... ]] -word_pairs = [["中国", "美国"], ["今天", "明天"]] -# 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 -data = {"data": word_pairs} -# 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip -url = "http://10.12.121.132:8866/predict/fasttext_crawl_target_word-word_dim300_en" -# 指定post请求的headers为application/json方式 -headers = {"Content-Type": "application/json"} + - **参数** + - `*args`: 额外传递的列表形式的参数。 + - `**kwargs`: 额外传递的字典形式的参数。 + + - 关于额外参数的详情可参考[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/paddlenlp/data/tokenizer.py) -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -print(r.json()) -``` + - 更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -## 查看代码 -https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings +## 四、部署服务 -## 依赖 +- 通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 -paddlepaddle >= 2.0.0 +- ### Step1: 启动PaddleHub Serving -paddlehub >= 2.0.0 + - 运行启动命令: -## 更新历史 + - ```shell + $ hub serving start -m fasttext_crawl_target_word-word_dim300_en + ``` + + - 这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步: 发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... 
]] + word_pairs = [["中国", "美国"], ["今天", "明天"]] + # 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 + data = {"data": word_pairs} + # 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip + url = "http://127.0.0.1:8866/predict/fasttext_crawl_target_word-word_dim300_en" + # 指定post请求的headers为application/json方式 + headers = {"Content-Type": "application/json"} + + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + ``` + + +## 五、更新历史 * 1.0.0 @@ -146,4 +174,7 @@ paddlehub >= 2.0.0 * 1.0.1 - 支持基于embedding的文本分类和序列标注finetune任务 + 优化模型 + - ```shell + $ hub install fasttext_crawl_target_word-word_dim300_en==1.0.1 + ``` diff --git a/modules/text/embedding/fasttext_wiki-news_target_word-word_dim300_en/README.md b/modules/text/embedding/fasttext_wiki-news_target_word-word_dim300_en/README.md index dd1edcb69c446c7eb552c9a2c1058c48e997fad3..9bba94ff64ebd09faf794ab8c7af17fab55ec120 100644 --- a/modules/text/embedding/fasttext_wiki-news_target_word-word_dim300_en/README.md +++ b/modules/text/embedding/fasttext_wiki-news_target_word-word_dim300_en/README.md @@ -1,144 +1,172 @@ -## 概述 -PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) +# fasttext_wiki-news_target_word-word_dim300_en +|模型名称|fasttext_wiki-news_target_word-word_dim300_en| +| :--- | :---: | +|类别|文本-词嵌入| +|网络|fasttext| +|数据集|wiki-news| +|是否支持Fine-tuning|否| +|文件大小|541.63MB| +|词表大小|999996| +|最新更新日期|2021-02-26| +|数据指标|-| -## API +## 一、模型基本信息 -```python -def __init__( - *args, - **kwargs -) -``` +- ### 模型介绍 -创建一个Embedding Module对象,默认无需参数。 + - PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) -**参数** -* `*args`: 用户额外指定的列表类型的参数。 -* `**kwargs`:用户额外指定的关键字字典类型的参数。 +## 二、安装 -关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) +- ### 1、环境依赖 + - paddlepaddle >= 2.0.0 -```python -def search( - words: Union[List[str], str, int], -) -``` + - paddlehub >= 2.0.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) -获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 +- ### 2、安装 -**参数** -* `words`: 需要获取的词向量的词、词列表或者词编号。 + - ```shell + $ hub install fasttext_wiki-news_target_word-word_dim300_en + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) +## 三、模型API -```python -def cosine_sim( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 +- ### 1、预测代码示例 -**参数** -* `word_a`: 需要计算余弦相似度的单词a。 -* `word_b`: 需要计算余弦相似度的单词b。 + - ```python + import paddlehub as hub + embedding = hub.Module(name='fasttext_wiki-news_target_word-word_dim300_en') + # 获取单词的embedding + embedding.search("中国") + # 计算两个词向量的余弦相似度 + embedding.cosine_sim("中国", "美国") + # 计算两个词向量的内积 + embedding.dot("中国", "美国") + ``` -```python -def dot( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 +- ### 2、API -**参数** -* `word_a`: 需要计算内积的单词a。 -* `word_b`: 需要计算内积的单词b。 + - ```python + def __init__( + *args, + 
**kwargs + ) + ``` + - 创建一个Embedding Module对象,默认无需参数。 -```python -def get_vocab_path() -``` -获取本地词表文件的路径信息。 + - **参数** + - `*args`: 用户额外指定的列表类型的参数。 + - `**kwargs`:用户额外指定的关键字字典类型的参数。 + - 关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -```python -def get_tokenizer(*args, **kwargs) -``` -获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -**参数** -* `*args`: 额外传递的列表形式的参数。 -* `**kwargs`: 额外传递的字典形式的参数。 + - ```python + def search( + words: Union[List[str], str, int], + ) + ``` -关于额外参数的详情,可查看[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/develop/PaddleNLP/paddlenlp/data/tokenizer.py) + - 获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 + - **参数** + - `words`: 需要获取的词向量的词、词列表或者词编号。 -更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -## 代码示例 + - ```python + def cosine_sim( + word_a: str, + word_b: str, + ) + ``` -```python -import paddlehub as hub -embedding = hub.Module(name='fasttext_wiki-news_target_word-word_dim300_en') + - 计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 -# 获取单词的embedding -embedding.search("中国") -# 计算两个词向量的余弦相似度 -embedding.cosine_sim("中国", "美国") -# 计算两个词向量的内积 -embedding.dot("中国", "美国") -``` + - **参数** + - `word_a`: 需要计算余弦相似度的单词a。 + - `word_b`: 需要计算余弦相似度的单词b。 -## 部署服务 -通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 + - ```python + def dot( + word_a: str, + word_b: str, + ) + ``` -### Step1: 启动PaddleHub Serving + - 计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 -运行启动命令: + - **参数** + - `word_a`: 需要计算内积的单词a。 + - `word_b`: 需要计算内积的单词b。 -```shell -$ hub serving start -m fasttext_wiki-news_target_word-word_dim300_en -``` -这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + - ```python + def get_vocab_path() + ``` -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + - 获取本地词表文件的路径信息。 -### Step2: 发送预测请求 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + - ```python + def get_tokenizer(*args, **kwargs) + ``` -```python -import requests -import json + - 获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -# 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... 
]] -word_pairs = [["中国", "美国"], ["今天", "明天"]] -# 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 -data = {"data": word_pairs} -# 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip -url = "http://10.12.121.132:8866/predict/fasttext_wiki-news_target_word-word_dim300_en" -# 指定post请求的headers为application/json方式 -headers = {"Content-Type": "application/json"} + - **参数** + - `*args`: 额外传递的列表形式的参数。 + - `**kwargs`: 额外传递的字典形式的参数。 -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -print(r.json()) -``` + - 关于额外参数的详情可参考[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/paddlenlp/data/tokenizer.py) -## 查看代码 + - 更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings -## 依赖 +## 四、部署服务 -paddlepaddle >= 2.0.0 +- 通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 -paddlehub >= 2.0.0 +- ### Step1: 启动PaddleHub Serving -## 更新历史 + - 运行启动命令: + + - ```shell + $ hub serving start -m fasttext_wiki-news_target_word-word_dim300_en + ``` + + - 这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步: 发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... ]] + word_pairs = [["中国", "美国"], ["今天", "明天"]] + # 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 + data = {"data": word_pairs} + # 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip + url = "http://127.0.0.1:8866/predict/fasttext_wiki-news_target_word-word_dim300_en" + # 指定post请求的headers为application/json方式 + headers = {"Content-Type": "application/json"} + + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + ``` + + +## 五、更新历史 * 1.0.0 @@ -146,4 +174,7 @@ paddlehub >= 2.0.0 * 1.0.1 - 支持基于embedding的文本分类和序列标注finetune任务 + 优化模型 + - ```shell + $ hub install fasttext_wiki-news_target_word-word_dim300_en==1.0.1 + ``` diff --git a/modules/text/embedding/glove_twitter_target_word-word_dim100_en/README.md b/modules/text/embedding/glove_twitter_target_word-word_dim100_en/README.md index 30c61f55aa457a72ecb9e1f5044af6e30c8d7e10..3e9b300ba0d989b1cdd9255565a6b31d3680ac88 100644 --- a/modules/text/embedding/glove_twitter_target_word-word_dim100_en/README.md +++ b/modules/text/embedding/glove_twitter_target_word-word_dim100_en/README.md @@ -1,144 +1,172 @@ -## 概述 -PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) +# glove_twitter_target_word-word_dim100_en +|模型名称|glove_twitter_target_word-word_dim100_en| +| :--- | :---: | +|类别|文本-词嵌入| +|网络|glove| +|数据集|crawl| +|是否支持Fine-tuning|否| +|文件大小|431.08MB| +|词表大小|1193516| +|最新更新日期|2021-02-26| +|数据指标|-| -## API +## 一、模型基本信息 -```python -def __init__( - *args, - **kwargs -) -``` +- ### 模型介绍 -创建一个Embedding Module对象,默认无需参数。 + - PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) -**参数** -* `*args`: 用户额外指定的列表类型的参数。 -* `**kwargs`:用户额外指定的关键字字典类型的参数。 +## 二、安装 
-关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) +- ### 1、环境依赖 + - paddlepaddle >= 2.0.0 -```python -def search( - words: Union[List[str], str, int], -) -``` + - paddlehub >= 2.0.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) -获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 +- ### 2、安装 -**参数** -* `words`: 需要获取的词向量的词、词列表或者词编号。 + - ```shell + $ hub install glove_twitter_target_word-word_dim100_en + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) +## 三、模型API -```python -def cosine_sim( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 +- ### 1、预测代码示例 -**参数** -* `word_a`: 需要计算余弦相似度的单词a。 -* `word_b`: 需要计算余弦相似度的单词b。 + - ```python + import paddlehub as hub + embedding = hub.Module(name='glove_twitter_target_word-word_dim100_en') + # 获取单词的embedding + embedding.search("中国") + # 计算两个词向量的余弦相似度 + embedding.cosine_sim("中国", "美国") + # 计算两个词向量的内积 + embedding.dot("中国", "美国") + ``` -```python -def dot( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 +- ### 2、API -**参数** -* `word_a`: 需要计算内积的单词a。 -* `word_b`: 需要计算内积的单词b。 + - ```python + def __init__( + *args, + **kwargs + ) + ``` + - 创建一个Embedding Module对象,默认无需参数。 -```python -def get_vocab_path() -``` -获取本地词表文件的路径信息。 + - **参数** + - `*args`: 用户额外指定的列表类型的参数。 + - `**kwargs`:用户额外指定的关键字字典类型的参数。 + - 关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -```python -def get_tokenizer(*args, **kwargs) -``` -获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -**参数** -* `*args`: 额外传递的列表形式的参数。 -* `**kwargs`: 额外传递的字典形式的参数。 + - ```python + def search( + words: Union[List[str], str, int], + ) + ``` -关于额外参数的详情,可查看[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/develop/PaddleNLP/paddlenlp/data/tokenizer.py) + - 获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 + - **参数** + - `words`: 需要获取的词向量的词、词列表或者词编号。 -更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -## 代码示例 + - ```python + def cosine_sim( + word_a: str, + word_b: str, + ) + ``` -```python -import paddlehub as hub -embedding = hub.Module(name='glove_twitter_target_word-word_dim100_en') + - 计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 -# 获取单词的embedding -embedding.search("中国") -# 计算两个词向量的余弦相似度 -embedding.cosine_sim("中国", "美国") -# 计算两个词向量的内积 -embedding.dot("中国", "美国") -``` + - **参数** + - `word_a`: 需要计算余弦相似度的单词a。 + - `word_b`: 需要计算余弦相似度的单词b。 -## 部署服务 -通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 + - ```python + def dot( + word_a: str, + word_b: str, + ) + ``` -### Step1: 启动PaddleHub Serving + - 计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 -运行启动命令: + - **参数** + - `word_a`: 需要计算内积的单词a。 + - `word_b`: 需要计算内积的单词b。 -```shell -$ hub serving start -m glove_twitter_target_word-word_dim100_en -``` -这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + - ```python + def get_vocab_path() + ``` -**NOTE:** 
如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + - 获取本地词表文件的路径信息。 -### Step2: 发送预测请求 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + - ```python + def get_tokenizer(*args, **kwargs) + ``` -```python -import requests -import json + - 获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -# 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... ]] -word_pairs = [["中国", "美国"], ["今天", "明天"]] -# 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 -data = {"data": word_pairs} -# 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip -url = "http://10.12.121.132:8866/predict/glove_twitter_target_word-word_dim100_en" -# 指定post请求的headers为application/json方式 -headers = {"Content-Type": "application/json"} + - **参数** + - `*args`: 额外传递的列表形式的参数。 + - `**kwargs`: 额外传递的字典形式的参数。 -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -print(r.json()) -``` + - 关于额外参数的详情可参考[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/paddlenlp/data/tokenizer.py) -## 查看代码 + - 更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings -## 依赖 +## 四、部署服务 -paddlepaddle >= 2.0.0 +- 通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 -paddlehub >= 2.0.0 +- ### Step1: 启动PaddleHub Serving -## 更新历史 + - 运行启动命令: + + - ```shell + $ hub serving start -m glove_twitter_target_word-word_dim100_en + ``` + + - 这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步: 发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... 
]] + word_pairs = [["中国", "美国"], ["今天", "明天"]] + # 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 + data = {"data": word_pairs} + # 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip + url = "http://127.0.0.1:8866/predict/glove_twitter_target_word-word_dim100_en" + # 指定post请求的headers为application/json方式 + headers = {"Content-Type": "application/json"} + + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + ``` + + +## 五、更新历史 * 1.0.0 @@ -146,4 +174,7 @@ paddlehub >= 2.0.0 * 1.0.1 - 支持基于embedding的文本分类和序列标注finetune任务 + 优化模型 + - ```shell + $ hub install glove_twitter_target_word-word_dim100_en==1.0.1 + ``` diff --git a/modules/text/embedding/glove_twitter_target_word-word_dim200_en/README.md b/modules/text/embedding/glove_twitter_target_word-word_dim200_en/README.md index a1f31284b35cbe9f19c8525e3eccc38ee5a15fba..b80d1d7d316794c451ad3c02ae60e21efefcf65c 100644 --- a/modules/text/embedding/glove_twitter_target_word-word_dim200_en/README.md +++ b/modules/text/embedding/glove_twitter_target_word-word_dim200_en/README.md @@ -1,144 +1,172 @@ -## 概述 -PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) +# glove_twitter_target_word-word_dim200_en +|模型名称|glove_twitter_target_word-word_dim200_en| +| :--- | :---: | +|类别|文本-词嵌入| +|网络|fasttext| +|数据集|twitter| +|是否支持Fine-tuning|否| +|文件大小|848.56MB| +|词表大小|1193516| +|最新更新日期|2021-02-26| +|数据指标|-| -## API +## 一、模型基本信息 -```python -def __init__( - *args, - **kwargs -) -``` +- ### 模型介绍 -创建一个Embedding Module对象,默认无需参数。 + - PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) -**参数** -* `*args`: 用户额外指定的列表类型的参数。 -* `**kwargs`:用户额外指定的关键字字典类型的参数。 +## 二、安装 -关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) +- ### 1、环境依赖 + - paddlepaddle >= 2.0.0 -```python -def search( - words: Union[List[str], str, int], -) -``` + - paddlehub >= 2.0.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) -获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 +- ### 2、安装 -**参数** -* `words`: 需要获取的词向量的词、词列表或者词编号。 + - ```shell + $ hub install glove_twitter_target_word-word_dim200_en + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) +## 三、模型API -```python -def cosine_sim( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 +- ### 1、预测代码示例 -**参数** -* `word_a`: 需要计算余弦相似度的单词a。 -* `word_b`: 需要计算余弦相似度的单词b。 + - ```python + import paddlehub as hub + embedding = hub.Module(name='glove_twitter_target_word-word_dim200_en') + # 获取单词的embedding + embedding.search("中国") + # 计算两个词向量的余弦相似度 + embedding.cosine_sim("中国", "美国") + # 计算两个词向量的内积 + embedding.dot("中国", "美国") + ``` -```python -def dot( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 +- ### 2、API -**参数** -* `word_a`: 需要计算内积的单词a。 -* `word_b`: 需要计算内积的单词b。 + - ```python + def __init__( + *args, + **kwargs + ) + ``` + - 创建一个Embedding 
Module对象,默认无需参数。 -```python -def get_vocab_path() -``` -获取本地词表文件的路径信息。 + - **参数** + - `*args`: 用户额外指定的列表类型的参数。 + - `**kwargs`:用户额外指定的关键字字典类型的参数。 + - 关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -```python -def get_tokenizer(*args, **kwargs) -``` -获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -**参数** -* `*args`: 额外传递的列表形式的参数。 -* `**kwargs`: 额外传递的字典形式的参数。 + - ```python + def search( + words: Union[List[str], str, int], + ) + ``` -关于额外参数的详情,可查看[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/develop/PaddleNLP/paddlenlp/data/tokenizer.py) + - 获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 + - **参数** + - `words`: 需要获取的词向量的词、词列表或者词编号。 -更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -## 代码示例 + - ```python + def cosine_sim( + word_a: str, + word_b: str, + ) + ``` -```python -import paddlehub as hub -embedding = hub.Module(name='glove_twitter_target_word-word_dim200_en') + - 计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 -# 获取单词的embedding -embedding.search("中国") -# 计算两个词向量的余弦相似度 -embedding.cosine_sim("中国", "美国") -# 计算两个词向量的内积 -embedding.dot("中国", "美国") -``` + - **参数** + - `word_a`: 需要计算余弦相似度的单词a。 + - `word_b`: 需要计算余弦相似度的单词b。 -## 部署服务 -通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 + - ```python + def dot( + word_a: str, + word_b: str, + ) + ``` -### Step1: 启动PaddleHub Serving + - 计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 -运行启动命令: + - **参数** + - `word_a`: 需要计算内积的单词a。 + - `word_b`: 需要计算内积的单词b。 -```shell -$ hub serving start -m glove_twitter_target_word-word_dim200_en -``` -这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + - ```python + def get_vocab_path() + ``` -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + - 获取本地词表文件的路径信息。 -### Step2: 发送预测请求 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + - ```python + def get_tokenizer(*args, **kwargs) + ``` -```python -import requests -import json + - 获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -# 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... 
]] -word_pairs = [["中国", "美国"], ["今天", "明天"]] -# 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 -data = {"data": word_pairs} -# 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip -url = "http://10.12.121.132:8866/predict/glove_twitter_target_word-word_dim200_en" -# 指定post请求的headers为application/json方式 -headers = {"Content-Type": "application/json"} + - **参数** + - `*args`: 额外传递的列表形式的参数。 + - `**kwargs`: 额外传递的字典形式的参数。 -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -print(r.json()) -``` + - 关于额外参数的详情可参考[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/paddlenlp/data/tokenizer.py) -## 查看代码 + - 更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings -## 依赖 +## 四、部署服务 -paddlepaddle >= 2.0.0 +- 通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 -paddlehub >= 2.0.0 +- ### Step1: 启动PaddleHub Serving -## 更新历史 + - 运行启动命令: + + - ```shell + $ hub serving start -m glove_twitter_target_word-word_dim200_en + ``` + + - 这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步: 发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... ]] + word_pairs = [["中国", "美国"], ["今天", "明天"]] + # 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 + data = {"data": word_pairs} + # 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip + url = "http://127.0.0.1:8866/predict/glove_twitter_target_word-word_dim200_en" + # 指定post请求的headers为application/json方式 + headers = {"Content-Type": "application/json"} + + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + ``` + + +## 五、更新历史 * 1.0.0 @@ -146,4 +174,7 @@ paddlehub >= 2.0.0 * 1.0.1 - 支持基于embedding的文本分类和序列标注finetune任务 + 优化模型 + - ```shell + $ hub install glove_twitter_target_word-word_dim200_en==1.0.1 + ``` diff --git a/modules/text/embedding/glove_twitter_target_word-word_dim25_en/README.md b/modules/text/embedding/glove_twitter_target_word-word_dim25_en/README.md index ab60490600457b6fa8c0a83775f21ec69efd0041..6baea2fd59fd599fd14586a57a64bbbab762d8bd 100644 --- a/modules/text/embedding/glove_twitter_target_word-word_dim25_en/README.md +++ b/modules/text/embedding/glove_twitter_target_word-word_dim25_en/README.md @@ -1,144 +1,172 @@ -## 概述 -PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) +# glove_twitter_target_word-word_dim25_en +|模型名称|glove_twitter_target_word-word_dim25_en| +| :--- | :---: | +|类别|文本-词嵌入| +|网络|glove| +|数据集|twitter| +|是否支持Fine-tuning|否| +|文件大小|116.92MB| +|词表大小|1193516| +|最新更新日期|2021-02-26| +|数据指标|-| -## API +## 一、模型基本信息 -```python -def __init__( - *args, - **kwargs -) -``` +- ### 模型介绍 -创建一个Embedding Module对象,默认无需参数。 + - PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) -**参数** -* `*args`: 用户额外指定的列表类型的参数。 -* `**kwargs`:用户额外指定的关键字字典类型的参数。 +## 二、安装 -关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) +- 
### 1、环境依赖 + - paddlepaddle >= 2.0.0 -```python -def search( - words: Union[List[str], str, int], -) -``` + - paddlehub >= 2.0.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) -获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 +- ### 2、安装 -**参数** -* `words`: 需要获取的词向量的词、词列表或者词编号。 + - ```shell + $ hub install glove_twitter_target_word-word_dim25_en + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) +## 三、模型API -```python -def cosine_sim( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 +- ### 1、预测代码示例 -**参数** -* `word_a`: 需要计算余弦相似度的单词a。 -* `word_b`: 需要计算余弦相似度的单词b。 + - ```python + import paddlehub as hub + embedding = hub.Module(name='glove_twitter_target_word-word_dim25_en') + # 获取单词的embedding + embedding.search("中国") + # 计算两个词向量的余弦相似度 + embedding.cosine_sim("中国", "美国") + # 计算两个词向量的内积 + embedding.dot("中国", "美国") + ``` -```python -def dot( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 +- ### 2、API -**参数** -* `word_a`: 需要计算内积的单词a。 -* `word_b`: 需要计算内积的单词b。 + - ```python + def __init__( + *args, + **kwargs + ) + ``` + - 创建一个Embedding Module对象,默认无需参数。 -```python -def get_vocab_path() -``` -获取本地词表文件的路径信息。 + - **参数** + - `*args`: 用户额外指定的列表类型的参数。 + - `**kwargs`:用户额外指定的关键字字典类型的参数。 + - 关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -```python -def get_tokenizer(*args, **kwargs) -``` -获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -**参数** -* `*args`: 额外传递的列表形式的参数。 -* `**kwargs`: 额外传递的字典形式的参数。 + - ```python + def search( + words: Union[List[str], str, int], + ) + ``` -关于额外参数的详情,可查看[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/develop/PaddleNLP/paddlenlp/data/tokenizer.py) + - 获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 + - **参数** + - `words`: 需要获取的词向量的词、词列表或者词编号。 -更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -## 代码示例 + - ```python + def cosine_sim( + word_a: str, + word_b: str, + ) + ``` -```python -import paddlehub as hub -embedding = hub.Module(name='glove_twitter_target_word-word_dim25_en') + - 计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 -# 获取单词的embedding -embedding.search("中国") -# 计算两个词向量的余弦相似度 -embedding.cosine_sim("中国", "美国") -# 计算两个词向量的内积 -embedding.dot("中国", "美国") -``` + - **参数** + - `word_a`: 需要计算余弦相似度的单词a。 + - `word_b`: 需要计算余弦相似度的单词b。 -## 部署服务 -通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 + - ```python + def dot( + word_a: str, + word_b: str, + ) + ``` -### Step1: 启动PaddleHub Serving + - 计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 -运行启动命令: + - **参数** + - `word_a`: 需要计算内积的单词a。 + - `word_b`: 需要计算内积的单词b。 -```shell -$ hub serving start -m glove_twitter_target_word-word_dim25_en -``` -这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + - ```python + def get_vocab_path() + ``` -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + - 获取本地词表文件的路径信息。 -### Step2: 发送预测请求 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + - ```python + def get_tokenizer(*args, **kwargs) + ``` 
-```python -import requests -import json + - 获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -# 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... ]] -word_pairs = [["中国", "美国"], ["今天", "明天"]] -# 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 -data = {"data": word_pairs} -# 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip -url = "http://10.12.121.132:8866/predict/glove_twitter_target_word-word_dim25_en" -# 指定post请求的headers为application/json方式 -headers = {"Content-Type": "application/json"} + - **参数** + - `*args`: 额外传递的列表形式的参数。 + - `**kwargs`: 额外传递的字典形式的参数。 -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -print(r.json()) -``` + - 关于额外参数的详情可参考[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/paddlenlp/data/tokenizer.py) -## 查看代码 + - 更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings -## 依赖 +## 四、部署服务 -paddlepaddle >= 2.0.0 +- 通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 -paddlehub >= 2.0.0 +- ### Step1: 启动PaddleHub Serving -## 更新历史 + - 运行启动命令: + + - ```shell + $ hub serving start -m glove_twitter_target_word-word_dim25_en + ``` + + - 这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步: 发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... ]] + word_pairs = [["中国", "美国"], ["今天", "明天"]] + # 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 + data = {"data": word_pairs} + # 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip + url = "http://127.0.0.1:8866/predict/glove_twitter_target_word-word_dim25_en" + # 指定post请求的headers为application/json方式 + headers = {"Content-Type": "application/json"} + + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + ``` + + +## 五、更新历史 * 1.0.0 @@ -146,4 +174,7 @@ paddlehub >= 2.0.0 * 1.0.1 - 支持基于embedding的文本分类和序列标注finetune任务 + 优化模型 + - ```shell + $ hub install glove_twitter_target_word-word_dim25_en==1.0.1 + ``` diff --git a/modules/text/embedding/glove_twitter_target_word-word_dim50_en/README.md b/modules/text/embedding/glove_twitter_target_word-word_dim50_en/README.md index 6770c0d3423273836a91c49a9b06233d704a44a1..029baf8d1a4c5fc8611e334d9d18493175baf9b4 100644 --- a/modules/text/embedding/glove_twitter_target_word-word_dim50_en/README.md +++ b/modules/text/embedding/glove_twitter_target_word-word_dim50_en/README.md @@ -1,144 +1,172 @@ -## 概述 -PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) +# glove_twitter_target_word-word_dim50_en +|模型名称|glove_twitter_target_word-word_dim50_en| +| :--- | :---: | +|类别|文本-词嵌入| +|网络|glove| +|数据集|twitter| +|是否支持Fine-tuning|否| +|文件大小|221.64MB| +|词表大小|1193516| +|最新更新日期|2021-02-26| +|数据指标|-| -## API +## 一、模型基本信息 -```python -def __init__( - *args, - **kwargs -) -``` +- ### 模型介绍 -创建一个Embedding Module对象,默认无需参数。 + - PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) -**参数** -* `*args`: 用户额外指定的列表类型的参数。 -* 
`**kwargs`:用户额外指定的关键字字典类型的参数。 +## 二、安装 -关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) +- ### 1、环境依赖 + - paddlepaddle >= 2.0.0 -```python -def search( - words: Union[List[str], str, int], -) -``` + - paddlehub >= 2.0.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) -获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 +- ### 2、安装 -**参数** -* `words`: 需要获取的词向量的词、词列表或者词编号。 + - ```shell + $ hub install glove_twitter_target_word-word_dim50_en + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) +## 三、模型API -```python -def cosine_sim( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 +- ### 1、预测代码示例 -**参数** -* `word_a`: 需要计算余弦相似度的单词a。 -* `word_b`: 需要计算余弦相似度的单词b。 + - ```python + import paddlehub as hub + embedding = hub.Module(name='glove_twitter_target_word-word_dim50_en') + # 获取单词的embedding + embedding.search("中国") + # 计算两个词向量的余弦相似度 + embedding.cosine_sim("中国", "美国") + # 计算两个词向量的内积 + embedding.dot("中国", "美国") + ``` -```python -def dot( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 +- ### 2、API -**参数** -* `word_a`: 需要计算内积的单词a。 -* `word_b`: 需要计算内积的单词b。 + - ```python + def __init__( + *args, + **kwargs + ) + ``` + - 创建一个Embedding Module对象,默认无需参数。 -```python -def get_vocab_path() -``` -获取本地词表文件的路径信息。 + - **参数** + - `*args`: 用户额外指定的列表类型的参数。 + - `**kwargs`:用户额外指定的关键字字典类型的参数。 + - 关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -```python -def get_tokenizer(*args, **kwargs) -``` -获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -**参数** -* `*args`: 额外传递的列表形式的参数。 -* `**kwargs`: 额外传递的字典形式的参数。 + - ```python + def search( + words: Union[List[str], str, int], + ) + ``` -关于额外参数的详情,可查看[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/develop/PaddleNLP/paddlenlp/data/tokenizer.py) + - 获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 + - **参数** + - `words`: 需要获取的词向量的词、词列表或者词编号。 -更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -## 代码示例 + - ```python + def cosine_sim( + word_a: str, + word_b: str, + ) + ``` -```python -import paddlehub as hub -embedding = hub.Module(name='glove_twitter_target_word-word_dim50_en') + - 计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 -# 获取单词的embedding -embedding.search("中国") -# 计算两个词向量的余弦相似度 -embedding.cosine_sim("中国", "美国") -# 计算两个词向量的内积 -embedding.dot("中国", "美国") -``` + - **参数** + - `word_a`: 需要计算余弦相似度的单词a。 + - `word_b`: 需要计算余弦相似度的单词b。 -## 部署服务 -通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 + - ```python + def dot( + word_a: str, + word_b: str, + ) + ``` -### Step1: 启动PaddleHub Serving + - 计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 -运行启动命令: + - **参数** + - `word_a`: 需要计算内积的单词a。 + - `word_b`: 需要计算内积的单词b。 -```shell -$ hub serving start -m glove_twitter_target_word-word_dim50_en -``` -这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + - ```python + def get_vocab_path() + ``` -**NOTE:** 
如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + - 获取本地词表文件的路径信息。 -### Step2: 发送预测请求 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + - ```python + def get_tokenizer(*args, **kwargs) + ``` -```python -import requests -import json + - 获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -# 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... ]] -word_pairs = [["中国", "美国"], ["今天", "明天"]] -# 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 -data = {"data": word_pairs} -# 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip -url = "http://10.12.121.132:8866/predict/glove_twitter_target_word-word_dim50_en" -# 指定post请求的headers为application/json方式 -headers = {"Content-Type": "application/json"} + - **参数** + - `*args`: 额外传递的列表形式的参数。 + - `**kwargs`: 额外传递的字典形式的参数。 -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -print(r.json()) -``` + - 关于额外参数的详情可参考[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/paddlenlp/data/tokenizer.py) -## 查看代码 + - 更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings -## 依赖 +## 四、部署服务 -paddlepaddle >= 2.0.0 +- 通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 -paddlehub >= 2.0.0 +- ### Step1: 启动PaddleHub Serving -## 更新历史 + - 运行启动命令: + + - ```shell + $ hub serving start -m glove_twitter_target_word-word_dim50_en + ``` + + - 这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步: 发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... 
]] + word_pairs = [["中国", "美国"], ["今天", "明天"]] + # 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 + data = {"data": word_pairs} + # 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip + url = "http://127.0.0.1:8866/predict/glove_twitter_target_word-word_dim50_en" + # 指定post请求的headers为application/json方式 + headers = {"Content-Type": "application/json"} + + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + ``` + + +## 五、更新历史 * 1.0.0 @@ -146,4 +174,7 @@ paddlehub >= 2.0.0 * 1.0.1 - 支持基于embedding的文本分类和序列标注finetune任务 + 优化模型 + - ```shell + $ hub install glove_twitter_target_word-word_dim50_en==1.0.1 + ``` diff --git a/modules/text/embedding/glove_wiki2014-gigaword_target_word-word_dim100_en/README.md b/modules/text/embedding/glove_wiki2014-gigaword_target_word-word_dim100_en/README.md index cbe1760caa2f14f669da5eba67317b98f7ef9b8e..b3eedc46acb4744c9299c885e583d39a82d88fef 100644 --- a/modules/text/embedding/glove_wiki2014-gigaword_target_word-word_dim100_en/README.md +++ b/modules/text/embedding/glove_wiki2014-gigaword_target_word-word_dim100_en/README.md @@ -1,144 +1,172 @@ -## 概述 -PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) +# glove_wiki2014-gigaword_target_word-word_dim100_en +|模型名称|glove_wiki2014-gigaword_target_word-word_dim100_en| +| :--- | :---: | +|类别|文本-词嵌入| +|网络|glove| +|数据集|wiki2014-gigaword| +|是否支持Fine-tuning|否| +|文件大小|143.30MB| +|词表大小|400002| +|最新更新日期|2021-02-26| +|数据指标|-| -## API +## 一、模型基本信息 -```python -def __init__( - *args, - **kwargs -) -``` +- ### 模型介绍 -创建一个Embedding Module对象,默认无需参数。 + - PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) -**参数** -* `*args`: 用户额外指定的列表类型的参数。 -* `**kwargs`:用户额外指定的关键字字典类型的参数。 +## 二、安装 -关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) +- ### 1、环境依赖 + - paddlepaddle >= 2.0.0 -```python -def search( - words: Union[List[str], str, int], -) -``` + - paddlehub >= 2.0.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) -获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 +- ### 2、安装 -**参数** -* `words`: 需要获取的词向量的词、词列表或者词编号。 + - ```shell + $ hub install glove_wiki2014-gigaword_target_word-word_dim100_en + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) +## 三、模型API -```python -def cosine_sim( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 +- ### 1、预测代码示例 -**参数** -* `word_a`: 需要计算余弦相似度的单词a。 -* `word_b`: 需要计算余弦相似度的单词b。 + - ```python + import paddlehub as hub + embedding = hub.Module(name='glove_wiki2014-gigaword_target_word-word_dim100_en') + # 获取单词的embedding + embedding.search("中国") + # 计算两个词向量的余弦相似度 + embedding.cosine_sim("中国", "美国") + # 计算两个词向量的内积 + embedding.dot("中国", "美国") + ``` -```python -def dot( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 +- ### 2、API -**参数** -* `word_a`: 需要计算内积的单词a。 -* `word_b`: 需要计算内积的单词b。 + 
- ```python + def __init__( + *args, + **kwargs + ) + ``` + - 创建一个Embedding Module对象,默认无需参数。 -```python -def get_vocab_path() -``` -获取本地词表文件的路径信息。 + - **参数** + - `*args`: 用户额外指定的列表类型的参数。 + - `**kwargs`:用户额外指定的关键字字典类型的参数。 + - 关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -```python -def get_tokenizer(*args, **kwargs) -``` -获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -**参数** -* `*args`: 额外传递的列表形式的参数。 -* `**kwargs`: 额外传递的字典形式的参数。 + - ```python + def search( + words: Union[List[str], str, int], + ) + ``` -关于额外参数的详情,可查看[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/develop/PaddleNLP/paddlenlp/data/tokenizer.py) + - 获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 + - **参数** + - `words`: 需要获取的词向量的词、词列表或者词编号。 -更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -## 代码示例 + - ```python + def cosine_sim( + word_a: str, + word_b: str, + ) + ``` -```python -import paddlehub as hub -embedding = hub.Module(name='glove_wiki2014-gigaword_target_word-word_dim100_en') + - 计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 -# 获取单词的embedding -embedding.search("中国") -# 计算两个词向量的余弦相似度 -embedding.cosine_sim("中国", "美国") -# 计算两个词向量的内积 -embedding.dot("中国", "美国") -``` + - **参数** + - `word_a`: 需要计算余弦相似度的单词a。 + - `word_b`: 需要计算余弦相似度的单词b。 -## 部署服务 -通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 + - ```python + def dot( + word_a: str, + word_b: str, + ) + ``` -### Step1: 启动PaddleHub Serving + - 计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 -运行启动命令: + - **参数** + - `word_a`: 需要计算内积的单词a。 + - `word_b`: 需要计算内积的单词b。 -```shell -$ hub serving start -m glove_wiki2014-gigaword_target_word-word_dim100_en -``` -这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + - ```python + def get_vocab_path() + ``` -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + - 获取本地词表文件的路径信息。 -### Step2: 发送预测请求 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + - ```python + def get_tokenizer(*args, **kwargs) + ``` -```python -import requests -import json + - 获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -# 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... 
]] -word_pairs = [["中国", "美国"], ["今天", "明天"]] -# 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 -data = {"data": word_pairs} -# 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip -url = "http://10.12.121.132:8866/predict/glove_wiki2014-gigaword_target_word-word_dim100_en" -# 指定post请求的headers为application/json方式 -headers = {"Content-Type": "application/json"} + - **参数** + - `*args`: 额外传递的列表形式的参数。 + - `**kwargs`: 额外传递的字典形式的参数。 -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -print(r.json()) -``` + - 关于额外参数的详情可参考[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/paddlenlp/data/tokenizer.py) -## 查看代码 + - 更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings -## 依赖 +## 四、部署服务 -paddlepaddle >= 2.0.0 +- 通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 -paddlehub >= 2.0.0 +- ### Step1: 启动PaddleHub Serving -## 更新历史 + - 运行启动命令: + + - ```shell + $ hub serving start -m glove_wiki2014-gigaword_target_word-word_dim100_en + ``` + + - 这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步: 发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... ]] + word_pairs = [["中国", "美国"], ["今天", "明天"]] + # 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 + data = {"data": word_pairs} + # 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip + url = "http://127.0.0.1:8866/predict/glove_wiki2014-gigaword_target_word-word_dim100_en" + # 指定post请求的headers为application/json方式 + headers = {"Content-Type": "application/json"} + + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + ``` + + +## 五、更新历史 * 1.0.0 @@ -146,4 +174,7 @@ paddlehub >= 2.0.0 * 1.0.1 - 支持基于embedding的文本分类和序列标注finetune任务 + 优化模型 + - ```shell + $ hub install glove_wiki2014-gigaword_target_word-word_dim50_en==1.0.1 + ``` diff --git a/modules/text/embedding/glove_wiki2014-gigaword_target_word-word_dim200_en/README.md b/modules/text/embedding/glove_wiki2014-gigaword_target_word-word_dim200_en/README.md index ea8e7872e3bd4928e856735be6fe02e98e2c52a5..68c5ecd57d4d72b7f8b7059c952d689bfaf9f4dc 100644 --- a/modules/text/embedding/glove_wiki2014-gigaword_target_word-word_dim200_en/README.md +++ b/modules/text/embedding/glove_wiki2014-gigaword_target_word-word_dim200_en/README.md @@ -1,144 +1,172 @@ -## 概述 -PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) +# glove_wiki2014-gigaword_target_word-word_dim200_en +|模型名称|glove_wiki2014-gigaword_target_word-word_dim200_en| +| :--- | :---: | +|类别|文本-词嵌入| +|网络|glove| +|数据集|wiki2014-gigaword| +|是否支持Fine-tuning|否| +|文件大小|282.97MB| +|词表大小|400002| +|最新更新日期|2021-02-26| +|数据指标|-| -## API +## 一、模型基本信息 -```python -def __init__( - *args, - **kwargs -) -``` +- ### 模型介绍 -创建一个Embedding Module对象,默认无需参数。 + - PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) -**参数** -* `*args`: 用户额外指定的列表类型的参数。 -* `**kwargs`:用户额外指定的关键字字典类型的参数。 +## 二、安装 
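One point worth flagging for this module: the generic prediction example queries `"中国"` and `"美国"`, but the glove_wiki2014-gigaword vocabulary is built from English text, so those tokens are most likely OOV and, as the API section explains, would be silently mapped to `unknown_token`. The sketch below is a hedged smoke test using English words instead; the exact similarity values depend on the released vectors, and the assumption that `search` returns one 200-dimensional vector per queried word follows from the model name rather than from this README.

```python
# Hedged smoke test for glove_wiki2014-gigaword_target_word-word_dim200_en.
# Assumption: Chinese tokens such as "中国" are OOV in this English vocabulary
# and would be replaced by unknown_token, so English words are queried instead.
import paddlehub as hub

embedding = hub.Module(name='glove_wiki2014-gigaword_target_word-word_dim200_en')

# Each returned vector is expected to be 200-dimensional (per the model name
# and the table above); verify rather than assume.
vectors = embedding.search(["king", "queen"])
print(len(vectors), "vectors returned")

# Related words should generally score higher than unrelated ones; the exact
# numbers depend on the pretrained vectors and are not stated in this README.
print(embedding.cosine_sim("king", "queen"))
print(embedding.cosine_sim("king", "banana"))
```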
-关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) +- ### 1、环境依赖 + - paddlepaddle >= 2.0.0 -```python -def search( - words: Union[List[str], str, int], -) -``` + - paddlehub >= 2.0.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) -获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 +- ### 2、安装 -**参数** -* `words`: 需要获取的词向量的词、词列表或者词编号。 + - ```shell + $ hub install glove_wiki2014-gigaword_target_word-word_dim200_en + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) +## 三、模型API -```python -def cosine_sim( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 +- ### 1、预测代码示例 -**参数** -* `word_a`: 需要计算余弦相似度的单词a。 -* `word_b`: 需要计算余弦相似度的单词b。 + - ```python + import paddlehub as hub + embedding = hub.Module(name='glove_wiki2014-gigaword_target_word-word_dim200_en') + # 获取单词的embedding + embedding.search("中国") + # 计算两个词向量的余弦相似度 + embedding.cosine_sim("中国", "美国") + # 计算两个词向量的内积 + embedding.dot("中国", "美国") + ``` -```python -def dot( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 +- ### 2、API -**参数** -* `word_a`: 需要计算内积的单词a。 -* `word_b`: 需要计算内积的单词b。 + - ```python + def __init__( + *args, + **kwargs + ) + ``` + - 创建一个Embedding Module对象,默认无需参数。 -```python -def get_vocab_path() -``` -获取本地词表文件的路径信息。 + - **参数** + - `*args`: 用户额外指定的列表类型的参数。 + - `**kwargs`:用户额外指定的关键字字典类型的参数。 + - 关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -```python -def get_tokenizer(*args, **kwargs) -``` -获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -**参数** -* `*args`: 额外传递的列表形式的参数。 -* `**kwargs`: 额外传递的字典形式的参数。 + - ```python + def search( + words: Union[List[str], str, int], + ) + ``` -关于额外参数的详情,可查看[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/develop/PaddleNLP/paddlenlp/data/tokenizer.py) + - 获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 + - **参数** + - `words`: 需要获取的词向量的词、词列表或者词编号。 -更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -## 代码示例 + - ```python + def cosine_sim( + word_a: str, + word_b: str, + ) + ``` -```python -import paddlehub as hub -embedding = hub.Module(name='glove_wiki2014-gigaword_target_word-word_dim200_en') + - 计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 -# 获取单词的embedding -embedding.search("中国") -# 计算两个词向量的余弦相似度 -embedding.cosine_sim("中国", "美国") -# 计算两个词向量的内积 -embedding.dot("中国", "美国") -``` + - **参数** + - `word_a`: 需要计算余弦相似度的单词a。 + - `word_b`: 需要计算余弦相似度的单词b。 -## 部署服务 -通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 + - ```python + def dot( + word_a: str, + word_b: str, + ) + ``` -### Step1: 启动PaddleHub Serving + - 计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 -运行启动命令: + - **参数** + - `word_a`: 需要计算内积的单词a。 + - `word_b`: 需要计算内积的单词b。 -```shell -$ hub serving start -m glove_wiki2014-gigaword_target_word-word_dim200_en -``` -这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + - ```python + def get_vocab_path() + ``` -**NOTE:** 
如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + - 获取本地词表文件的路径信息。 -### Step2: 发送预测请求 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + - ```python + def get_tokenizer(*args, **kwargs) + ``` -```python -import requests -import json + - 获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -# 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... ]] -word_pairs = [["中国", "美国"], ["今天", "明天"]] -# 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 -data = {"data": word_pairs} -# 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip -url = "http://10.12.121.132:8866/predict/glove_wiki2014-gigaword_target_word-word_dim200_en" -# 指定post请求的headers为application/json方式 -headers = {"Content-Type": "application/json"} + - **参数** + - `*args`: 额外传递的列表形式的参数。 + - `**kwargs`: 额外传递的字典形式的参数。 -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -print(r.json()) -``` + - 关于额外参数的详情可参考[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/paddlenlp/data/tokenizer.py) -## 查看代码 + - 更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings -## 依赖 +## 四、部署服务 -paddlepaddle >= 2.0.0 +- 通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 -paddlehub >= 2.0.0 +- ### Step1: 启动PaddleHub Serving -## 更新历史 + - 运行启动命令: + + - ```shell + $ hub serving start -m glove_wiki2014-gigaword_target_word-word_dim200_en + ``` + + - 这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步: 发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... 
]] + word_pairs = [["中国", "美国"], ["今天", "明天"]] + # 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 + data = {"data": word_pairs} + # 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip + url = "http://127.0.0.1:8866/predict/glove_wiki2014-gigaword_target_word-word_dim200_en" + # 指定post请求的headers为application/json方式 + headers = {"Content-Type": "application/json"} + + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + ``` + + +## 五、更新历史 * 1.0.0 @@ -146,4 +174,7 @@ paddlehub >= 2.0.0 * 1.0.1 - 支持基于embedding的文本分类和序列标注finetune任务 + 优化模型 + - ```shell + $ hub install glove_wiki2014-gigaword_target_word-word_dim200_en==1.0.1 + ``` diff --git a/modules/text/embedding/glove_wiki2014-gigaword_target_word-word_dim300_en/README.md b/modules/text/embedding/glove_wiki2014-gigaword_target_word-word_dim300_en/README.md index a8e67d620b8d51c472055b75467dc60fcf19f32b..f92fe8948a5f9cfa31e6bbfc7f3fb52a8cca73fb 100644 --- a/modules/text/embedding/glove_wiki2014-gigaword_target_word-word_dim300_en/README.md +++ b/modules/text/embedding/glove_wiki2014-gigaword_target_word-word_dim300_en/README.md @@ -1,144 +1,172 @@ -## 概述 -PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) +# glove_wiki2014-gigaword_target_word-word_dim300_en +|模型名称|glove_wiki2014-gigaword_target_word-word_dim300_en| +| :--- | :---: | +|类别|文本-词嵌入| +|网络|glove| +|数据集|wiki2014-gigaword| +|是否支持Fine-tuning|否| +|文件大小|422.83MB| +|词表大小|400002| +|最新更新日期|2021-02-26| +|数据指标|-| -## API +## 一、模型基本信息 -```python -def __init__( - *args, - **kwargs -) -``` +- ### 模型介绍 -创建一个Embedding Module对象,默认无需参数。 + - PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) -**参数** -* `*args`: 用户额外指定的列表类型的参数。 -* `**kwargs`:用户额外指定的关键字字典类型的参数。 +## 二、安装 -关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) +- ### 1、环境依赖 + - paddlepaddle >= 2.0.0 -```python -def search( - words: Union[List[str], str, int], -) -``` + - paddlehub >= 2.0.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) -获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 +- ### 2、安装 -**参数** -* `words`: 需要获取的词向量的词、词列表或者词编号。 + - ```shell + $ hub install glove_wiki2014-gigaword_target_word-word_dim300_en + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) +## 三、模型API -```python -def cosine_sim( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 +- ### 1、预测代码示例 -**参数** -* `word_a`: 需要计算余弦相似度的单词a。 -* `word_b`: 需要计算余弦相似度的单词b。 + - ```python + import paddlehub as hub + embedding = hub.Module(name='glove_wiki2014-gigaword_target_word-word_dim300_en') + # 获取单词的embedding + embedding.search("中国") + # 计算两个词向量的余弦相似度 + embedding.cosine_sim("中国", "美国") + # 计算两个词向量的内积 + embedding.dot("中国", "美国") + ``` -```python -def dot( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 +- ### 2、API -**参数** -* `word_a`: 需要计算内积的单词a。 -* 
`word_b`: 需要计算内积的单词b。 + - ```python + def __init__( + *args, + **kwargs + ) + ``` + - 创建一个Embedding Module对象,默认无需参数。 -```python -def get_vocab_path() -``` -获取本地词表文件的路径信息。 + - **参数** + - `*args`: 用户额外指定的列表类型的参数。 + - `**kwargs`:用户额外指定的关键字字典类型的参数。 + - 关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -```python -def get_tokenizer(*args, **kwargs) -``` -获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -**参数** -* `*args`: 额外传递的列表形式的参数。 -* `**kwargs`: 额外传递的字典形式的参数。 + - ```python + def search( + words: Union[List[str], str, int], + ) + ``` -关于额外参数的详情,可查看[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/develop/PaddleNLP/paddlenlp/data/tokenizer.py) + - 获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 + - **参数** + - `words`: 需要获取的词向量的词、词列表或者词编号。 -更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -## 代码示例 + - ```python + def cosine_sim( + word_a: str, + word_b: str, + ) + ``` -```python -import paddlehub as hub -embedding = hub.Module(name='glove_wiki2014-gigaword_target_word-word_dim300_en') + - 计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 -# 获取单词的embedding -embedding.search("中国") -# 计算两个词向量的余弦相似度 -embedding.cosine_sim("中国", "美国") -# 计算两个词向量的内积 -embedding.dot("中国", "美国") -``` + - **参数** + - `word_a`: 需要计算余弦相似度的单词a。 + - `word_b`: 需要计算余弦相似度的单词b。 -## 部署服务 -通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 + - ```python + def dot( + word_a: str, + word_b: str, + ) + ``` -### Step1: 启动PaddleHub Serving + - 计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 -运行启动命令: + - **参数** + - `word_a`: 需要计算内积的单词a。 + - `word_b`: 需要计算内积的单词b。 -```shell -$ hub serving start -m glove_wiki2014-gigaword_target_word-word_dim300_en -``` -这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + - ```python + def get_vocab_path() + ``` -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + - 获取本地词表文件的路径信息。 -### Step2: 发送预测请求 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + - ```python + def get_tokenizer(*args, **kwargs) + ``` -```python -import requests -import json + - 获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -# 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... 
]] -word_pairs = [["中国", "美国"], ["今天", "明天"]] -# 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 -data = {"data": word_pairs} -# 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip -url = "http://10.12.121.132:8866/predict/glove_wiki2014-gigaword_target_word-word_dim300_en" -# 指定post请求的headers为application/json方式 -headers = {"Content-Type": "application/json"} + - **参数** + - `*args`: 额外传递的列表形式的参数。 + - `**kwargs`: 额外传递的字典形式的参数。 -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -print(r.json()) -``` + - 关于额外参数的详情可参考[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/paddlenlp/data/tokenizer.py) -## 查看代码 + - 更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings -## 依赖 +## 四、部署服务 -paddlepaddle >= 2.0.0 +- 通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 -paddlehub >= 2.0.0 +- ### Step1: 启动PaddleHub Serving -## 更新历史 + - 运行启动命令: + + - ```shell + $ hub serving start -m glove_wiki2014-gigaword_target_word-word_dim300_en + ``` + + - 这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步: 发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... ]] + word_pairs = [["中国", "美国"], ["今天", "明天"]] + # 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 + data = {"data": word_pairs} + # 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip + url = "http://127.0.0.1:8866/predict/glove_wiki2014-gigaword_target_word-word_dim300_en" + # 指定post请求的headers为application/json方式 + headers = {"Content-Type": "application/json"} + + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + ``` + + +## 五、更新历史 * 1.0.0 @@ -146,4 +174,8 @@ paddlehub >= 2.0.0 * 1.0.1 - 支持基于embedding的文本分类和序列标注finetune任务 + 优化模型 + - ```shell + $ hub install glove_wiki2014-gigaword_target_word-word_dim300_en==1.0.1 + ``` + \ No newline at end of file diff --git a/modules/text/embedding/glove_wiki2014-gigaword_target_word-word_dim50_en/README.md b/modules/text/embedding/glove_wiki2014-gigaword_target_word-word_dim50_en/README.md index ee25f85bab1f1823b4a93141b266eec663f3341c..edba1569e0beeca32b7a1b4378ea417e07cb7d08 100644 --- a/modules/text/embedding/glove_wiki2014-gigaword_target_word-word_dim50_en/README.md +++ b/modules/text/embedding/glove_wiki2014-gigaword_target_word-word_dim50_en/README.md @@ -1,144 +1,172 @@ -## 概述 -PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) +# glove_wiki2014-gigaword_target_word-word_dim50_en +|模型名称|glove_wiki2014-gigaword_target_word-word_dim50_en| +| :--- | :---: | +|类别|文本-词嵌入| +|网络|glove| +|数据集|wiki2014-gigaword| +|是否支持Fine-tuning|否| +|文件大小|73.45MB| +|词表大小|400002| +|最新更新日期|2021-02-26| +|数据指标|-| -## API +## 一、模型基本信息 -```python -def __init__( - *args, - **kwargs -) -``` +- ### 模型介绍 -创建一个Embedding Module对象,默认无需参数。 + - PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) -**参数** -* `*args`: 用户额外指定的列表类型的参数。 -* `**kwargs`:用户额外指定的关键字字典类型的参数。 +## 
二、安装 -关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) +- ### 1、环境依赖 + - paddlepaddle >= 2.0.0 -```python -def search( - words: Union[List[str], str, int], -) -``` + - paddlehub >= 2.0.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) -获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 +- ### 2、安装 -**参数** -* `words`: 需要获取的词向量的词、词列表或者词编号。 + - ```shell + $ hub install glove_wiki2014-gigaword_target_word-word_dim50_en + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) +## 三、模型API -```python -def cosine_sim( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 +- ### 1、预测代码示例 -**参数** -* `word_a`: 需要计算余弦相似度的单词a。 -* `word_b`: 需要计算余弦相似度的单词b。 + - ```python + import paddlehub as hub + embedding = hub.Module(name='glove_wiki2014-gigaword_target_word-word_dim50_en') + # 获取单词的embedding + embedding.search("中国") + # 计算两个词向量的余弦相似度 + embedding.cosine_sim("中国", "美国") + # 计算两个词向量的内积 + embedding.dot("中国", "美国") + ``` -```python -def dot( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 +- ### 2、API -**参数** -* `word_a`: 需要计算内积的单词a。 -* `word_b`: 需要计算内积的单词b。 + - ```python + def __init__( + *args, + **kwargs + ) + ``` + - 创建一个Embedding Module对象,默认无需参数。 -```python -def get_vocab_path() -``` -获取本地词表文件的路径信息。 + - **参数** + - `*args`: 用户额外指定的列表类型的参数。 + - `**kwargs`:用户额外指定的关键字字典类型的参数。 + - 关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -```python -def get_tokenizer(*args, **kwargs) -``` -获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -**参数** -* `*args`: 额外传递的列表形式的参数。 -* `**kwargs`: 额外传递的字典形式的参数。 + - ```python + def search( + words: Union[List[str], str, int], + ) + ``` -关于额外参数的详情,可查看[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/develop/PaddleNLP/paddlenlp/data/tokenizer.py) + - 获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 + - **参数** + - `words`: 需要获取的词向量的词、词列表或者词编号。 -更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -## 代码示例 + - ```python + def cosine_sim( + word_a: str, + word_b: str, + ) + ``` -```python -import paddlehub as hub -embedding = hub.Module(name='glove_wiki2014-gigaword_target_word-word_dim50_en') + - 计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 -# 获取单词的embedding -embedding.search("中国") -# 计算两个词向量的余弦相似度 -embedding.cosine_sim("中国", "美国") -# 计算两个词向量的内积 -embedding.dot("中国", "美国") -``` + - **参数** + - `word_a`: 需要计算余弦相似度的单词a。 + - `word_b`: 需要计算余弦相似度的单词b。 -## 部署服务 -通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 + - ```python + def dot( + word_a: str, + word_b: str, + ) + ``` -### Step1: 启动PaddleHub Serving + - 计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 -运行启动命令: + - **参数** + - `word_a`: 需要计算内积的单词a。 + - `word_b`: 需要计算内积的单词b。 -```shell -$ hub serving start -m glove_wiki2014-gigaword_target_word-word_dim50_en -``` -这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + - ```python + def get_vocab_path() + ``` -**NOTE:** 
如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + - 获取本地词表文件的路径信息。 -### Step2: 发送预测请求 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + - ```python + def get_tokenizer(*args, **kwargs) + ``` -```python -import requests -import json + - 获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -# 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... ]] -word_pairs = [["中国", "美国"], ["今天", "明天"]] -# 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 -data = {"data": word_pairs} -# 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip -url = "http://10.12.121.132:8866/predict/glove_wiki2014-gigaword_target_word-word_dim50_en" -# 指定post请求的headers为application/json方式 -headers = {"Content-Type": "application/json"} + - **参数** + - `*args`: 额外传递的列表形式的参数。 + - `**kwargs`: 额外传递的字典形式的参数。 -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -print(r.json()) -``` + - 关于额外参数的详情可参考[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/paddlenlp/data/tokenizer.py) -## 查看代码 + - 更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings -## 依赖 +## 四、部署服务 -paddlepaddle >= 2.0.0 +- 通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 -paddlehub >= 2.0.0 +- ### Step1: 启动PaddleHub Serving -## 更新历史 + - 运行启动命令: + + - ```shell + $ hub serving start -m glove_wiki2014-gigaword_target_word-word_dim50_en + ``` + + - 这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步: 发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... 
]] + word_pairs = [["中国", "美国"], ["今天", "明天"]] + # 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 + data = {"data": word_pairs} + # 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip + url = "http://127.0.0.1:8866/predict/glove_wiki2014-gigaword_target_word-word_dim50_en" + # 指定post请求的headers为application/json方式 + headers = {"Content-Type": "application/json"} + + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + ``` + + +## 五、更新历史 * 1.0.0 @@ -146,4 +174,7 @@ paddlehub >= 2.0.0 * 1.0.1 - 支持基于embedding的文本分类和序列标注finetune任务 + 优化模型 + - ```shell + $ hub install glove_wiki2014-gigaword_target_word-word_dim50_en==1.0.1 + ``` diff --git a/modules/text/embedding/tencent_ailab_chinese_embedding/README.md b/modules/text/embedding/tencent_ailab_chinese_embedding/README.md deleted file mode 100644 index 75ed2880215bb8f2f7e295093155295b505b7c99..0000000000000000000000000000000000000000 --- a/modules/text/embedding/tencent_ailab_chinese_embedding/README.md +++ /dev/null @@ -1,49 +0,0 @@ -## 概述 - -Tencent_AILab_ChineseEmbedding提供了基于海量中文语料训练学习得到的800多万个中文词语和短语的词向量表示,每一个词向量为200维。可以用于各种下游任务迁移学习。 - -更多详情参考: https://ai.tencent.com/ailab/nlp/en/embedding.html - -注:该Module由第三方开发者DesmonDay贡献。 - -## API - -```python -def context(trainable=False, max_seq_len=128, num_slots=1) -``` - -获取该Module的预训练program以及program相应的输入输出。 - -**参数** - -* trainable(bool): trainable=True表示program中的参数在Fine-tune时需要微调,否则保持不变。 -* max_seq_len(int): 模型使用的最大序列长度。 -* num_slots(int): 输入到模型所需要的文本个数,如完成单句文本分类任务,则num_slots=1;完成pointwise文本匹配任务,则num_slots=2;完成pairtwise文本匹配任务,则num_slots=3; - -**返回** - -* inputs(dict): program的输入变量 -* outputs(dict): program的输出变量 -* main_program(Program): 带有预训练参数的program - -### 代码示例 - -```python -import paddlehub as hub -import cv2 - -tencent_ailab_chinese_embedding = hub.Module(name="tencent_ailab_chinese_embedding") -inputs, outputs, program = tencent_ailab_chinese_embedding.context(trainable=True, max_seq_len=128, num_slots=1) -``` - -## 依赖 - -paddlepaddle >= 1.8.2 - -paddlehub >= 1.8.0 - -## 更新历史 - -* 1.0.0 - - 初始发布 diff --git a/modules/text/embedding/tencent_ailab_chinese_embedding/module.py b/modules/text/embedding/tencent_ailab_chinese_embedding/module.py deleted file mode 100644 index 7c2785bcfdda3e7fb01e7a85ac49942b343bd477..0000000000000000000000000000000000000000 --- a/modules/text/embedding/tencent_ailab_chinese_embedding/module.py +++ /dev/null @@ -1,149 +0,0 @@ -# -*- coding:utf-8 -*- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import io -import os - -import paddle.fluid as fluid -import paddlehub as hub -from paddlehub.common.paddle_helper import add_vars_prefix -from paddlehub.module.module import moduleinfo - - -def load_vocab(file_path): - """ - load the given vocabulary - """ - vocab = {} - with io.open(file_path, 'r', encoding='utf8') as f: - for line in f: - parts = line.split("\t") - vocab[parts[0]] = int(parts[1]) - - return vocab - - -@moduleinfo( - name="tencent_ailab_chinese_embedding", - version="1.0.0", - summary= - "Tencent AI Lab Embedding Corpus for Chinese Words and Phrases and the vocab size is 8,824,331. 
For more information, please refer to https://ai.tencent.com/ailab/nlp/zh/embedding.html", - author="", - author_email="", - type="nlp/semantic_model") -class TencentAILabChineseEmbedding(hub.Module): - def _initialize(self): - """ - initialize with the necessary elements - """ - self.pretrained_model_path = os.path.join(self.directory, "assets", "model") - self.vocab_path = os.path.join(self.directory, "assets", "vocab.txt") - self.vocab = load_vocab(self.vocab_path) - - def context(self, trainable=False, max_seq_len=128, num_slots=1): - """ - Get the input ,output and program of the pretrained tencent_ailab_chinese_embedding - - Args: - trainable(bool): whether fine-tune the pretrained parameters of simnet_bow or not - num_slots(int): It's number of slots inputted to the model, selectted as following options: - - - 1(default): There's only one data to be feeded in the model, e.g. the module is used for sentence classification task. - - 2: There are two data to be feeded in the model, e.g. the module is used for text matching task (point-wise). - - 3: There are three data to be feeded in the model, e.g. the module is used for text matching task (pair-wise). - - Returns: - inputs(dict): the input variables of tencent_ailab_chinese_embedding (words) - outputs(dict): the output variables of input words (word embeddings) - main_program(Program): the main_program of tencent_ailab_chinese_embedding with pretrained prameters - """ - assert num_slots >= 1 and num_slots <= 3, "num_slots must be 1, 2, or 3, but the input is %d" % num_slots - main_program = fluid.Program() - startup_program = fluid.Program() - with fluid.program_guard(main_program, startup_program): - with fluid.unique_name.guard(): - w_param_attrs = fluid.ParamAttr( - name="embedding_0.w_0", - initializer=fluid.initializer.TruncatedNormal(scale=0.02), - trainable=trainable) - - text_1 = fluid.data(name='text', shape=[-1, max_seq_len], dtype='int64', lod_level=0) - emb_1 = fluid.embedding( - input=text_1, - size=[len(self.vocab), 200], - is_sparse=True, - padding_idx=len(self.vocab) - 1, - dtype='float32', - param_attr=w_param_attrs) - emb_1_name = emb_1.name - data_list = [text_1] - emb_name_list = [emb_1_name] - - if num_slots > 1: - text_2 = fluid.data(name='text_2', shape=[-1, max_seq_len], dtype='int64', lod_level=0) - emb_2 = fluid.embedding( - input=text_2, - size=[len(self.vocab), 200], - is_sparse=True, - padding_idx=len(self.vocab) - 1, - dtype='float32', - param_attr=w_param_attrs) - emb_2_name = emb_2.name - data_list.append(text_2) - emb_name_list.append(emb_2_name) - - if num_slots > 2: - text_3 = fluid.data(name='text_3', shape=[-1, max_seq_len], dtype='int64', lod_level=0) - emb_3 = fluid.embedding( - input=text_3, - size=[len(self.vocab), 200], - is_sparse=True, - padding_idx=len(self.vocab) - 1, - dtype='float32', - param_attr=w_param_attrs) - emb_3_name = emb_3.name - data_list.append(text_3) - emb_name_list.append(emb_3_name) - - variable_names = filter(lambda v: v not in ['text', 'text_2', 'text_3'], - list(main_program.global_block().vars.keys())) - - prefix_name = "@HUB_{}@".format(self.name) - add_vars_prefix(program=main_program, prefix=prefix_name, vars=variable_names) - for param in main_program.global_block().iter_parameters(): - param.trainable = trainable - - place = fluid.CPUPlace() - exe = fluid.Executor(place) - - # load the pretrained model - def if_exist(var): - return os.path.exists(os.path.join(self.pretrained_model_path, var.name)) - - fluid.io.load_vars(exe, self.pretrained_model_path, 
predicate=if_exist) - - inputs = {} - outputs = {} - for index, data in enumerate(data_list): - if index == 0: - inputs['text'] = data - outputs['emb'] = main_program.global_block().vars[prefix_name + emb_name_list[0]] - else: - inputs['text_%s' % (index + 1)] = data - outputs['emb_%s' % (index + 1)] = main_program.global_block().vars[prefix_name + - emb_name_list[index]] - - return inputs, outputs, main_program - - def get_vocab_path(self): - return self.vocab_path - - -if __name__ == "__main__": - w2v = TencentAILabChineseEmbedding() - inputs, outputs, program = w2v.context(num_slots=3) - print(inputs) - print(outputs) - print(w2v.get_vocab_path()) diff --git a/modules/text/embedding/tencent_ailab_chinese_embedding_small/README.md b/modules/text/embedding/tencent_ailab_chinese_embedding_small/README.md deleted file mode 100644 index c5d2b84f24f097c6cc8fae58fe3e26348c36f315..0000000000000000000000000000000000000000 --- a/modules/text/embedding/tencent_ailab_chinese_embedding_small/README.md +++ /dev/null @@ -1,50 +0,0 @@ -## 概述 - -Tencent_AILab_ChineseEmbedding提供了基于海量中文语料训练学习得到的800多万个中文词语和短语的词向量表示,每一个词向量为200维。 -该Module截取了原来词汇表中前200万的词语,同样可以用于各种下游任务迁移学习。 - -更多详情参考: https://ai.tencent.com/ailab/nlp/en/embedding.html - -注:该Module由第三方开发者DesmonDay贡献。 - -## API - -```python -def context(trainable=False, max_seq_len=128, num_slots=1) -``` - -获取该Module的预训练program以及program相应的输入输出。 - -**参数** - -* trainable(bool): trainable=True表示program中的参数在Fine-tune时需要微调,否则保持不变。 -* max_seq_len(int): 模型使用的最大序列长度。 -* num_slots(int): 输入到模型所需要的文本个数,如完成单句文本分类任务,则num_slots=1;完成pointwise文本匹配任务,则num_slots=2;完成pairtwise文本匹配任务,则num_slots=3; - -**返回** - -* inputs(dict): program的输入变量 -* outputs(dict): program的输出变量 -* main_program(Program): 带有预训练参数的program - -### 代码示例 - -```python -import paddlehub as hub -import cv2 - -tencent_ailab_chinese_embedding = hub.Module(name="tencent_ailab_chinese_embedding_small") -inputs, outputs, program = tencent_ailab_chinese_embedding.context(trainable=True, max_seq_len=128, num_slots=1) -``` - -## 依赖 - -paddlepaddle >= 1.8.2 - -paddlehub >= 1.8.0 - -## 更新历史 - -* 1.0.0 - - 初始发布 diff --git a/modules/text/embedding/tencent_ailab_chinese_embedding_small/module.py b/modules/text/embedding/tencent_ailab_chinese_embedding_small/module.py deleted file mode 100644 index b77f6885e2fc0197d70fe1e82203b56203316dfc..0000000000000000000000000000000000000000 --- a/modules/text/embedding/tencent_ailab_chinese_embedding_small/module.py +++ /dev/null @@ -1,149 +0,0 @@ -# -*- coding:utf-8 -*- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import io -import os - -import paddle.fluid as fluid -import paddlehub as hub -from paddlehub.common.paddle_helper import add_vars_prefix -from paddlehub.module.module import moduleinfo - - -def load_vocab(file_path): - """ - load the given vocabulary - """ - vocab = {} - with io.open(file_path, 'r', encoding='utf8') as f: - for line in f: - parts = line.split("\t") - vocab[parts[0]] = int(parts[1]) - - return vocab - - -@moduleinfo( - name="tencent_ailab_chinese_embedding_small", - version="1.0.0", - summary= - "Tencent AI Lab Embedding Corpus for Chinese Words and Phrases and the vocab size is 2,000,002. 
For more information, please refer to https://ai.tencent.com/ailab/nlp/zh/embedding.html", - author="", - author_email="", - type="nlp/semantic_model") -class TencentAILabChineseEmbeddingSmall(hub.Module): - def _initialize(self): - """ - initialize with the necessary elements - """ - self.pretrained_model_path = os.path.join(self.directory, "assets", "model") - self.vocab_path = os.path.join(self.directory, "assets", "vocab.txt") - self.vocab = load_vocab(self.vocab_path) - - def context(self, trainable=False, max_seq_len=128, num_slots=1): - """ - Get the input ,output and program of the pretrained word2vec_skipgram - - Args: - trainable(bool): Whether fine-tune the pretrained parameters of tencent_ailab_chinese_embedding_small or not. - num_slots(int): It's number of data inputted to the model, selectted as following options: - - - 1(default): There's only one data to be feeded in the model, e.g. the module is used for sentence classification task. - - 2: There are two data to be feeded in the model, e.g. the module is used for text matching task (point-wise). - - 3: There are three data to be feeded in the model, e.g. the module is used for text matching task (pair-wise). - - Returns: - inputs(dict): the input variables of tencent_ailab_chinese_embedding_small (words) - outputs(dict): the output variables of input words (word embeddings) - main_program(Program): the main_program of tencent_ailab_chinese_embedding_small with pretrained prameters - """ - assert num_slots >= 1 and num_slots <= 3, "num_slots must be 1, 2, or 3, but the input is %d" % num_slots - main_program = fluid.Program() - startup_program = fluid.Program() - with fluid.program_guard(main_program, startup_program): - with fluid.unique_name.guard(): - w_param_attrs = fluid.ParamAttr( - name="embedding_0.w_0", - initializer=fluid.initializer.TruncatedNormal(scale=0.02), - trainable=trainable) - - text_1 = fluid.data(name='text', shape=[-1, max_seq_len], dtype='int64', lod_level=0) - emb_1 = fluid.embedding( - input=text_1, - size=[len(self.vocab), 200], - is_sparse=True, - padding_idx=len(self.vocab) - 1, - dtype='float32', - param_attr=w_param_attrs) - emb_1_name = emb_1.name - data_list = [text_1] - emb_name_list = [emb_1_name] - - if num_slots > 1: - text_2 = fluid.data(name='text_2', shape=[-1, max_seq_len], dtype='int64', lod_level=0) - emb_2 = fluid.embedding( - input=text_2, - size=[len(self.vocab), 200], - is_sparse=True, - padding_idx=len(self.vocab) - 1, - dtype='float32', - param_attr=w_param_attrs) - emb_2_name = emb_2.name - data_list.append(text_2) - emb_name_list.append(emb_2_name) - - if num_slots > 2: - text_3 = fluid.data(name='text_3', shape=[-1, max_seq_len], dtype='int64', lod_level=0) - emb_3 = fluid.embedding( - input=text_3, - size=[len(self.vocab), 200], - is_sparse=True, - padding_idx=len(self.vocab) - 1, - dtype='float32', - param_attr=w_param_attrs) - emb_3_name = emb_3.name - data_list.append(text_3) - emb_name_list.append(emb_3_name) - - variable_names = filter(lambda v: v not in ['text', 'text_2', 'text_3'], - list(main_program.global_block().vars.keys())) - - prefix_name = "@HUB_{}@".format(self.name) - add_vars_prefix(program=main_program, prefix=prefix_name, vars=variable_names) - for param in main_program.global_block().iter_parameters(): - param.trainable = trainable - - place = fluid.CPUPlace() - exe = fluid.Executor(place) - - # load the pretrained model - def if_exist(var): - return os.path.exists(os.path.join(self.pretrained_model_path, var.name)) - - fluid.io.load_vars(exe, 
self.pretrained_model_path, predicate=if_exist) - - inputs = {} - outputs = {} - for index, data in enumerate(data_list): - if index == 0: - inputs['text'] = data - outputs['emb'] = main_program.global_block().vars[prefix_name + emb_name_list[0]] - else: - inputs['text_%s' % (index + 1)] = data - outputs['emb_%s' % (index + 1)] = main_program.global_block().vars[prefix_name + - emb_name_list[index]] - - return inputs, outputs, main_program - - def get_vocab_path(self): - return self.vocab_path - - -if __name__ == "__main__": - w2v = TencentAILabChineseEmbeddingSmall() - inputs, outputs, program = w2v.context(num_slots=3) - print(inputs) - print(outputs) - print(w2v.get_vocab_path()) diff --git a/modules/text/embedding/w2v_baidu_encyclopedia_context_word-character_char1-1_dim300/README.md b/modules/text/embedding/w2v_baidu_encyclopedia_context_word-character_char1-1_dim300/README.md index 3b712b12dfd7f088ff82a88321600382619ee4c2..c5fab77a062e711149a3d1d5cf9646782797f3cc 100644 --- a/modules/text/embedding/w2v_baidu_encyclopedia_context_word-character_char1-1_dim300/README.md +++ b/modules/text/embedding/w2v_baidu_encyclopedia_context_word-character_char1-1_dim300/README.md @@ -1,144 +1,172 @@ -## 概述 -PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) +# w2v_baidu_encyclopedia_context_word-character_char1-1_dim300 +|模型名称|w2v_baidu_encyclopedia_context_word-character_char1-1_dim300| +| :--- | :---: | +|类别|文本-词嵌入| +|网络|w2v| +|数据集|baidu_encyclopedia| +|是否支持Fine-tuning|否| +|文件大小|678.65MB| +|词表大小|636200| +|最新更新日期|2021-04-28| +|数据指标|-| -## API +## 一、模型基本信息 -```python -def __init__( - *args, - **kwargs -) -``` +- ### 模型介绍 -创建一个Embedding Module对象,默认无需参数。 + - PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) -**参数** -* `*args`: 用户额外指定的列表类型的参数。 -* `**kwargs`:用户额外指定的关键字字典类型的参数。 +## 二、安装 -关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) +- ### 1、环境依赖 + - paddlepaddle >= 2.0.0 -```python -def search( - words: Union[List[str], str, int], -) -``` + - paddlehub >= 2.0.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) -获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 +- ### 2、安装 -**参数** -* `words`: 需要获取的词向量的词、词列表或者词编号。 + - ```shell + $ hub install w2v_baidu_encyclopedia_context_word-character_char1-1_dim300 + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) +## 三、模型API -```python -def cosine_sim( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 +- ### 1、预测代码示例 -**参数** -* `word_a`: 需要计算余弦相似度的单词a。 -* `word_b`: 需要计算余弦相似度的单词b。 + - ```python + import paddlehub as hub + embedding = hub.Module(name='w2v_baidu_encyclopedia_context_word-character_char1-1_dim300') + # 获取单词的embedding + embedding.search("中国") + # 计算两个词向量的余弦相似度 + embedding.cosine_sim("中国", "美国") + # 计算两个词向量的内积 + embedding.dot("中国", "美国") + ``` -```python -def dot( - word_a: str, - word_b: str, -) -``` 
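As a worked illustration of how `dot` and `cosine_sim` relate, the sketch below recomputes both values from the raw vectors returned by `search` for this 300-dimensional module. It assumes `search` returns array-like row vectors, one per queried word; `numpy.asarray` is used defensively, and the indexing may need to be adapted if the actual return type differs.

```python
# Cross-check sketch: cosine_sim(a, b) should match dot(a, b) / (|a| * |b|).
# Assumption: embedding.search([...]) yields one 300-dim row vector per word
# for this module; adapt the indexing if the return type differs.
import numpy as np
import paddlehub as hub

embedding = hub.Module(name='w2v_baidu_encyclopedia_context_word-character_char1-1_dim300')

vecs = np.asarray(embedding.search(["中国", "美国"]))
vec_a, vec_b = vecs[0], vecs[1]

manual_dot = float(np.dot(vec_a, vec_b))
manual_cos = manual_dot / (np.linalg.norm(vec_a) * np.linalg.norm(vec_b))

# The module's own APIs should report (approximately) the same numbers.
print(manual_dot, embedding.dot("中国", "美国"))
print(manual_cos, embedding.cosine_sim("中国", "美国"))
```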
-计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 +- ### 2、API -**参数** -* `word_a`: 需要计算内积的单词a。 -* `word_b`: 需要计算内积的单词b。 + - ```python + def __init__( + *args, + **kwargs + ) + ``` + - 创建一个Embedding Module对象,默认无需参数。 -```python -def get_vocab_path() -``` -获取本地词表文件的路径信息。 + - **参数** + - `*args`: 用户额外指定的列表类型的参数。 + - `**kwargs`:用户额外指定的关键字字典类型的参数。 + - 关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -```python -def get_tokenizer(*args, **kwargs) -``` -获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -**参数** -* `*args`: 额外传递的列表形式的参数。 -* `**kwargs`: 额外传递的字典形式的参数。 + - ```python + def search( + words: Union[List[str], str, int], + ) + ``` -关于额外参数的详情,可查看[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/develop/PaddleNLP/paddlenlp/data/tokenizer.py) + - 获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 + - **参数** + - `words`: 需要获取的词向量的词、词列表或者词编号。 -更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -## 代码示例 + - ```python + def cosine_sim( + word_a: str, + word_b: str, + ) + ``` -```python -import paddlehub as hub -embedding = hub.Module(name='w2v_baidu_encyclopedia_context_word-character_char1-1_dim300') + - 计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 -# 获取单词的embedding -embedding.search("中国") -# 计算两个词向量的余弦相似度 -embedding.cosine_sim("中国", "美国") -# 计算两个词向量的内积 -embedding.dot("中国", "美国") -``` + - **参数** + - `word_a`: 需要计算余弦相似度的单词a。 + - `word_b`: 需要计算余弦相似度的单词b。 -## 部署服务 -通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 + - ```python + def dot( + word_a: str, + word_b: str, + ) + ``` -### Step1: 启动PaddleHub Serving + - 计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 -运行启动命令: + - **参数** + - `word_a`: 需要计算内积的单词a。 + - `word_b`: 需要计算内积的单词b。 -```shell -$ hub serving start -m w2v_baidu_encyclopedia_context_word-character_char1-1_dim300 -``` -这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + - ```python + def get_vocab_path() + ``` -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + - 获取本地词表文件的路径信息。 -### Step2: 发送预测请求 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + - ```python + def get_tokenizer(*args, **kwargs) + ``` -```python -import requests -import json + - 获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -# 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... 
]] -word_pairs = [["中国", "美国"], ["今天", "明天"]] -# 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 -data = {"data": word_pairs} -# 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip -url = "http://10.12.121.132:8866/predict/w2v_baidu_encyclopedia_context_word-character_char1-1_dim300" -# 指定post请求的headers为application/json方式 -headers = {"Content-Type": "application/json"} + - **参数** + - `*args`: 额外传递的列表形式的参数。 + - `**kwargs`: 额外传递的字典形式的参数。 -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -print(r.json()) -``` + - 关于额外参数的详情可参考[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/paddlenlp/data/tokenizer.py) -## 查看代码 + - 更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings -## 依赖 +## 四、部署服务 -paddlepaddle >= 2.0.0 +- 通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 -paddlehub >= 2.0.0 +- ### Step1: 启动PaddleHub Serving -## 更新历史 + - 运行启动命令: + + - ```shell + $ hub serving start -m w2v_baidu_encyclopedia_context_word-character_char1-1_dim300 + ``` + + - 这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步: 发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... ]] + word_pairs = [["中国", "美国"], ["今天", "明天"]] + # 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 + data = {"data": word_pairs} + # 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip + url = "http://127.0.0.1:8866/predict/w2v_baidu_encyclopedia_context_word-character_char1-1_dim300" + # 指定post请求的headers为application/json方式 + headers = {"Content-Type": "application/json"} + + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + ``` + + +## 五、更新历史 * 1.0.0 @@ -146,4 +174,7 @@ paddlehub >= 2.0.0 * 1.0.1 - 支持基于embedding的文本分类和序列标注finetune任务 + 优化模型 + - ```shell + $ hub install w2v_baidu_encyclopedia_context_word-character_char1-1_dim300==1.0.1 + ``` diff --git a/modules/text/embedding/w2v_baidu_encyclopedia_context_word-character_char1-2_dim300/README.md b/modules/text/embedding/w2v_baidu_encyclopedia_context_word-character_char1-2_dim300/README.md index 77911202e7666a926adae1b0781b9352d5bd79a9..f88623c0342cd2972a43ef318c3a40a2ee186485 100644 --- a/modules/text/embedding/w2v_baidu_encyclopedia_context_word-character_char1-2_dim300/README.md +++ b/modules/text/embedding/w2v_baidu_encyclopedia_context_word-character_char1-2_dim300/README.md @@ -1,144 +1,172 @@ -## 概述 -PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) +# w2v_baidu_encyclopedia_context_word-character_char1-2_dim300 +|模型名称|w2v_baidu_encyclopedia_context_word-character_char1-2_dim300| +| :--- | :---: | +|类别|文本-词嵌入| +|网络|w2v| +|数据集|baidu_encyclopedia| +|是否支持Fine-tuning|否| +|文件大小|844.23MB| +|词表大小|792631| +|最新更新日期|2021-04-28| +|数据指标|-| -## API +## 一、模型基本信息 -```python -def __init__( - *args, - **kwargs -) -``` +- ### 模型介绍 -创建一个Embedding Module对象,默认无需参数。 + - 
PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) -**参数** -* `*args`: 用户额外指定的列表类型的参数。 -* `**kwargs`:用户额外指定的关键字字典类型的参数。 +## 二、安装 -关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) +- ### 1、环境依赖 + - paddlepaddle >= 2.0.0 -```python -def search( - words: Union[List[str], str, int], -) -``` + - paddlehub >= 2.0.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) -获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 +- ### 2、安装 -**参数** -* `words`: 需要获取的词向量的词、词列表或者词编号。 + - ```shell + $ hub install w2v_baidu_encyclopedia_context_word-character_char1-2_dim300 + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) +## 三、模型API -```python -def cosine_sim( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 +- ### 1、预测代码示例 -**参数** -* `word_a`: 需要计算余弦相似度的单词a。 -* `word_b`: 需要计算余弦相似度的单词b。 + - ```python + import paddlehub as hub + embedding = hub.Module(name='w2v_baidu_encyclopedia_context_word-character_char1-2_dim300') + # 获取单词的embedding + embedding.search("中国") + # 计算两个词向量的余弦相似度 + embedding.cosine_sim("中国", "美国") + # 计算两个词向量的内积 + embedding.dot("中国", "美国") + ``` -```python -def dot( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 +- ### 2、API -**参数** -* `word_a`: 需要计算内积的单词a。 -* `word_b`: 需要计算内积的单词b。 + - ```python + def __init__( + *args, + **kwargs + ) + ``` + - 创建一个Embedding Module对象,默认无需参数。 -```python -def get_vocab_path() -``` -获取本地词表文件的路径信息。 + - **参数** + - `*args`: 用户额外指定的列表类型的参数。 + - `**kwargs`:用户额外指定的关键字字典类型的参数。 + - 关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -```python -def get_tokenizer(*args, **kwargs) -``` -获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -**参数** -* `*args`: 额外传递的列表形式的参数。 -* `**kwargs`: 额外传递的字典形式的参数。 + - ```python + def search( + words: Union[List[str], str, int], + ) + ``` -关于额外参数的详情,可查看[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/develop/PaddleNLP/paddlenlp/data/tokenizer.py) + - 获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 + - **参数** + - `words`: 需要获取的词向量的词、词列表或者词编号。 -更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -## 代码示例 + - ```python + def cosine_sim( + word_a: str, + word_b: str, + ) + ``` -```python -import paddlehub as hub -embedding = hub.Module(name='w2v_baidu_encyclopedia_context_word-character_char1-2_dim300') + - 计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 -# 获取单词的embedding -embedding.search("中国") -# 计算两个词向量的余弦相似度 -embedding.cosine_sim("中国", "美国") -# 计算两个词向量的内积 -embedding.dot("中国", "美国") -``` + - **参数** + - `word_a`: 需要计算余弦相似度的单词a。 + - `word_b`: 需要计算余弦相似度的单词b。 -## 部署服务 -通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 + - ```python + def dot( + word_a: str, + word_b: str, + ) + ``` -### Step1: 启动PaddleHub 
Serving + - 计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 -运行启动命令: + - **参数** + - `word_a`: 需要计算内积的单词a。 + - `word_b`: 需要计算内积的单词b。 -```shell -$ hub serving start -m w2v_baidu_encyclopedia_context_word-character_char1-2_dim300 -``` -这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + - ```python + def get_vocab_path() + ``` -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + - 获取本地词表文件的路径信息。 -### Step2: 发送预测请求 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + - ```python + def get_tokenizer(*args, **kwargs) + ``` -```python -import requests -import json + - 获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -# 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... ]] -word_pairs = [["中国", "美国"], ["今天", "明天"]] -# 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 -data = {"data": word_pairs} -# 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip -url = "http://10.12.121.132:8866/predict/w2v_baidu_encyclopedia_context_word-character_char1-2_dim300" -# 指定post请求的headers为application/json方式 -headers = {"Content-Type": "application/json"} + - **参数** + - `*args`: 额外传递的列表形式的参数。 + - `**kwargs`: 额外传递的字典形式的参数。 -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -print(r.json()) -``` + - 关于额外参数的详情可参考[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/paddlenlp/data/tokenizer.py) -## 查看代码 + - 更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings -## 依赖 +## 四、部署服务 -paddlepaddle >= 2.0.0 +- 通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 -paddlehub >= 2.0.0 +- ### Step1: 启动PaddleHub Serving -## 更新历史 + - 运行启动命令: + + - ```shell + $ hub serving start -m w2v_baidu_encyclopedia_context_word-character_char1-2_dim300 + ``` + + - 这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步: 发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... 
]] + word_pairs = [["中国", "美国"], ["今天", "明天"]] + # 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 + data = {"data": word_pairs} + # 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip + url = "http://127.0.0.1:8866/predict/w2v_baidu_encyclopedia_context_word-character_char1-2_dim300" + # 指定post请求的headers为application/json方式 + headers = {"Content-Type": "application/json"} + + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + ``` + + +## 五、更新历史 * 1.0.0 @@ -146,4 +174,7 @@ paddlehub >= 2.0.0 * 1.0.1 - 支持基于embedding的文本分类和序列标注finetune任务 + 优化模型 + - ```shell + $ hub install w2v_baidu_encyclopedia_context_word-character_char1-2_dim300==1.0.1 + ``` diff --git a/modules/text/embedding/w2v_baidu_encyclopedia_context_word-character_char1-4_dim300/README.md b/modules/text/embedding/w2v_baidu_encyclopedia_context_word-character_char1-4_dim300/README.md index 863f5932586b323681cb8814d9aaea215e9b294e..bbf8ec37bf3a631bde6ae3427b08fdf7c2e85b3d 100644 --- a/modules/text/embedding/w2v_baidu_encyclopedia_context_word-character_char1-4_dim300/README.md +++ b/modules/text/embedding/w2v_baidu_encyclopedia_context_word-character_char1-4_dim300/README.md @@ -1,144 +1,172 @@ -## 概述 -PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) +# w2v_baidu_encyclopedia_context_word-character_char1-4_dim300 +|模型名称|w2v_baidu_encyclopedia_context_word-character_char1-4_dim300| +| :--- | :---: | +|类别|文本-词嵌入| +|网络|w2v| +|数据集|baidu_encyclopedia| +|是否支持Fine-tuning|否| +|文件大小|1.16GB| +|词表大小|1117461| +|最新更新日期|2021-04-28| +|数据指标|-| -## API +## 一、模型基本信息 -```python -def __init__( - *args, - **kwargs -) -``` +- ### 模型介绍 -创建一个Embedding Module对象,默认无需参数。 + - PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) -**参数** -* `*args`: 用户额外指定的列表类型的参数。 -* `**kwargs`:用户额外指定的关键字字典类型的参数。 +## 二、安装 -关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) +- ### 1、环境依赖 + - paddlepaddle >= 2.0.0 -```python -def search( - words: Union[List[str], str, int], -) -``` + - paddlehub >= 2.0.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) -获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 +- ### 2、安装 -**参数** -* `words`: 需要获取的词向量的词、词列表或者词编号。 + - ```shell + $ hub install w2v_baidu_encyclopedia_context_word-character_char1-4_dim300 + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) +## 三、模型API -```python -def cosine_sim( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 +- ### 1、预测代码示例 -**参数** -* `word_a`: 需要计算余弦相似度的单词a。 -* `word_b`: 需要计算余弦相似度的单词b。 + - ```python + import paddlehub as hub + embedding = hub.Module(name='w2v_baidu_encyclopedia_context_word-character_char1-4_dim300') + # 获取单词的embedding + embedding.search("中国") + # 计算两个词向量的余弦相似度 + embedding.cosine_sim("中国", "美国") + # 计算两个词向量的内积 + embedding.dot("中国", "美国") + ``` -```python -def dot( - word_a: str, - word_b: str, 
-) -``` -计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 +- ### 2、API -**参数** -* `word_a`: 需要计算内积的单词a。 -* `word_b`: 需要计算内积的单词b。 + - ```python + def __init__( + *args, + **kwargs + ) + ``` + - 创建一个Embedding Module对象,默认无需参数。 -```python -def get_vocab_path() -``` -获取本地词表文件的路径信息。 + - **参数** + - `*args`: 用户额外指定的列表类型的参数。 + - `**kwargs`:用户额外指定的关键字字典类型的参数。 + - 关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -```python -def get_tokenizer(*args, **kwargs) -``` -获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -**参数** -* `*args`: 额外传递的列表形式的参数。 -* `**kwargs`: 额外传递的字典形式的参数。 + - ```python + def search( + words: Union[List[str], str, int], + ) + ``` -关于额外参数的详情,可查看[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/develop/PaddleNLP/paddlenlp/data/tokenizer.py) + - 获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 + - **参数** + - `words`: 需要获取的词向量的词、词列表或者词编号。 -更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -## 代码示例 + - ```python + def cosine_sim( + word_a: str, + word_b: str, + ) + ``` -```python -import paddlehub as hub -embedding = hub.Module(name='w2v_baidu_encyclopedia_context_word-character_char1-4_dim300') + - 计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 -# 获取单词的embedding -embedding.search("中国") -# 计算两个词向量的余弦相似度 -embedding.cosine_sim("中国", "美国") -# 计算两个词向量的内积 -embedding.dot("中国", "美国") -``` + - **参数** + - `word_a`: 需要计算余弦相似度的单词a。 + - `word_b`: 需要计算余弦相似度的单词b。 -## 部署服务 -通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 + - ```python + def dot( + word_a: str, + word_b: str, + ) + ``` -### Step1: 启动PaddleHub Serving + - 计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 -运行启动命令: + - **参数** + - `word_a`: 需要计算内积的单词a。 + - `word_b`: 需要计算内积的单词b。 -```shell -$ hub serving start -m w2v_baidu_encyclopedia_context_word-character_char1-4_dim300 -``` -这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + - ```python + def get_vocab_path() + ``` -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + - 获取本地词表文件的路径信息。 -### Step2: 发送预测请求 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + - ```python + def get_tokenizer(*args, **kwargs) + ``` -```python -import requests -import json + - 获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -# 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... 
]] -word_pairs = [["中国", "美国"], ["今天", "明天"]] -# 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 -data = {"data": word_pairs} -# 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip -url = "http://10.12.121.132:8866/predict/w2v_baidu_encyclopedia_context_word-character_char1-4_dim300" -# 指定post请求的headers为application/json方式 -headers = {"Content-Type": "application/json"} + - **参数** + - `*args`: 额外传递的列表形式的参数。 + - `**kwargs`: 额外传递的字典形式的参数。 -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -print(r.json()) -``` + - 关于额外参数的详情可参考[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/paddlenlp/data/tokenizer.py) -## 查看代码 + - 更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings -## 依赖 +## 四、部署服务 -paddlepaddle >= 2.0.0 +- 通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 -paddlehub >= 2.0.0 +- ### Step1: 启动PaddleHub Serving -## 更新历史 + - 运行启动命令: + + - ```shell + $ hub serving start -m w2v_baidu_encyclopedia_context_word-character_char1-4_dim300 + ``` + + - 这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步: 发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... ]] + word_pairs = [["中国", "美国"], ["今天", "明天"]] + # 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 + data = {"data": word_pairs} + # 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip + url = "http://127.0.0.1:8866/predict/w2v_baidu_encyclopedia_context_word-character_char1-4_dim300" + # 指定post请求的headers为application/json方式 + headers = {"Content-Type": "application/json"} + + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + ``` + + +## 五、更新历史 * 1.0.0 @@ -146,4 +174,7 @@ paddlehub >= 2.0.0 * 1.0.1 - 支持基于embedding的文本分类和序列标注finetune任务 + 优化模型 + - ```shell + $ hub install w2v_baidu_encyclopedia_context_word-character_char1-4_dim300==1.0.1 + ``` diff --git a/modules/text/embedding/w2v_baidu_encyclopedia_context_word-ngram_1-2_dim300/README.md b/modules/text/embedding/w2v_baidu_encyclopedia_context_word-ngram_1-2_dim300/README.md index 2936f5e0554403558919d2003e6fe826a3b9b829..efaf4cdb57392c48466d75f6b5d7352a921b37a8 100644 --- a/modules/text/embedding/w2v_baidu_encyclopedia_context_word-ngram_1-2_dim300/README.md +++ b/modules/text/embedding/w2v_baidu_encyclopedia_context_word-ngram_1-2_dim300/README.md @@ -1,144 +1,172 @@ -## 概述 -PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) +# w2v_baidu_encyclopedia_context_word-ngram_1-2_dim300 +|模型名称|w2v_baidu_encyclopedia_context_word-ngram_1-2_dim300| +| :--- | :---: | +|类别|文本-词嵌入| +|网络|w2v| +|数据集|baidu_encyclopedia| +|是否支持Fine-tuning|否| +|文件大小|7.25GB| +|词表大小|6967598| +|最新更新日期|2021-04-28| +|数据指标|-| -## API +## 一、模型基本信息 -```python -def __init__( - *args, - **kwargs -) -``` +- ### 模型介绍 -创建一个Embedding Module对象,默认无需参数。 + - PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) -**参数** -* `*args`: 用户额外指定的列表类型的参数。 -* 
`**kwargs`:用户额外指定的关键字字典类型的参数。 +## 二、安装 -关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) +- ### 1、环境依赖 + - paddlepaddle >= 2.0.0 -```python -def search( - words: Union[List[str], str, int], -) -``` + - paddlehub >= 2.0.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) -获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 +- ### 2、安装 -**参数** -* `words`: 需要获取的词向量的词、词列表或者词编号。 + - ```shell + $ hub install w2v_baidu_encyclopedia_context_word-ngram_1-2_dim300 + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) +## 三、模型API -```python -def cosine_sim( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 +- ### 1、预测代码示例 -**参数** -* `word_a`: 需要计算余弦相似度的单词a。 -* `word_b`: 需要计算余弦相似度的单词b。 + - ```python + import paddlehub as hub + embedding = hub.Module(name='w2v_baidu_encyclopedia_context_word-ngram_1-2_dim300') + # 获取单词的embedding + embedding.search("中国") + # 计算两个词向量的余弦相似度 + embedding.cosine_sim("中国", "美国") + # 计算两个词向量的内积 + embedding.dot("中国", "美国") + ``` -```python -def dot( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 +- ### 2、API -**参数** -* `word_a`: 需要计算内积的单词a。 -* `word_b`: 需要计算内积的单词b。 + - ```python + def __init__( + *args, + **kwargs + ) + ``` + - 创建一个Embedding Module对象,默认无需参数。 -```python -def get_vocab_path() -``` -获取本地词表文件的路径信息。 + - **参数** + - `*args`: 用户额外指定的列表类型的参数。 + - `**kwargs`:用户额外指定的关键字字典类型的参数。 + - 关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -```python -def get_tokenizer(*args, **kwargs) -``` -获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -**参数** -* `*args`: 额外传递的列表形式的参数。 -* `**kwargs`: 额外传递的字典形式的参数。 + - ```python + def search( + words: Union[List[str], str, int], + ) + ``` -关于额外参数的详情,可查看[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/develop/PaddleNLP/paddlenlp/data/tokenizer.py) + - 获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 + - **参数** + - `words`: 需要获取的词向量的词、词列表或者词编号。 -更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -## 代码示例 + - ```python + def cosine_sim( + word_a: str, + word_b: str, + ) + ``` -```python -import paddlehub as hub -embedding = hub.Module(name='w2v_baidu_encyclopedia_context_word-ngram_1-2_dim300') + - 计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 -# 获取单词的embedding -embedding.search("中国") -# 计算两个词向量的余弦相似度 -embedding.cosine_sim("中国", "美国") -# 计算两个词向量的内积 -embedding.dot("中国", "美国") -``` + - **参数** + - `word_a`: 需要计算余弦相似度的单词a。 + - `word_b`: 需要计算余弦相似度的单词b。 -## 部署服务 -通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 + - ```python + def dot( + word_a: str, + word_b: str, + ) + ``` -### Step1: 启动PaddleHub Serving + - 计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 -运行启动命令: + - **参数** + - `word_a`: 需要计算内积的单词a。 + - `word_b`: 需要计算内积的单词b。 -```shell -$ hub serving start -m w2v_baidu_encyclopedia_context_word-ngram_1-2_dim300 -``` -这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + - ```python 
+ def get_vocab_path() + ``` -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + - 获取本地词表文件的路径信息。 -### Step2: 发送预测请求 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + - ```python + def get_tokenizer(*args, **kwargs) + ``` -```python -import requests -import json + - 获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -# 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... ]] -word_pairs = [["中国", "美国"], ["今天", "明天"]] -# 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 -data = {"data": word_pairs} -# 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip -url = "http://10.12.121.132:8866/predict/w2v_baidu_encyclopedia_context_word-ngram_1-2_dim300" -# 指定post请求的headers为application/json方式 -headers = {"Content-Type": "application/json"} + - **参数** + - `*args`: 额外传递的列表形式的参数。 + - `**kwargs`: 额外传递的字典形式的参数。 -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -print(r.json()) -``` + - 关于额外参数的详情可参考[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/paddlenlp/data/tokenizer.py) -## 查看代码 + - 更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings -## 依赖 +## 四、部署服务 -paddlepaddle >= 2.0.0 +- 通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 -paddlehub >= 2.0.0 +- ### Step1: 启动PaddleHub Serving -## 更新历史 + - 运行启动命令: + + - ```shell + $ hub serving start -m w2v_baidu_encyclopedia_context_word-ngram_1-2_dim300 + ``` + + - 这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步: 发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... 
]] + word_pairs = [["中国", "美国"], ["今天", "明天"]] + # 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 + data = {"data": word_pairs} + # 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip + url = "http://127.0.0.1:8866/predict/w2v_baidu_encyclopedia_context_word-ngram_1-2_dim300" + # 指定post请求的headers为application/json方式 + headers = {"Content-Type": "application/json"} + + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + ``` + + +## 五、更新历史 * 1.0.0 @@ -146,4 +174,7 @@ paddlehub >= 2.0.0 * 1.0.1 - 支持基于embedding的文本分类和序列标注finetune任务 + 优化模型 + - ```shell + $ hub install w2v_baidu_encyclopedia_context_word-ngram_1-2_dim300==1.0.1 + ``` diff --git a/modules/text/embedding/w2v_baidu_encyclopedia_context_word-ngram_1-3_dim300/README.md b/modules/text/embedding/w2v_baidu_encyclopedia_context_word-ngram_1-3_dim300/README.md index 3f5461c8aa67023cd4cc1ff846c46dd608868219..489fba9be22c41f99a96853ad25dcb3d5d97d7d6 100644 --- a/modules/text/embedding/w2v_baidu_encyclopedia_context_word-ngram_1-3_dim300/README.md +++ b/modules/text/embedding/w2v_baidu_encyclopedia_context_word-ngram_1-3_dim300/README.md @@ -1,144 +1,172 @@ -## 概述 -PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) +# w2v_baidu_encyclopedia_context_word-ngram_1-3_dim300 +|模型名称|w2v_baidu_encyclopedia_context_word-ngram_1-3_dim300| +| :--- | :---: | +|类别|文本-词嵌入| +|网络|w2v| +|数据集|baidu_encyclopedia| +|是否支持Fine-tuning|否| +|文件大小|5.21GB| +|词表大小|5000001| +|最新更新日期|2021-04-28| +|数据指标|-| -## API +## 一、模型基本信息 -```python -def __init__( - *args, - **kwargs -) -``` +- ### 模型介绍 -创建一个Embedding Module对象,默认无需参数。 + - PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) -**参数** -* `*args`: 用户额外指定的列表类型的参数。 -* `**kwargs`:用户额外指定的关键字字典类型的参数。 +## 二、安装 -关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) +- ### 1、环境依赖 + - paddlepaddle >= 2.0.0 -```python -def search( - words: Union[List[str], str, int], -) -``` + - paddlehub >= 2.0.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) -获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 +- ### 2、安装 -**参数** -* `words`: 需要获取的词向量的词、词列表或者词编号。 + - ```shell + $ hub install w2v_baidu_encyclopedia_context_word-ngram_1-3_dim300 + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) +## 三、模型API -```python -def cosine_sim( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 +- ### 1、预测代码示例 -**参数** -* `word_a`: 需要计算余弦相似度的单词a。 -* `word_b`: 需要计算余弦相似度的单词b。 + - ```python + import paddlehub as hub + embedding = hub.Module(name='w2v_baidu_encyclopedia_context_word-ngram_1-3_dim300') + # 获取单词的embedding + embedding.search("中国") + # 计算两个词向量的余弦相似度 + embedding.cosine_sim("中国", "美国") + # 计算两个词向量的内积 + embedding.dot("中国", "美国") + ``` -```python -def dot( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 +- ### 2、API -**参数** -* `word_a`: 
需要计算内积的单词a。 -* `word_b`: 需要计算内积的单词b。 + - ```python + def __init__( + *args, + **kwargs + ) + ``` + - 创建一个Embedding Module对象,默认无需参数。 -```python -def get_vocab_path() -``` -获取本地词表文件的路径信息。 + - **参数** + - `*args`: 用户额外指定的列表类型的参数。 + - `**kwargs`:用户额外指定的关键字字典类型的参数。 + - 关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -```python -def get_tokenizer(*args, **kwargs) -``` -获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -**参数** -* `*args`: 额外传递的列表形式的参数。 -* `**kwargs`: 额外传递的字典形式的参数。 + - ```python + def search( + words: Union[List[str], str, int], + ) + ``` -关于额外参数的详情,可查看[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/develop/PaddleNLP/paddlenlp/data/tokenizer.py) + - 获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 + - **参数** + - `words`: 需要获取的词向量的词、词列表或者词编号。 -更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -## 代码示例 + - ```python + def cosine_sim( + word_a: str, + word_b: str, + ) + ``` -```python -import paddlehub as hub -embedding = hub.Module(name='w2v_baidu_encyclopedia_context_word-ngram_1-3_dim300') + - 计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 -# 获取单词的embedding -embedding.search("中国") -# 计算两个词向量的余弦相似度 -embedding.cosine_sim("中国", "美国") -# 计算两个词向量的内积 -embedding.dot("中国", "美国") -``` + - **参数** + - `word_a`: 需要计算余弦相似度的单词a。 + - `word_b`: 需要计算余弦相似度的单词b。 -## 部署服务 -通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 + - ```python + def dot( + word_a: str, + word_b: str, + ) + ``` -### Step1: 启动PaddleHub Serving + - 计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 -运行启动命令: + - **参数** + - `word_a`: 需要计算内积的单词a。 + - `word_b`: 需要计算内积的单词b。 -```shell -$ hub serving start -m w2v_baidu_encyclopedia_context_word-ngram_1-3_dim300 -``` -这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + - ```python + def get_vocab_path() + ``` -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + - 获取本地词表文件的路径信息。 -### Step2: 发送预测请求 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + - ```python + def get_tokenizer(*args, **kwargs) + ``` -```python -import requests -import json + - 获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -# 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... 
]] -word_pairs = [["中国", "美国"], ["今天", "明天"]] -# 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 -data = {"data": word_pairs} -# 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip -url = "http://10.12.121.132:8866/predict/w2v_baidu_encyclopedia_context_word-ngram_1-3_dim300" -# 指定post请求的headers为application/json方式 -headers = {"Content-Type": "application/json"} + - **参数** + - `*args`: 额外传递的列表形式的参数。 + - `**kwargs`: 额外传递的字典形式的参数。 -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -print(r.json()) -``` + - 关于额外参数的详情可参考[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/paddlenlp/data/tokenizer.py) -## 查看代码 + - 更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings -## 依赖 +## 四、部署服务 -paddlepaddle >= 2.0.0 +- 通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 -paddlehub >= 2.0.0 +- ### Step1: 启动PaddleHub Serving -## 更新历史 + - 运行启动命令: + + - ```shell + $ hub serving start -m w2v_baidu_encyclopedia_context_word-ngram_1-3_dim300 + ``` + + - 这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步: 发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... ]] + word_pairs = [["中国", "美国"], ["今天", "明天"]] + # 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 + data = {"data": word_pairs} + # 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip + url = "http://127.0.0.1:8866/predict/w2v_baidu_encyclopedia_context_word-ngram_1-3_dim300" + # 指定post请求的headers为application/json方式 + headers = {"Content-Type": "application/json"} + + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + ``` + + +## 五、更新历史 * 1.0.0 @@ -146,4 +174,7 @@ paddlehub >= 2.0.0 * 1.0.1 - 支持基于embedding的文本分类和序列标注finetune任务 + 优化模型 + - ```shell + $ hub install w2v_baidu_encyclopedia_context_word-ngram_1-3_dim300==1.0.1 + ``` diff --git a/modules/text/embedding/w2v_baidu_encyclopedia_context_word-ngram_2-2_dim300/README.md b/modules/text/embedding/w2v_baidu_encyclopedia_context_word-ngram_2-2_dim300/README.md index 676ac10828de44f34b92783f6a884315542c1701..ecd24c40dcef0ae19b2d77d9f0c1050a7cfa1c9c 100644 --- a/modules/text/embedding/w2v_baidu_encyclopedia_context_word-ngram_2-2_dim300/README.md +++ b/modules/text/embedding/w2v_baidu_encyclopedia_context_word-ngram_2-2_dim300/README.md @@ -1,144 +1,172 @@ -## 概述 -PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) +# w2v_baidu_encyclopedia_context_word-ngram_2-2_dim300 +|模型名称|w2v_baidu_encyclopedia_context_word-ngram_2-2_dim300| +| :--- | :---: | +|类别|文本-词嵌入| +|网络|w2v| +|数据集|baidu_encyclopedia| +|是否支持Fine-tuning|否| +|文件大小|7.26GB| +|词表大小|6968998| +|最新更新日期|2021-04-28| +|数据指标|-| -## API +## 一、模型基本信息 -```python -def __init__( - *args, - **kwargs -) -``` +- ### 模型介绍 -创建一个Embedding Module对象,默认无需参数。 + - PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) -**参数** -* `*args`: 用户额外指定的列表类型的参数。 -* `**kwargs`:用户额外指定的关键字字典类型的参数。 +## 二、安装 
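+
+- 安装前可先确认本地环境满足下方"1、环境依赖"中列出的版本要求。下面是一个示意性的版本自检脚本,仅供参考,并非 PaddleHub 官方提供的命令,假设本机已经安装了 paddlepaddle 与 paddlehub:
+
+  - ```python
+    # 打印当前安装的 paddlepaddle 与 paddlehub 版本,二者均需满足 >= 2.0.0
+    import paddle
+    import paddlehub
+
+    print(paddle.__version__)
+    print(paddlehub.__version__)
+    ```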
-关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) +- ### 1、环境依赖 + - paddlepaddle >= 2.0.0 -```python -def search( - words: Union[List[str], str, int], -) -``` + - paddlehub >= 2.0.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) -获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 +- ### 2、安装 -**参数** -* `words`: 需要获取的词向量的词、词列表或者词编号。 + - ```shell + $ hub install w2v_baidu_encyclopedia_context_word-ngram_2-2_dim300 + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) +## 三、模型API -```python -def cosine_sim( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 +- ### 1、预测代码示例 -**参数** -* `word_a`: 需要计算余弦相似度的单词a。 -* `word_b`: 需要计算余弦相似度的单词b。 + - ```python + import paddlehub as hub + embedding = hub.Module(name='w2v_baidu_encyclopedia_context_word-ngram_2-2_dim300') + # 获取单词的embedding + embedding.search("中国") + # 计算两个词向量的余弦相似度 + embedding.cosine_sim("中国", "美国") + # 计算两个词向量的内积 + embedding.dot("中国", "美国") + ``` -```python -def dot( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 +- ### 2、API -**参数** -* `word_a`: 需要计算内积的单词a。 -* `word_b`: 需要计算内积的单词b。 + - ```python + def __init__( + *args, + **kwargs + ) + ``` + - 创建一个Embedding Module对象,默认无需参数。 -```python -def get_vocab_path() -``` -获取本地词表文件的路径信息。 + - **参数** + - `*args`: 用户额外指定的列表类型的参数。 + - `**kwargs`:用户额外指定的关键字字典类型的参数。 + - 关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -```python -def get_tokenizer(*args, **kwargs) -``` -获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -**参数** -* `*args`: 额外传递的列表形式的参数。 -* `**kwargs`: 额外传递的字典形式的参数。 + - ```python + def search( + words: Union[List[str], str, int], + ) + ``` -关于额外参数的详情,可查看[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/develop/PaddleNLP/paddlenlp/data/tokenizer.py) + - 获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 + - **参数** + - `words`: 需要获取的词向量的词、词列表或者词编号。 -更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -## 代码示例 + - ```python + def cosine_sim( + word_a: str, + word_b: str, + ) + ``` -```python -import paddlehub as hub -embedding = hub.Module(name='w2v_baidu_encyclopedia_context_word-ngram_2-2_dim300') + - 计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 -# 获取单词的embedding -embedding.search("中国") -# 计算两个词向量的余弦相似度 -embedding.cosine_sim("中国", "美国") -# 计算两个词向量的内积 -embedding.dot("中国", "美国") -``` + - **参数** + - `word_a`: 需要计算余弦相似度的单词a。 + - `word_b`: 需要计算余弦相似度的单词b。 -## 部署服务 -通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 + - ```python + def dot( + word_a: str, + word_b: str, + ) + ``` -### Step1: 启动PaddleHub Serving + - 计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 -运行启动命令: + - **参数** + - `word_a`: 需要计算内积的单词a。 + - `word_b`: 需要计算内积的单词b。 -```shell -$ hub serving start -m w2v_baidu_encyclopedia_context_word-ngram_2-2_dim300 -``` -这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + - ```python + def get_vocab_path() + ``` -**NOTE:** 
如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + - 获取本地词表文件的路径信息。 -### Step2: 发送预测请求 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + - ```python + def get_tokenizer(*args, **kwargs) + ``` -```python -import requests -import json + - 获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -# 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... ]] -word_pairs = [["中国", "美国"], ["今天", "明天"]] -# 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 -data = {"data": word_pairs} -# 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip -url = "http://10.12.121.132:8866/predict/w2v_baidu_encyclopedia_context_word-ngram_2-2_dim300" -# 指定post请求的headers为application/json方式 -headers = {"Content-Type": "application/json"} + - **参数** + - `*args`: 额外传递的列表形式的参数。 + - `**kwargs`: 额外传递的字典形式的参数。 -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -print(r.json()) -``` + - 关于额外参数的详情可参考[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/paddlenlp/data/tokenizer.py) -## 查看代码 + - 更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings -## 依赖 +## 四、部署服务 -paddlepaddle >= 2.0.0 +- 通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 -paddlehub >= 2.0.0 +- ### Step1: 启动PaddleHub Serving -## 更新历史 + - 运行启动命令: + + - ```shell + $ hub serving start -m w2v_baidu_encyclopedia_context_word-ngram_2-2_dim300 + ``` + + - 这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步: 发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... 
]] + word_pairs = [["中国", "美国"], ["今天", "明天"]] + # 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 + data = {"data": word_pairs} + # 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip + url = "http://127.0.0.1:8866/predict/w2v_baidu_encyclopedia_context_word-ngram_2-2_dim300" + # 指定post请求的headers为application/json方式 + headers = {"Content-Type": "application/json"} + + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + ``` + + +## 五、更新历史 * 1.0.0 @@ -146,4 +174,7 @@ paddlehub >= 2.0.0 * 1.0.1 - 支持基于embedding的文本分类和序列标注finetune任务 + 优化模型 + - ```shell + $ hub install w2v_baidu_encyclopedia_context_word-ngram_2-2_dim300==1.0.1 + ``` diff --git a/modules/text/embedding/w2v_baidu_encyclopedia_context_word-wordLR_dim300/README.md b/modules/text/embedding/w2v_baidu_encyclopedia_context_word-wordLR_dim300/README.md index 4f5c2465dc1877eec7969d539cd8d3e961fbb8a0..d0fee96ffca7a5d09f8829bfcca24e3b615637f0 100644 --- a/modules/text/embedding/w2v_baidu_encyclopedia_context_word-wordLR_dim300/README.md +++ b/modules/text/embedding/w2v_baidu_encyclopedia_context_word-wordLR_dim300/README.md @@ -1,144 +1,172 @@ -## 概述 -PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) +# w2v_baidu_encyclopedia_context_word-wordLR_dim300 +|模型名称|w2v_baidu_encyclopedia_context_word-wordLR_dim300| +| :--- | :---: | +|类别|文本-词嵌入| +|网络|w2v| +|数据集|baidu_encyclopedia| +|是否支持Fine-tuning|否| +|文件大小|1.32GB| +|词表大小|1271031| +|最新更新日期|2021-04-28| +|数据指标|-| -## API +## 一、模型基本信息 -```python -def __init__( - *args, - **kwargs -) -``` +- ### 模型介绍 -创建一个Embedding Module对象,默认无需参数。 + - PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) -**参数** -* `*args`: 用户额外指定的列表类型的参数。 -* `**kwargs`:用户额外指定的关键字字典类型的参数。 +## 二、安装 -关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) +- ### 1、环境依赖 + - paddlepaddle >= 2.0.0 -```python -def search( - words: Union[List[str], str, int], -) -``` + - paddlehub >= 2.0.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) -获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 +- ### 2、安装 -**参数** -* `words`: 需要获取的词向量的词、词列表或者词编号。 + - ```shell + $ hub install w2v_baidu_encyclopedia_context_word-wordLR_dim300 + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) +## 三、模型API -```python -def cosine_sim( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 +- ### 1、预测代码示例 -**参数** -* `word_a`: 需要计算余弦相似度的单词a。 -* `word_b`: 需要计算余弦相似度的单词b。 + - ```python + import paddlehub as hub + embedding = hub.Module(name='w2v_baidu_encyclopedia_context_word-wordLR_dim300') + # 获取单词的embedding + embedding.search("中国") + # 计算两个词向量的余弦相似度 + embedding.cosine_sim("中国", "美国") + # 计算两个词向量的内积 + embedding.dot("中国", "美国") + ``` -```python -def dot( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 +- ### 2、API -**参数** -* `word_a`: 需要计算内积的单词a。 -* 
`word_b`: 需要计算内积的单词b。 + - ```python + def __init__( + *args, + **kwargs + ) + ``` + - 创建一个Embedding Module对象,默认无需参数。 -```python -def get_vocab_path() -``` -获取本地词表文件的路径信息。 + - **参数** + - `*args`: 用户额外指定的列表类型的参数。 + - `**kwargs`:用户额外指定的关键字字典类型的参数。 + - 关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -```python -def get_tokenizer(*args, **kwargs) -``` -获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -**参数** -* `*args`: 额外传递的列表形式的参数。 -* `**kwargs`: 额外传递的字典形式的参数。 + - ```python + def search( + words: Union[List[str], str, int], + ) + ``` -关于额外参数的详情,可查看[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/develop/PaddleNLP/paddlenlp/data/tokenizer.py) + - 获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 + - **参数** + - `words`: 需要获取的词向量的词、词列表或者词编号。 -更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -## 代码示例 + - ```python + def cosine_sim( + word_a: str, + word_b: str, + ) + ``` -```python -import paddlehub as hub -embedding = hub.Module(name='w2v_baidu_encyclopedia_context_word-wordLR_dim300') + - 计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 -# 获取单词的embedding -embedding.search("中国") -# 计算两个词向量的余弦相似度 -embedding.cosine_sim("中国", "美国") -# 计算两个词向量的内积 -embedding.dot("中国", "美国") -``` + - **参数** + - `word_a`: 需要计算余弦相似度的单词a。 + - `word_b`: 需要计算余弦相似度的单词b。 -## 部署服务 -通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 + - ```python + def dot( + word_a: str, + word_b: str, + ) + ``` -### Step1: 启动PaddleHub Serving + - 计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 -运行启动命令: + - **参数** + - `word_a`: 需要计算内积的单词a。 + - `word_b`: 需要计算内积的单词b。 -```shell -$ hub serving start -m w2v_baidu_encyclopedia_context_word-wordLR_dim300 -``` -这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + - ```python + def get_vocab_path() + ``` -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + - 获取本地词表文件的路径信息。 -### Step2: 发送预测请求 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + - ```python + def get_tokenizer(*args, **kwargs) + ``` -```python -import requests -import json + - 获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -# 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... 
]] -word_pairs = [["中国", "美国"], ["今天", "明天"]] -# 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 -data = {"data": word_pairs} -# 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip -url = "http://10.12.121.132:8866/predict/w2v_baidu_encyclopedia_context_word-wordLR_dim300" -# 指定post请求的headers为application/json方式 -headers = {"Content-Type": "application/json"} + - **参数** + - `*args`: 额外传递的列表形式的参数。 + - `**kwargs`: 额外传递的字典形式的参数。 -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -print(r.json()) -``` + - 关于额外参数的详情可参考[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/paddlenlp/data/tokenizer.py) -## 查看代码 + - 更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings -## 依赖 +## 四、部署服务 -paddlepaddle >= 2.0.0 +- 通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 -paddlehub >= 2.0.0 +- ### Step1: 启动PaddleHub Serving -## 更新历史 + - 运行启动命令: + + - ```shell + $ hub serving start -m w2v_baidu_encyclopedia_context_word-wordLR_dim300 + ``` + + - 这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步: 发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... ]] + word_pairs = [["中国", "美国"], ["今天", "明天"]] + # 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 + data = {"data": word_pairs} + # 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip + url = "http://127.0.0.1:8866/predict/w2v_baidu_encyclopedia_context_word-wordLR_dim300" + # 指定post请求的headers为application/json方式 + headers = {"Content-Type": "application/json"} + + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + ``` + + +## 五、更新历史 * 1.0.0 @@ -146,4 +174,7 @@ paddlehub >= 2.0.0 * 1.0.1 - 支持基于embedding的文本分类和序列标注finetune任务 + 优化模型 + - ```shell + $ hub install w2v_baidu_encyclopedia_context_word-wordLR_dim300==1.0.1 + ``` diff --git a/modules/text/embedding/w2v_baidu_encyclopedia_context_word-wordPosition_dim300/README.md b/modules/text/embedding/w2v_baidu_encyclopedia_context_word-wordPosition_dim300/README.md index a04bd1391dd83de6b0c67bcf737dd25cf9a0d85d..6c91b7c1110c3db811bc79b3130b288aaa32b51a 100644 --- a/modules/text/embedding/w2v_baidu_encyclopedia_context_word-wordPosition_dim300/README.md +++ b/modules/text/embedding/w2v_baidu_encyclopedia_context_word-wordPosition_dim300/README.md @@ -1,144 +1,172 @@ -## 概述 -PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) +# w2v_baidu_encyclopedia_context_word-wordPosition_dim300 +|模型名称|w2v_baidu_encyclopedia_context_word-wordPosition_dim300| +| :--- | :---: | +|类别|文本-词嵌入| +|网络|w2v| +|数据集|baidu_encyclopedia| +|是否支持Fine-tuning|否| +|文件大小|679.32MB| +|词表大小|636038| +|最新更新日期|2021-04-28| +|数据指标|-| -## API +## 一、模型基本信息 -```python -def __init__( - *args, - **kwargs -) -``` +- ### 模型介绍 -创建一个Embedding Module对象,默认无需参数。 + - PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) -**参数** -* `*args`: 用户额外指定的列表类型的参数。 -* `**kwargs`:用户额外指定的关键字字典类型的参数。 +## 
二、安装 -关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) +- ### 1、环境依赖 + - paddlepaddle >= 2.0.0 -```python -def search( - words: Union[List[str], str, int], -) -``` + - paddlehub >= 2.0.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) -获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 +- ### 2、安装 -**参数** -* `words`: 需要获取的词向量的词、词列表或者词编号。 + - ```shell + $ hub install w2v_baidu_encyclopedia_context_word-wordPosition_dim300 + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) +## 三、模型API -```python -def cosine_sim( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 +- ### 1、预测代码示例 -**参数** -* `word_a`: 需要计算余弦相似度的单词a。 -* `word_b`: 需要计算余弦相似度的单词b。 + - ```python + import paddlehub as hub + embedding = hub.Module(name='w2v_baidu_encyclopedia_context_word-wordPosition_dim300') + # 获取单词的embedding + embedding.search("中国") + # 计算两个词向量的余弦相似度 + embedding.cosine_sim("中国", "美国") + # 计算两个词向量的内积 + embedding.dot("中国", "美国") + ``` -```python -def dot( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 +- ### 2、API -**参数** -* `word_a`: 需要计算内积的单词a。 -* `word_b`: 需要计算内积的单词b。 + - ```python + def __init__( + *args, + **kwargs + ) + ``` + - 创建一个Embedding Module对象,默认无需参数。 -```python -def get_vocab_path() -``` -获取本地词表文件的路径信息。 + - **参数** + - `*args`: 用户额外指定的列表类型的参数。 + - `**kwargs`:用户额外指定的关键字字典类型的参数。 + - 关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -```python -def get_tokenizer(*args, **kwargs) -``` -获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -**参数** -* `*args`: 额外传递的列表形式的参数。 -* `**kwargs`: 额外传递的字典形式的参数。 + - ```python + def search( + words: Union[List[str], str, int], + ) + ``` -关于额外参数的详情,可查看[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/develop/PaddleNLP/paddlenlp/data/tokenizer.py) + - 获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 + - **参数** + - `words`: 需要获取的词向量的词、词列表或者词编号。 -更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -## 代码示例 + - ```python + def cosine_sim( + word_a: str, + word_b: str, + ) + ``` -```python -import paddlehub as hub -embedding = hub.Module(name='w2v_baidu_encyclopedia_context_word-wordPosition_dim300') + - 计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 -# 获取单词的embedding -embedding.search("中国") -# 计算两个词向量的余弦相似度 -embedding.cosine_sim("中国", "美国") -# 计算两个词向量的内积 -embedding.dot("中国", "美国") -``` + - **参数** + - `word_a`: 需要计算余弦相似度的单词a。 + - `word_b`: 需要计算余弦相似度的单词b。 -## 部署服务 -通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 + - ```python + def dot( + word_a: str, + word_b: str, + ) + ``` -### Step1: 启动PaddleHub Serving + - 计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 -运行启动命令: + - **参数** + - `word_a`: 需要计算内积的单词a。 + - `word_b`: 需要计算内积的单词b。 -```shell -$ hub serving start -m w2v_baidu_encyclopedia_context_word-wordPosition_dim300 -``` -这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + - ```python + def get_vocab_path() 
+ ``` -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + - 获取本地词表文件的路径信息。 -### Step2: 发送预测请求 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + - ```python + def get_tokenizer(*args, **kwargs) + ``` -```python -import requests -import json + - 获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -# 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... ]] -word_pairs = [["中国", "美国"], ["今天", "明天"]] -# 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 -data = {"data": word_pairs} -# 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip -url = "http://10.12.121.132:8866/predict/w2v_baidu_encyclopedia_context_word-wordPosition_dim300" -# 指定post请求的headers为application/json方式 -headers = {"Content-Type": "application/json"} + - **参数** + - `*args`: 额外传递的列表形式的参数。 + - `**kwargs`: 额外传递的字典形式的参数。 -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -print(r.json()) -``` + - 关于额外参数的详情可参考[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/paddlenlp/data/tokenizer.py) + + - 更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -## 查看代码 -https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings +## 四、部署服务 -## 依赖 +- 通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 -paddlepaddle >= 2.0.0 +- ### Step1: 启动PaddleHub Serving -paddlehub >= 2.0.0 + - 运行启动命令: -## 更新历史 + - ```shell + $ hub serving start -m w2v_baidu_encyclopedia_context_word-wordPosition_dim300 + ``` + + - 这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步: 发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... 
]] + word_pairs = [["中国", "美国"], ["今天", "明天"]] + # 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 + data = {"data": word_pairs} + # 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip + url = "http://127.0.0.1:8866/predict/w2v_baidu_encyclopedia_context_word-wordPosition_dim300" + # 指定post请求的headers为application/json方式 + headers = {"Content-Type": "application/json"} + + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + ``` + + +## 五、更新历史 * 1.0.0 @@ -146,4 +174,7 @@ paddlehub >= 2.0.0 * 1.0.1 - 支持基于embedding的文本分类和序列标注finetune任务 + 优化模型 + - ```shell + $ hub install w2v_baidu_encyclopedia_context_word-wordPosition_dim300==1.0.1 + ``` diff --git a/modules/text/embedding/w2v_baidu_encyclopedia_context_word-word_dim300/README.md b/modules/text/embedding/w2v_baidu_encyclopedia_context_word-word_dim300/README.md index b1bf197cd8d4888f5650c52cbba942aea57c8749..2bf5e93cda0f20288e3198dc1658078a9f15fd77 100644 --- a/modules/text/embedding/w2v_baidu_encyclopedia_context_word-word_dim300/README.md +++ b/modules/text/embedding/w2v_baidu_encyclopedia_context_word-word_dim300/README.md @@ -1,144 +1,172 @@ -## 概述 -PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) +# w2v_baidu_encyclopedia_context_word-word_dim300 +|模型名称|w2v_baidu_encyclopedia_context_word-word_dim300| +| :--- | :---: | +|类别|文本-词嵌入| +|网络|w2v| +|数据集|baidu_encyclopedia| +|是否支持Fine-tuning|否| +|文件大小|677.74MB| +|词表大小|635952| +|最新更新日期|2021-04-28| +|数据指标|-| -## API +## 一、模型基本信息 -```python -def __init__( - *args, - **kwargs -) -``` +- ### 模型介绍 -创建一个Embedding Module对象,默认无需参数。 + - PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) -**参数** -* `*args`: 用户额外指定的列表类型的参数。 -* `**kwargs`:用户额外指定的关键字字典类型的参数。 +## 二、安装 -关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) +- ### 1、环境依赖 + - paddlepaddle >= 2.0.0 -```python -def search( - words: Union[List[str], str, int], -) -``` + - paddlehub >= 2.0.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) -获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 +- ### 2、安装 -**参数** -* `words`: 需要获取的词向量的词、词列表或者词编号。 + - ```shell + $ hub install w2v_baidu_encyclopedia_context_word-word_dim300 + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) +## 三、模型API -```python -def cosine_sim( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 +- ### 1、预测代码示例 -**参数** -* `word_a`: 需要计算余弦相似度的单词a。 -* `word_b`: 需要计算余弦相似度的单词b。 + - ```python + import paddlehub as hub + embedding = hub.Module(name='w2v_baidu_encyclopedia_context_word-word_dim300') + # 获取单词的embedding + embedding.search("中国") + # 计算两个词向量的余弦相似度 + embedding.cosine_sim("中国", "美国") + # 计算两个词向量的内积 + embedding.dot("中国", "美国") + ``` -```python -def dot( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 +- ### 2、API -**参数** -* `word_a`: 需要计算内积的单词a。 -* `word_b`: 
需要计算内积的单词b。 + - ```python + def __init__( + *args, + **kwargs + ) + ``` + - 创建一个Embedding Module对象,默认无需参数。 -```python -def get_vocab_path() -``` -获取本地词表文件的路径信息。 + - **参数** + - `*args`: 用户额外指定的列表类型的参数。 + - `**kwargs`:用户额外指定的关键字字典类型的参数。 + - 关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -```python -def get_tokenizer(*args, **kwargs) -``` -获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -**参数** -* `*args`: 额外传递的列表形式的参数。 -* `**kwargs`: 额外传递的字典形式的参数。 + - ```python + def search( + words: Union[List[str], str, int], + ) + ``` -关于额外参数的详情,可查看[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/develop/PaddleNLP/paddlenlp/data/tokenizer.py) + - 获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 + - **参数** + - `words`: 需要获取的词向量的词、词列表或者词编号。 -更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -## 代码示例 + - ```python + def cosine_sim( + word_a: str, + word_b: str, + ) + ``` -```python -import paddlehub as hub -embedding = hub.Module(name='w2v_baidu_encyclopedia_context_word-word_dim300') + - 计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 -# 获取单词的embedding -embedding.search("中国") -# 计算两个词向量的余弦相似度 -embedding.cosine_sim("中国", "美国") -# 计算两个词向量的内积 -embedding.dot("中国", "美国") -``` + - **参数** + - `word_a`: 需要计算余弦相似度的单词a。 + - `word_b`: 需要计算余弦相似度的单词b。 -## 部署服务 -通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 + - ```python + def dot( + word_a: str, + word_b: str, + ) + ``` -### Step1: 启动PaddleHub Serving + - 计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 -运行启动命令: + - **参数** + - `word_a`: 需要计算内积的单词a。 + - `word_b`: 需要计算内积的单词b。 -```shell -$ hub serving start -m w2v_baidu_encyclopedia_context_word-word_dim300 -``` -这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + - ```python + def get_vocab_path() + ``` -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + - 获取本地词表文件的路径信息。 -### Step2: 发送预测请求 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + - ```python + def get_tokenizer(*args, **kwargs) + ``` -```python -import requests -import json + - 获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -# 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... 
]] -word_pairs = [["中国", "美国"], ["今天", "明天"]] -# 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 -data = {"data": word_pairs} -# 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip -url = "http://10.12.121.132:8866/predict/w2v_baidu_encyclopedia_context_word-word_dim300" -# 指定post请求的headers为application/json方式 -headers = {"Content-Type": "application/json"} + - **参数** + - `*args`: 额外传递的列表形式的参数。 + - `**kwargs`: 额外传递的字典形式的参数。 -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -print(r.json()) -``` + - 关于额外参数的详情可参考[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/paddlenlp/data/tokenizer.py) -## 查看代码 + - 更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings -## 依赖 +## 四、部署服务 -paddlepaddle >= 2.0.0 +- 通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 -paddlehub >= 2.0.0 +- ### Step1: 启动PaddleHub Serving -## 更新历史 + - 运行启动命令: + + - ```shell + $ hub serving start -m w2v_baidu_encyclopedia_context_word-word_dim300 + ``` + + - 这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步: 发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... ]] + word_pairs = [["中国", "美国"], ["今天", "明天"]] + # 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 + data = {"data": word_pairs} + # 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip + url = "http://127.0.0.1:8866/predict/w2v_baidu_encyclopedia_context_word-word_dim300" + # 指定post请求的headers为application/json方式 + headers = {"Content-Type": "application/json"} + + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + ``` + + +## 五、更新历史 * 1.0.0 @@ -146,4 +174,7 @@ paddlehub >= 2.0.0 * 1.0.1 - 支持基于embedding的文本分类和序列标注finetune任务 + 优化模型 + - ```shell + $ hub install w2v_baidu_encyclopedia_context_word-word_dim300==1.0.1 + ``` diff --git a/modules/text/embedding/w2v_baidu_encyclopedia_target_bigram-char_dim300/README.md b/modules/text/embedding/w2v_baidu_encyclopedia_target_bigram-char_dim300/README.md index f42d628d494b406ab1bc7ad856b364f1abe58980..09467ef670cbe7557f7d0be68881d1c1dab02718 100644 --- a/modules/text/embedding/w2v_baidu_encyclopedia_target_bigram-char_dim300/README.md +++ b/modules/text/embedding/w2v_baidu_encyclopedia_target_bigram-char_dim300/README.md @@ -1,144 +1,172 @@ -## 概述 -PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) +# w2v_baidu_encyclopedia_target_bigram-char_dim300 +|模型名称|w2v_baidu_encyclopedia_target_bigram-char_dim300| +| :--- | :---: | +|类别|文本-词嵌入| +|网络|w2v| +|数据集|baidu_encyclopedia| +|是否支持Fine-tuning|否| +|文件大小|679.29MB| +|词表大小|635976| +|最新更新日期|2021-04-28| +|数据指标|-| -## API +## 一、模型基本信息 -```python -def __init__( - *args, - **kwargs -) -``` +- ### 模型介绍 -创建一个Embedding Module对象,默认无需参数。 + - PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) -**参数** -* `*args`: 用户额外指定的列表类型的参数。 -* `**kwargs`:用户额外指定的关键字字典类型的参数。 +## 二、安装 
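+
+- 按下述步骤完成安装后,可用如下示意性脚本快速验证模块能否正常加载(仅供参考;假设安装已成功,首次加载时一般会自动下载模型文件,耗时取决于网络环境):
+
+  - ```python
+    import paddlehub as hub
+
+    # 加载本模块,并打印本地词表文件路径,确认模型文件已就绪
+    embedding = hub.Module(name='w2v_baidu_encyclopedia_target_bigram-char_dim300')
+    print(embedding.get_vocab_path())
+    ```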
-关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) +- ### 1、环境依赖 + - paddlepaddle >= 2.0.0 -```python -def search( - words: Union[List[str], str, int], -) -``` + - paddlehub >= 2.0.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) -获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 +- ### 2、安装 -**参数** -* `words`: 需要获取的词向量的词、词列表或者词编号。 + - ```shell + $ hub install w2v_baidu_encyclopedia_target_bigram-char_dim300 + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) +## 三、模型API -```python -def cosine_sim( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 +- ### 1、预测代码示例 -**参数** -* `word_a`: 需要计算余弦相似度的单词a。 -* `word_b`: 需要计算余弦相似度的单词b。 + - ```python + import paddlehub as hub + embedding = hub.Module(name='w2v_baidu_encyclopedia_target_bigram-char_dim300') + # 获取单词的embedding + embedding.search("中国") + # 计算两个词向量的余弦相似度 + embedding.cosine_sim("中国", "美国") + # 计算两个词向量的内积 + embedding.dot("中国", "美国") + ``` -```python -def dot( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 +- ### 2、API -**参数** -* `word_a`: 需要计算内积的单词a。 -* `word_b`: 需要计算内积的单词b。 + - ```python + def __init__( + *args, + **kwargs + ) + ``` + - 创建一个Embedding Module对象,默认无需参数。 -```python -def get_vocab_path() -``` -获取本地词表文件的路径信息。 + - **参数** + - `*args`: 用户额外指定的列表类型的参数。 + - `**kwargs`:用户额外指定的关键字字典类型的参数。 + - 关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -```python -def get_tokenizer(*args, **kwargs) -``` -获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -**参数** -* `*args`: 额外传递的列表形式的参数。 -* `**kwargs`: 额外传递的字典形式的参数。 + - ```python + def search( + words: Union[List[str], str, int], + ) + ``` -关于额外参数的详情,可查看[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/develop/PaddleNLP/paddlenlp/data/tokenizer.py) + - 获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 + - **参数** + - `words`: 需要获取的词向量的词、词列表或者词编号。 -更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -## 代码示例 + - ```python + def cosine_sim( + word_a: str, + word_b: str, + ) + ``` -```python -import paddlehub as hub -embedding = hub.Module(name='w2v_baidu_encyclopedia_target_bigram-char_dim300') + - 计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 -# 获取单词的embedding -embedding.search("中国") -# 计算两个词向量的余弦相似度 -embedding.cosine_sim("中国", "美国") -# 计算两个词向量的内积 -embedding.dot("中国", "美国") -``` + - **参数** + - `word_a`: 需要计算余弦相似度的单词a。 + - `word_b`: 需要计算余弦相似度的单词b。 -## 部署服务 -通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 + - ```python + def dot( + word_a: str, + word_b: str, + ) + ``` -### Step1: 启动PaddleHub Serving + - 计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 -运行启动命令: + - **参数** + - `word_a`: 需要计算内积的单词a。 + - `word_b`: 需要计算内积的单词b。 -```shell -$ hub serving start -m w2v_baidu_encyclopedia_target_bigram-char_dim300 -``` -这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + - ```python + def get_vocab_path() + ``` -**NOTE:** 
如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + - 获取本地词表文件的路径信息。 -### Step2: 发送预测请求 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + - ```python + def get_tokenizer(*args, **kwargs) + ``` -```python -import requests -import json + - 获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -# 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... ]] -word_pairs = [["中国", "美国"], ["今天", "明天"]] -# 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 -data = {"data": word_pairs} -# 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip -url = "http://10.12.121.132:8866/predict/w2v_baidu_encyclopedia_target_bigram-char_dim300" -# 指定post请求的headers为application/json方式 -headers = {"Content-Type": "application/json"} + - **参数** + - `*args`: 额外传递的列表形式的参数。 + - `**kwargs`: 额外传递的字典形式的参数。 -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -print(r.json()) -``` + - 关于额外参数的详情可参考[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/paddlenlp/data/tokenizer.py) -## 查看代码 + - 更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings -## 依赖 +## 四、部署服务 -paddlepaddle >= 2.0.0 +- 通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 -paddlehub >= 2.0.0 +- ### Step1: 启动PaddleHub Serving -## 更新历史 + - 运行启动命令: + + - ```shell + $ hub serving start -m w2v_baidu_encyclopedia_target_bigram-char_dim300 + ``` + + - 这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步: 发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... 
]] + word_pairs = [["中国", "美国"], ["今天", "明天"]] + # 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 + data = {"data": word_pairs} + # 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip + url = "http://127.0.0.1:8866/predict/w2v_baidu_encyclopedia_target_bigram-char_dim300" + # 指定post请求的headers为application/json方式 + headers = {"Content-Type": "application/json"} + + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + ``` + + +## 五、更新历史 * 1.0.0 @@ -146,4 +174,7 @@ paddlehub >= 2.0.0 * 1.0.1 - 支持基于embedding的文本分类和序列标注finetune任务 + 优化模型 + - ```shell + $ hub install w2v_baidu_encyclopedia_target_bigram-char_dim300==1.0.1 + ``` diff --git a/modules/text/embedding/w2v_baidu_encyclopedia_target_word-character_char1-1_dim300/README.md b/modules/text/embedding/w2v_baidu_encyclopedia_target_word-character_char1-1_dim300/README.md index 6c0714d1c21978c32f9aea1fed31d5b3907ec6b7..cbf664f3b916c9cb144ad0ddbd6d55a8c7d4dd43 100644 --- a/modules/text/embedding/w2v_baidu_encyclopedia_target_word-character_char1-1_dim300/README.md +++ b/modules/text/embedding/w2v_baidu_encyclopedia_target_word-character_char1-1_dim300/README.md @@ -1,144 +1,172 @@ -## 概述 -PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) +# w2v_baidu_encyclopedia_target_word-character_char1-1_dim300 +|模型名称|w2v_baidu_encyclopedia_target_word-character_char1-1_dim300| +| :--- | :---: | +|类别|文本-词嵌入| +|网络|w2v| +|数据集|baidu_encyclopedia| +|是否支持Fine-tuning|否| +|文件大小|679.15MB| +|词表大小|636038| +|最新更新日期|2021-04-28| +|数据指标|-| -## API +## 一、模型基本信息 -```python -def __init__( - *args, - **kwargs -) -``` +- ### 模型介绍 -创建一个Embedding Module对象,默认无需参数。 + - PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) -**参数** -* `*args`: 用户额外指定的列表类型的参数。 -* `**kwargs`:用户额外指定的关键字字典类型的参数。 +## 二、安装 -关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) +- ### 1、环境依赖 + - paddlepaddle >= 2.0.0 -```python -def search( - words: Union[List[str], str, int], -) -``` + - paddlehub >= 2.0.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) -获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 +- ### 2、安装 -**参数** -* `words`: 需要获取的词向量的词、词列表或者词编号。 + - ```shell + $ hub install w2v_baidu_encyclopedia_target_word-character_char1-1_dim300 + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) +## 三、模型API -```python -def cosine_sim( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 +- ### 1、预测代码示例 -**参数** -* `word_a`: 需要计算余弦相似度的单词a。 -* `word_b`: 需要计算余弦相似度的单词b。 + - ```python + import paddlehub as hub + embedding = hub.Module(name='w2v_baidu_encyclopedia_target_word-character_char1-1_dim300') + # 获取单词的embedding + embedding.search("中国") + # 计算两个词向量的余弦相似度 + embedding.cosine_sim("中国", "美国") + # 计算两个词向量的内积 + embedding.dot("中国", "美国") + ``` -```python -def dot( - word_a: str, - word_b: str, -) -``` 
-计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 +- ### 2、API -**参数** -* `word_a`: 需要计算内积的单词a。 -* `word_b`: 需要计算内积的单词b。 + - ```python + def __init__( + *args, + **kwargs + ) + ``` + - 创建一个Embedding Module对象,默认无需参数。 -```python -def get_vocab_path() -``` -获取本地词表文件的路径信息。 + - **参数** + - `*args`: 用户额外指定的列表类型的参数。 + - `**kwargs`:用户额外指定的关键字字典类型的参数。 + - 关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -```python -def get_tokenizer(*args, **kwargs) -``` -获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -**参数** -* `*args`: 额外传递的列表形式的参数。 -* `**kwargs`: 额外传递的字典形式的参数。 + - ```python + def search( + words: Union[List[str], str, int], + ) + ``` -关于额外参数的详情,可查看[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/develop/PaddleNLP/paddlenlp/data/tokenizer.py) + - 获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 + - **参数** + - `words`: 需要获取的词向量的词、词列表或者词编号。 -更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -## 代码示例 + - ```python + def cosine_sim( + word_a: str, + word_b: str, + ) + ``` -```python -import paddlehub as hub -embedding = hub.Module(name='w2v_baidu_encyclopedia_target_word-character_char1-1_dim300') + - 计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 -# 获取单词的embedding -embedding.search("中国") -# 计算两个词向量的余弦相似度 -embedding.cosine_sim("中国", "美国") -# 计算两个词向量的内积 -embedding.dot("中国", "美国") -``` + - **参数** + - `word_a`: 需要计算余弦相似度的单词a。 + - `word_b`: 需要计算余弦相似度的单词b。 -## 部署服务 -通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 + - ```python + def dot( + word_a: str, + word_b: str, + ) + ``` -### Step1: 启动PaddleHub Serving + - 计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 -运行启动命令: + - **参数** + - `word_a`: 需要计算内积的单词a。 + - `word_b`: 需要计算内积的单词b。 -```shell -$ hub serving start -m w2v_baidu_encyclopedia_target_word-character_char1-1_dim300 -``` -这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + - ```python + def get_vocab_path() + ``` -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + - 获取本地词表文件的路径信息。 -### Step2: 发送预测请求 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + - ```python + def get_tokenizer(*args, **kwargs) + ``` -```python -import requests -import json + - 获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -# 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... 
]] -word_pairs = [["中国", "美国"], ["今天", "明天"]] -# 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 -data = {"data": word_pairs} -# 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip -url = "http://10.12.121.132:8866/predict/w2v_baidu_encyclopedia_target_word-character_char1-1_dim300" -# 指定post请求的headers为application/json方式 -headers = {"Content-Type": "application/json"} + - **参数** + - `*args`: 额外传递的列表形式的参数。 + - `**kwargs`: 额外传递的字典形式的参数。 -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -print(r.json()) -``` + - 关于额外参数的详情可参考[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/paddlenlp/data/tokenizer.py) -## 查看代码 + - 更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings -## 依赖 +## 四、部署服务 -paddlepaddle >= 2.0.0 +- 通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 -paddlehub >= 2.0.0 +- ### Step1: 启动PaddleHub Serving -## 更新历史 + - 运行启动命令: + + - ```shell + $ hub serving start -m w2v_baidu_encyclopedia_target_word-character_char1-1_dim300 + ``` + + - 这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步: 发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... ]] + word_pairs = [["中国", "美国"], ["今天", "明天"]] + # 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 + data = {"data": word_pairs} + # 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip + url = "http://127.0.0.1:8866/predict/w2v_baidu_encyclopedia_target_word-character_char1-1_dim300" + # 指定post请求的headers为application/json方式 + headers = {"Content-Type": "application/json"} + + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + ``` + + +## 五、更新历史 * 1.0.0 @@ -146,4 +174,7 @@ paddlehub >= 2.0.0 * 1.0.1 - 支持基于embedding的文本分类和序列标注finetune任务 + 优化模型 + - ```shell + $ hub install w2v_baidu_encyclopedia_target_word-character_char1-1_dim300==1.0.1 + ``` diff --git a/modules/text/embedding/w2v_baidu_encyclopedia_target_word-character_char1-2_dim300/README.md b/modules/text/embedding/w2v_baidu_encyclopedia_target_word-character_char1-2_dim300/README.md index 384ef04cd167fca9a829e1c8cbc83cd82993c9d4..1358567413cc9e71b15641cdd0ce6e708c6a9d66 100644 --- a/modules/text/embedding/w2v_baidu_encyclopedia_target_word-character_char1-2_dim300/README.md +++ b/modules/text/embedding/w2v_baidu_encyclopedia_target_word-character_char1-2_dim300/README.md @@ -1,144 +1,172 @@ -## 概述 -PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) +# w2v_baidu_encyclopedia_target_word-character_char1-2_dim300 +|模型名称|w2v_baidu_encyclopedia_target_word-character_char1-2_dim300| +| :--- | :---: | +|类别|文本-词嵌入| +|网络|w2v| +|数据集|baidu_encyclopedia| +|是否支持Fine-tuning|否| +|文件大小|679.30MB| +|词表大小|636038| +|最新更新日期|2021-04-28| +|数据指标|-| -## API +## 一、模型基本信息 -```python -def __init__( - *args, - **kwargs -) -``` +- ### 模型介绍 -创建一个Embedding Module对象,默认无需参数。 + - PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) -**参数** 
-* `*args`: 用户额外指定的列表类型的参数。 -* `**kwargs`:用户额外指定的关键字字典类型的参数。 +## 二、安装 -关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) +- ### 1、环境依赖 + - paddlepaddle >= 2.0.0 -```python -def search( - words: Union[List[str], str, int], -) -``` + - paddlehub >= 2.0.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) -获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 +- ### 2、安装 -**参数** -* `words`: 需要获取的词向量的词、词列表或者词编号。 + - ```shell + $ hub install w2v_baidu_encyclopedia_target_word-character_char1-2_dim300 + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) +## 三、模型API -```python -def cosine_sim( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 +- ### 1、预测代码示例 -**参数** -* `word_a`: 需要计算余弦相似度的单词a。 -* `word_b`: 需要计算余弦相似度的单词b。 + - ```python + import paddlehub as hub + embedding = hub.Module(name='w2v_baidu_encyclopedia_target_word-character_char1-2_dim300') + # 获取单词的embedding + embedding.search("中国") + # 计算两个词向量的余弦相似度 + embedding.cosine_sim("中国", "美国") + # 计算两个词向量的内积 + embedding.dot("中国", "美国") + ``` -```python -def dot( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 +- ### 2、API -**参数** -* `word_a`: 需要计算内积的单词a。 -* `word_b`: 需要计算内积的单词b。 + - ```python + def __init__( + *args, + **kwargs + ) + ``` + - 创建一个Embedding Module对象,默认无需参数。 -```python -def get_vocab_path() -``` -获取本地词表文件的路径信息。 + - **参数** + - `*args`: 用户额外指定的列表类型的参数。 + - `**kwargs`:用户额外指定的关键字字典类型的参数。 + - 关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -```python -def get_tokenizer(*args, **kwargs) -``` -获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -**参数** -* `*args`: 额外传递的列表形式的参数。 -* `**kwargs`: 额外传递的字典形式的参数。 + - ```python + def search( + words: Union[List[str], str, int], + ) + ``` -关于额外参数的详情,可查看[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/develop/PaddleNLP/paddlenlp/data/tokenizer.py) + - 获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 + - **参数** + - `words`: 需要获取的词向量的词、词列表或者词编号。 -更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -## 代码示例 + - ```python + def cosine_sim( + word_a: str, + word_b: str, + ) + ``` -```python -import paddlehub as hub -embedding = hub.Module(name='w2v_baidu_encyclopedia_target_word-character_char1-2_dim300') + - 计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 -# 获取单词的embedding -embedding.search("中国") -# 计算两个词向量的余弦相似度 -embedding.cosine_sim("中国", "美国") -# 计算两个词向量的内积 -embedding.dot("中国", "美国") -``` + - **参数** + - `word_a`: 需要计算余弦相似度的单词a。 + - `word_b`: 需要计算余弦相似度的单词b。 -## 部署服务 -通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 + - ```python + def dot( + word_a: str, + word_b: str, + ) + ``` -### Step1: 启动PaddleHub Serving + - 计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 -运行启动命令: + - **参数** + - `word_a`: 需要计算内积的单词a。 + - `word_b`: 需要计算内积的单词b。 -```shell -$ hub serving start -m w2v_baidu_encyclopedia_target_word-character_char1-2_dim300 
-``` -这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + - ```python + def get_vocab_path() + ``` -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + - 获取本地词表文件的路径信息。 -### Step2: 发送预测请求 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + - ```python + def get_tokenizer(*args, **kwargs) + ``` -```python -import requests -import json + - 获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -# 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... ]] -word_pairs = [["中国", "美国"], ["今天", "明天"]] -# 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 -data = {"data": word_pairs} -# 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip -url = "http://10.12.121.132:8866/predict/w2v_baidu_encyclopedia_target_word-character_char1-2_dim300" -# 指定post请求的headers为application/json方式 -headers = {"Content-Type": "application/json"} + - **参数** + - `*args`: 额外传递的列表形式的参数。 + - `**kwargs`: 额外传递的字典形式的参数。 -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -print(r.json()) -``` + - 关于额外参数的详情可参考[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/paddlenlp/data/tokenizer.py) -## 查看代码 + - 更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings -## 依赖 +## 四、部署服务 -paddlepaddle >= 2.0.0 +- 通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 -paddlehub >= 2.0.0 +- ### Step1: 启动PaddleHub Serving -## 更新历史 + - 运行启动命令: + + - ```shell + $ hub serving start -m w2v_baidu_encyclopedia_target_word-character_char1-2_dim300 + ``` + + - 这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步: 发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... 
]] + word_pairs = [["中国", "美国"], ["今天", "明天"]] + # 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 + data = {"data": word_pairs} + # 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip + url = "http://127.0.0.1:8866/predict/w2v_baidu_encyclopedia_target_word-character_char1-2_dim300" + # 指定post请求的headers为application/json方式 + headers = {"Content-Type": "application/json"} + + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + ``` + + +## 五、更新历史 * 1.0.0 @@ -146,4 +174,7 @@ paddlehub >= 2.0.0 * 1.0.1 - 支持基于embedding的文本分类和序列标注finetune任务 + 优化模型 + - ```shell + $ hub install w2v_baidu_encyclopedia_target_word-character_char1-2_dim300==1.0.1 + ``` diff --git a/modules/text/embedding/w2v_baidu_encyclopedia_target_word-character_char1-4_dim300/README.md b/modules/text/embedding/w2v_baidu_encyclopedia_target_word-character_char1-4_dim300/README.md index bdfe3a70580798c74518afff6ae1f3aa1c2fffe5..8471592a07491f0fdf2d8329afcd388cb2946ed2 100644 --- a/modules/text/embedding/w2v_baidu_encyclopedia_target_word-character_char1-4_dim300/README.md +++ b/modules/text/embedding/w2v_baidu_encyclopedia_target_word-character_char1-4_dim300/README.md @@ -1,144 +1,172 @@ -## 概述 -PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) +# w2v_baidu_encyclopedia_target_word-character_char1-4_dim300 +|模型名称|w2v_baidu_encyclopedia_target_word-character_char1-4_dim300| +| :--- | :---: | +|类别|文本-词嵌入| +|网络|w2v| +|数据集|baidu_encyclopedia| +|是否支持Fine-tuning|否| +|文件大小|679.51MB| +|词表大小|636038| +|最新更新日期|2021-04-28| +|数据指标|-| -## API +## 一、模型基本信息 -```python -def __init__( - *args, - **kwargs -) -``` +- ### 模型介绍 -创建一个Embedding Module对象,默认无需参数。 + - PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) -**参数** -* `*args`: 用户额外指定的列表类型的参数。 -* `**kwargs`:用户额外指定的关键字字典类型的参数。 +## 二、安装 -关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) +- ### 1、环境依赖 + - paddlepaddle >= 2.0.0 -```python -def search( - words: Union[List[str], str, int], -) -``` + - paddlehub >= 2.0.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) -获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 +- ### 2、安装 -**参数** -* `words`: 需要获取的词向量的词、词列表或者词编号。 + - ```shell + $ hub install w2v_baidu_encyclopedia_target_word-character_char1-4_dim300 + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) +## 三、模型API -```python -def cosine_sim( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 +- ### 1、预测代码示例 -**参数** -* `word_a`: 需要计算余弦相似度的单词a。 -* `word_b`: 需要计算余弦相似度的单词b。 + - ```python + import paddlehub as hub + embedding = hub.Module(name='w2v_baidu_encyclopedia_target_word-character_char1-4_dim300') + # 获取单词的embedding + embedding.search("中国") + # 计算两个词向量的余弦相似度 + embedding.cosine_sim("中国", "美国") + # 计算两个词向量的内积 + embedding.dot("中国", "美国") + ``` -```python -def dot( - word_a: str, - word_b: str, -) -``` 
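+
+  - 作为对上面预测示例的补充,下面的示意代码展示了各返回值大致的含义:`search` 返回 300 维词向量,`cosine_sim` 返回余弦相似度(取值大致在 -1 到 1 之间),`dot` 返回内积;返回值的具体类型以实际安装版本为准。
+
+  - ```python
+    import paddlehub as hub
+    embedding = hub.Module(name='w2v_baidu_encyclopedia_target_word-character_char1-4_dim300')
+
+    vec = embedding.search("中国")
+    print(vec)                                   # "中国" 的 300 维词向量
+    print(embedding.cosine_sim("中国", "美国"))  # 余弦相似度,越接近 1 表示语义越相近
+    print(embedding.dot("中国", "美国"))         # 两个词向量的内积
+    ```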
-计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 +- ### 2、API -**参数** -* `word_a`: 需要计算内积的单词a。 -* `word_b`: 需要计算内积的单词b。 + - ```python + def __init__( + *args, + **kwargs + ) + ``` + - 创建一个Embedding Module对象,默认无需参数。 -```python -def get_vocab_path() -``` -获取本地词表文件的路径信息。 + - **参数** + - `*args`: 用户额外指定的列表类型的参数。 + - `**kwargs`:用户额外指定的关键字字典类型的参数。 + - 关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -```python -def get_tokenizer(*args, **kwargs) -``` -获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -**参数** -* `*args`: 额外传递的列表形式的参数。 -* `**kwargs`: 额外传递的字典形式的参数。 + - ```python + def search( + words: Union[List[str], str, int], + ) + ``` -关于额外参数的详情,可查看[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/develop/PaddleNLP/paddlenlp/data/tokenizer.py) + - 获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 + - **参数** + - `words`: 需要获取的词向量的词、词列表或者词编号。 -更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -## 代码示例 + - ```python + def cosine_sim( + word_a: str, + word_b: str, + ) + ``` -```python -import paddlehub as hub -embedding = hub.Module(name='w2v_baidu_encyclopedia_target_word-character_char1-4_dim300') + - 计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 -# 获取单词的embedding -embedding.search("中国") -# 计算两个词向量的余弦相似度 -embedding.cosine_sim("中国", "美国") -# 计算两个词向量的内积 -embedding.dot("中国", "美国") -``` + - **参数** + - `word_a`: 需要计算余弦相似度的单词a。 + - `word_b`: 需要计算余弦相似度的单词b。 -## 部署服务 -通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 + - ```python + def dot( + word_a: str, + word_b: str, + ) + ``` -### Step1: 启动PaddleHub Serving + - 计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 -运行启动命令: + - **参数** + - `word_a`: 需要计算内积的单词a。 + - `word_b`: 需要计算内积的单词b。 -```shell -$ hub serving start -m w2v_baidu_encyclopedia_target_word-character_char1-4_dim300 -``` -这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + - ```python + def get_vocab_path() + ``` -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + - 获取本地词表文件的路径信息。 -### Step2: 发送预测请求 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + - ```python + def get_tokenizer(*args, **kwargs) + ``` -```python -import requests -import json + - 获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -# 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... 
]] -word_pairs = [["中国", "美国"], ["今天", "明天"]] -# 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 -data = {"data": word_pairs} -# 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip -url = "http://10.12.121.132:8866/predict/w2v_baidu_encyclopedia_target_word-character_char1-4_dim300" -# 指定post请求的headers为application/json方式 -headers = {"Content-Type": "application/json"} + - **参数** + - `*args`: 额外传递的列表形式的参数。 + - `**kwargs`: 额外传递的字典形式的参数。 -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -print(r.json()) -``` + - 关于额外参数的详情可参考[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/paddlenlp/data/tokenizer.py) -## 查看代码 + - 更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings -## 依赖 +## 四、部署服务 -paddlepaddle >= 2.0.0 +- 通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 -paddlehub >= 2.0.0 +- ### Step1: 启动PaddleHub Serving -## 更新历史 + - 运行启动命令: + + - ```shell + $ hub serving start -m w2v_baidu_encyclopedia_target_word-character_char1-4_dim300 + ``` + + - 这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步: 发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... ]] + word_pairs = [["中国", "美国"], ["今天", "明天"]] + # 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 + data = {"data": word_pairs} + # 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip + url = "http://127.0.0.1:8866/predict/w2v_baidu_encyclopedia_target_word-character_char1-4_dim300" + # 指定post请求的headers为application/json方式 + headers = {"Content-Type": "application/json"} + + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + ``` + + +## 五、更新历史 * 1.0.0 @@ -146,4 +174,7 @@ paddlehub >= 2.0.0 * 1.0.1 - 支持基于embedding的文本分类和序列标注finetune任务 + 优化模型 + - ```shell + $ hub install w2v_baidu_encyclopedia_target_word-character_char1-4_dim300==1.0.1 + ``` diff --git a/modules/text/embedding/w2v_baidu_encyclopedia_target_word-ngram_1-2_dim300/README.md b/modules/text/embedding/w2v_baidu_encyclopedia_target_word-ngram_1-2_dim300/README.md index cce2987b1d4a031e09a6087af44843e75ce425f7..b6d1429b633ce52e6f329929d3a60ac0b5916640 100644 --- a/modules/text/embedding/w2v_baidu_encyclopedia_target_word-ngram_1-2_dim300/README.md +++ b/modules/text/embedding/w2v_baidu_encyclopedia_target_word-ngram_1-2_dim300/README.md @@ -1,144 +1,172 @@ -## 概述 -PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) +# w2v_baidu_encyclopedia_target_word-ngram_1-2_dim300 +|模型名称|w2v_baidu_encyclopedia_target_word-ngram_1-2_dim300| +| :--- | :---: | +|类别|文本-词嵌入| +|网络|w2v| +|数据集|baidu_encyclopedia| +|是否支持Fine-tuning|否| +|文件大小|679.48MB| +|词表大小|635977| +|最新更新日期|2021-04-28| +|数据指标|-| -## API +## 一、模型基本信息 -```python -def __init__( - *args, - **kwargs -) -``` +- ### 模型介绍 -创建一个Embedding Module对象,默认无需参数。 + - PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) -**参数** -* `*args`: 用户额外指定的列表类型的参数。 -* 
`**kwargs`:用户额外指定的关键字字典类型的参数。 +## 二、安装 -关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) +- ### 1、环境依赖 + - paddlepaddle >= 2.0.0 -```python -def search( - words: Union[List[str], str, int], -) -``` + - paddlehub >= 2.0.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) -获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 +- ### 2、安装 -**参数** -* `words`: 需要获取的词向量的词、词列表或者词编号。 + - ```shell + $ hub install w2v_baidu_encyclopedia_target_word-ngram_1-2_dim300 + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) +## 三、模型API -```python -def cosine_sim( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 +- ### 1、预测代码示例 -**参数** -* `word_a`: 需要计算余弦相似度的单词a。 -* `word_b`: 需要计算余弦相似度的单词b。 + - ```python + import paddlehub as hub + embedding = hub.Module(name='w2v_baidu_encyclopedia_target_word-ngram_1-2_dim300') + # 获取单词的embedding + embedding.search("中国") + # 计算两个词向量的余弦相似度 + embedding.cosine_sim("中国", "美国") + # 计算两个词向量的内积 + embedding.dot("中国", "美国") + ``` -```python -def dot( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 +- ### 2、API -**参数** -* `word_a`: 需要计算内积的单词a。 -* `word_b`: 需要计算内积的单词b。 + - ```python + def __init__( + *args, + **kwargs + ) + ``` + - 创建一个Embedding Module对象,默认无需参数。 -```python -def get_vocab_path() -``` -获取本地词表文件的路径信息。 + - **参数** + - `*args`: 用户额外指定的列表类型的参数。 + - `**kwargs`:用户额外指定的关键字字典类型的参数。 + - 关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -```python -def get_tokenizer(*args, **kwargs) -``` -获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -**参数** -* `*args`: 额外传递的列表形式的参数。 -* `**kwargs`: 额外传递的字典形式的参数。 + - ```python + def search( + words: Union[List[str], str, int], + ) + ``` -关于额外参数的详情,可查看[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/develop/PaddleNLP/paddlenlp/data/tokenizer.py) + - 获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 + - **参数** + - `words`: 需要获取的词向量的词、词列表或者词编号。 -更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -## 代码示例 + - ```python + def cosine_sim( + word_a: str, + word_b: str, + ) + ``` -```python -import paddlehub as hub -embedding = hub.Module(name='w2v_baidu_encyclopedia_target_word-ngram_1-2_dim300') + - 计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 -# 获取单词的embedding -embedding.search("中国") -# 计算两个词向量的余弦相似度 -embedding.cosine_sim("中国", "美国") -# 计算两个词向量的内积 -embedding.dot("中国", "美国") -``` + - **参数** + - `word_a`: 需要计算余弦相似度的单词a。 + - `word_b`: 需要计算余弦相似度的单词b。 -## 部署服务 -通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 + - ```python + def dot( + word_a: str, + word_b: str, + ) + ``` -### Step1: 启动PaddleHub Serving + - 计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 -运行启动命令: + - **参数** + - `word_a`: 需要计算内积的单词a。 + - `word_b`: 需要计算内积的单词b。 -```shell -$ hub serving start -m w2v_baidu_encyclopedia_target_word-ngram_1-2_dim300 -``` -这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + - ```python + 
def get_vocab_path() + ``` -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + - 获取本地词表文件的路径信息。 -### Step2: 发送预测请求 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + - ```python + def get_tokenizer(*args, **kwargs) + ``` -```python -import requests -import json + - 获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -# 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... ]] -word_pairs = [["中国", "美国"], ["今天", "明天"]] -# 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 -data = {"data": word_pairs} -# 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip -url = "http://10.12.121.132:8866/predict/w2v_baidu_encyclopedia_target_word-ngram_1-2_dim300" -# 指定post请求的headers为application/json方式 -headers = {"Content-Type": "application/json"} + - **参数** + - `*args`: 额外传递的列表形式的参数。 + - `**kwargs`: 额外传递的字典形式的参数。 -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -print(r.json()) -``` + - 关于额外参数的详情可参考[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/paddlenlp/data/tokenizer.py) -## 查看代码 + - 更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings -## 依赖 +## 四、部署服务 -paddlepaddle >= 2.0.0 +- 通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 -paddlehub >= 2.0.0 +- ### Step1: 启动PaddleHub Serving -## 更新历史 + - 运行启动命令: + + - ```shell + $ hub serving start -m w2v_baidu_encyclopedia_target_word-ngram_1-2_dim300 + ``` + + - 这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步: 发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... 
]] + word_pairs = [["中国", "美国"], ["今天", "明天"]] + # 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 + data = {"data": word_pairs} + # 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip + url = "http://127.0.0.1:8866/predict/w2v_baidu_encyclopedia_target_word-ngram_1-2_dim300" + # 指定post请求的headers为application/json方式 + headers = {"Content-Type": "application/json"} + + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + ``` + + +## 五、更新历史 * 1.0.0 @@ -146,4 +174,7 @@ paddlehub >= 2.0.0 * 1.0.1 - 支持基于embedding的文本分类和序列标注finetune任务 + 优化模型 + - ```shell + $ hub install w2v_baidu_encyclopedia_target_word-ngram_1-2_dim300==1.0.1 + ``` diff --git a/modules/text/embedding/w2v_baidu_encyclopedia_target_word-ngram_1-3_dim300/README.md b/modules/text/embedding/w2v_baidu_encyclopedia_target_word-ngram_1-3_dim300/README.md index 5da791f7c7a88e388f648a3c27c55daec232dd56..b50eee0cbd86dc13fe1025c02c00de4c9f2b8005 100644 --- a/modules/text/embedding/w2v_baidu_encyclopedia_target_word-ngram_1-3_dim300/README.md +++ b/modules/text/embedding/w2v_baidu_encyclopedia_target_word-ngram_1-3_dim300/README.md @@ -1,144 +1,172 @@ -## 概述 -PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) +# w2v_baidu_encyclopedia_target_word-ngram_1-3_dim300 +|模型名称|w2v_baidu_encyclopedia_target_word-ngram_1-3_dim300| +| :--- | :---: | +|类别|文本-词嵌入| +|网络|w2v| +|数据集|baidu_encyclopedia| +|是否支持Fine-tuning|否| +|文件大小|671.27MB| +|词表大小|628669| +|最新更新日期|2021-04-28| +|数据指标|-| -## API +## 一、模型基本信息 -```python -def __init__( - *args, - **kwargs -) -``` +- ### 模型介绍 -创建一个Embedding Module对象,默认无需参数。 + - PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) -**参数** -* `*args`: 用户额外指定的列表类型的参数。 -* `**kwargs`:用户额外指定的关键字字典类型的参数。 +## 二、安装 -关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) +- ### 1、环境依赖 + - paddlepaddle >= 2.0.0 -```python -def search( - words: Union[List[str], str, int], -) -``` + - paddlehub >= 2.0.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) -获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 +- ### 2、安装 -**参数** -* `words`: 需要获取的词向量的词、词列表或者词编号。 + - ```shell + $ hub install w2v_baidu_encyclopedia_target_word-ngram_1-3_dim300 + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) +## 三、模型API -```python -def cosine_sim( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 +- ### 1、预测代码示例 -**参数** -* `word_a`: 需要计算余弦相似度的单词a。 -* `word_b`: 需要计算余弦相似度的单词b。 + - ```python + import paddlehub as hub + embedding = hub.Module(name='w2v_baidu_encyclopedia_target_word-ngram_1-3_dim300') + # 获取单词的embedding + embedding.search("中国") + # 计算两个词向量的余弦相似度 + embedding.cosine_sim("中国", "美国") + # 计算两个词向量的内积 + embedding.dot("中国", "美国") + ``` -```python -def dot( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 +- ### 2、API -**参数** -* `word_a`: 
需要计算内积的单词a。 -* `word_b`: 需要计算内积的单词b。 + - ```python + def __init__( + *args, + **kwargs + ) + ``` + - 创建一个Embedding Module对象,默认无需参数。 -```python -def get_vocab_path() -``` -获取本地词表文件的路径信息。 + - **参数** + - `*args`: 用户额外指定的列表类型的参数。 + - `**kwargs`:用户额外指定的关键字字典类型的参数。 + - 关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -```python -def get_tokenizer(*args, **kwargs) -``` -获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -**参数** -* `*args`: 额外传递的列表形式的参数。 -* `**kwargs`: 额外传递的字典形式的参数。 + - ```python + def search( + words: Union[List[str], str, int], + ) + ``` -关于额外参数的详情,可查看[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/develop/PaddleNLP/paddlenlp/data/tokenizer.py) + - 获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 + - **参数** + - `words`: 需要获取的词向量的词、词列表或者词编号。 -更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -## 代码示例 + - ```python + def cosine_sim( + word_a: str, + word_b: str, + ) + ``` -```python -import paddlehub as hub -embedding = hub.Module(name='w2v_baidu_encyclopedia_target_word-ngram_1-3_dim300') + - 计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 -# 获取单词的embedding -embedding.search("中国") -# 计算两个词向量的余弦相似度 -embedding.cosine_sim("中国", "美国") -# 计算两个词向量的内积 -embedding.dot("中国", "美国") -``` + - **参数** + - `word_a`: 需要计算余弦相似度的单词a。 + - `word_b`: 需要计算余弦相似度的单词b。 -## 部署服务 -通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 + - ```python + def dot( + word_a: str, + word_b: str, + ) + ``` -### Step1: 启动PaddleHub Serving + - 计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 -运行启动命令: + - **参数** + - `word_a`: 需要计算内积的单词a。 + - `word_b`: 需要计算内积的单词b。 -```shell -$ hub serving start -m w2v_baidu_encyclopedia_target_word-ngram_1-3_dim300 -``` -这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + - ```python + def get_vocab_path() + ``` -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + - 获取本地词表文件的路径信息。 -### Step2: 发送预测请求 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + - ```python + def get_tokenizer(*args, **kwargs) + ``` -```python -import requests -import json + - 获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -# 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... 
]] -word_pairs = [["中国", "美国"], ["今天", "明天"]] -# 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 -data = {"data": word_pairs} -# 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip -url = "http://10.12.121.132:8866/predict/w2v_baidu_encyclopedia_target_word-ngram_1-3_dim300" -# 指定post请求的headers为application/json方式 -headers = {"Content-Type": "application/json"} + - **参数** + - `*args`: 额外传递的列表形式的参数。 + - `**kwargs`: 额外传递的字典形式的参数。 -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -print(r.json()) -``` + - 关于额外参数的详情可参考[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/paddlenlp/data/tokenizer.py) -## 查看代码 + - 更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings -## 依赖 +## 四、部署服务 -paddlepaddle >= 2.0.0 +- 通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 -paddlehub >= 2.0.0 +- ### Step1: 启动PaddleHub Serving -## 更新历史 + - 运行启动命令: + + - ```shell + $ hub serving start -m w2v_baidu_encyclopedia_target_word-ngram_1-3_dim300 + ``` + + - 这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步: 发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... ]] + word_pairs = [["中国", "美国"], ["今天", "明天"]] + # 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 + data = {"data": word_pairs} + # 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip + url = "http://127.0.0.1:8866/predict/w2v_baidu_encyclopedia_target_word-ngram_1-3_dim300" + # 指定post请求的headers为application/json方式 + headers = {"Content-Type": "application/json"} + + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + ``` + + +## 五、更新历史 * 1.0.0 @@ -146,4 +174,7 @@ paddlehub >= 2.0.0 * 1.0.1 - 支持基于embedding的文本分类和序列标注finetune任务 + 优化模型 + - ```shell + $ hub install w2v_baidu_encyclopedia_target_word-ngram_1-3_dim300==1.0.1 + ``` diff --git a/modules/text/embedding/w2v_baidu_encyclopedia_target_word-ngram_2-2_dim300/README.md b/modules/text/embedding/w2v_baidu_encyclopedia_target_word-ngram_2-2_dim300/README.md index 5d60020e062cc7a3db4b2277f7473d7b9e227d8c..e6b48044a171809d394008e6213e20140728d9a2 100644 --- a/modules/text/embedding/w2v_baidu_encyclopedia_target_word-ngram_2-2_dim300/README.md +++ b/modules/text/embedding/w2v_baidu_encyclopedia_target_word-ngram_2-2_dim300/README.md @@ -1,144 +1,172 @@ -## 概述 -PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) +# w2v_baidu_encyclopedia_target_word-ngram_2-2_dim300 +|模型名称|w2v_baidu_encyclopedia_target_word-ngram_2-2_dim300| +| :--- | :---: | +|类别|文本-词嵌入| +|网络|w2v| +|数据集|baidu_encyclopedia| +|是否支持Fine-tuning|否| +|文件大小|7.28GB| +|词表大小|6969069| +|最新更新日期|2021-04-28| +|数据指标|-| -## API +## 一、模型基本信息 -```python -def __init__( - *args, - **kwargs -) -``` +- ### 模型介绍 -创建一个Embedding Module对象,默认无需参数。 + - PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) -**参数** -* `*args`: 用户额外指定的列表类型的参数。 -* `**kwargs`:用户额外指定的关键字字典类型的参数。 +## 二、安装 
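+
+- 完成本节 1、2 两小节的安装后,可以参考下面的示意代码,把 `get_tokenizer()` 返回的分词器与 `search()` 结合起来,查询一句话中各个词的向量。示例假设分词器提供 `cut` 方法(与 paddlenlp 的 JiebaTokenizer 一致),具体接口以实际安装版本为准。
+
+  - ```python
+    import paddlehub as hub
+
+    embedding = hub.Module(name='w2v_baidu_encyclopedia_target_word-ngram_2-2_dim300')
+
+    # 获取中文分词器(JiebaTokenizer 实例),此处假设其提供 cut 方法
+    tokenizer = embedding.get_tokenizer()
+    words = tokenizer.cut("今天天气真好")
+
+    # 对分词结果逐词查询词向量
+    vectors = embedding.search(words)
+    print(words)
+    print(vectors)
+    ```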
-关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) +- ### 1、环境依赖 + - paddlepaddle >= 2.0.0 -```python -def search( - words: Union[List[str], str, int], -) -``` + - paddlehub >= 2.0.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) -获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 +- ### 2、安装 -**参数** -* `words`: 需要获取的词向量的词、词列表或者词编号。 + - ```shell + $ hub install w2v_baidu_encyclopedia_target_word-ngram_2-2_dim300 + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) +## 三、模型API -```python -def cosine_sim( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 +- ### 1、预测代码示例 -**参数** -* `word_a`: 需要计算余弦相似度的单词a。 -* `word_b`: 需要计算余弦相似度的单词b。 + - ```python + import paddlehub as hub + embedding = hub.Module(name='w2v_baidu_encyclopedia_target_word-ngram_2-2_dim300') + # 获取单词的embedding + embedding.search("中国") + # 计算两个词向量的余弦相似度 + embedding.cosine_sim("中国", "美国") + # 计算两个词向量的内积 + embedding.dot("中国", "美国") + ``` -```python -def dot( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 +- ### 2、API -**参数** -* `word_a`: 需要计算内积的单词a。 -* `word_b`: 需要计算内积的单词b。 + - ```python + def __init__( + *args, + **kwargs + ) + ``` + - 创建一个Embedding Module对象,默认无需参数。 -```python -def get_vocab_path() -``` -获取本地词表文件的路径信息。 + - **参数** + - `*args`: 用户额外指定的列表类型的参数。 + - `**kwargs`:用户额外指定的关键字字典类型的参数。 + - 关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -```python -def get_tokenizer(*args, **kwargs) -``` -获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -**参数** -* `*args`: 额外传递的列表形式的参数。 -* `**kwargs`: 额外传递的字典形式的参数。 + - ```python + def search( + words: Union[List[str], str, int], + ) + ``` -关于额外参数的详情,可查看[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/develop/PaddleNLP/paddlenlp/data/tokenizer.py) + - 获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 + - **参数** + - `words`: 需要获取的词向量的词、词列表或者词编号。 -更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -## 代码示例 + - ```python + def cosine_sim( + word_a: str, + word_b: str, + ) + ``` -```python -import paddlehub as hub -embedding = hub.Module(name='w2v_baidu_encyclopedia_target_word-ngram_2-2_dim300') + - 计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 -# 获取单词的embedding -embedding.search("中国") -# 计算两个词向量的余弦相似度 -embedding.cosine_sim("中国", "美国") -# 计算两个词向量的内积 -embedding.dot("中国", "美国") -``` + - **参数** + - `word_a`: 需要计算余弦相似度的单词a。 + - `word_b`: 需要计算余弦相似度的单词b。 -## 部署服务 -通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 + - ```python + def dot( + word_a: str, + word_b: str, + ) + ``` -### Step1: 启动PaddleHub Serving + - 计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 -运行启动命令: + - **参数** + - `word_a`: 需要计算内积的单词a。 + - `word_b`: 需要计算内积的单词b。 -```shell -$ hub serving start -m w2v_baidu_encyclopedia_target_word-ngram_2-2_dim300 -``` -这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + - ```python + def get_vocab_path() + ``` -**NOTE:** 
如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + - 获取本地词表文件的路径信息。 -### Step2: 发送预测请求 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + - ```python + def get_tokenizer(*args, **kwargs) + ``` -```python -import requests -import json + - 获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -# 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... ]] -word_pairs = [["中国", "美国"], ["今天", "明天"]] -# 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 -data = {"data": word_pairs} -# 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip -url = "http://10.12.121.132:8866/predict/w2v_baidu_encyclopedia_target_word-ngram_2-2_dim300" -# 指定post请求的headers为application/json方式 -headers = {"Content-Type": "application/json"} + - **参数** + - `*args`: 额外传递的列表形式的参数。 + - `**kwargs`: 额外传递的字典形式的参数。 -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -print(r.json()) -``` + - 关于额外参数的详情可参考[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/paddlenlp/data/tokenizer.py) -## 查看代码 + - 更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings -## 依赖 +## 四、部署服务 -paddlepaddle >= 2.0.0 +- 通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 -paddlehub >= 2.0.0 +- ### Step1: 启动PaddleHub Serving -## 更新历史 + - 运行启动命令: + + - ```shell + $ hub serving start -m w2v_baidu_encyclopedia_target_word-ngram_2-2_dim300 + ``` + + - 这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步: 发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... 
]] + word_pairs = [["中国", "美国"], ["今天", "明天"]] + # 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 + data = {"data": word_pairs} + # 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip + url = "http://127.0.0.1:8866/predict/w2v_baidu_encyclopedia_target_word-ngram_2-2_dim300" + # 指定post请求的headers为application/json方式 + headers = {"Content-Type": "application/json"} + + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + ``` + + +## 五、更新历史 * 1.0.0 @@ -146,4 +174,7 @@ paddlehub >= 2.0.0 * 1.0.1 - 支持基于embedding的文本分类和序列标注finetune任务 + 优化模型 + - ```shell + $ hub install w2v_baidu_encyclopedia_target_word-ngram_2-2_dim300==1.0.1 + ``` diff --git a/modules/text/embedding/w2v_baidu_encyclopedia_target_word-wordLR_dim300/README.md b/modules/text/embedding/w2v_baidu_encyclopedia_target_word-wordLR_dim300/README.md index 3ce088360ae8e72116be2a6459f00f23fbfd41da..7cd37e6a31b9d8fe7cc144c203460caa691e26db 100644 --- a/modules/text/embedding/w2v_baidu_encyclopedia_target_word-wordLR_dim300/README.md +++ b/modules/text/embedding/w2v_baidu_encyclopedia_target_word-wordLR_dim300/README.md @@ -1,144 +1,172 @@ -## 概述 -PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) +# w2v_baidu_encyclopedia_target_word-wordLR_dim300 +|模型名称|w2v_baidu_encyclopedia_target_word-wordLR_dim300| +| :--- | :---: | +|类别|文本-词嵌入| +|网络|w2v| +|数据集|baidu_encyclopedia| +|是否支持Fine-tuning|否| +|文件大小|678.22MB| +|词表大小|635958| +|最新更新日期|2021-04-28| +|数据指标|-| -## API +## 一、模型基本信息 -```python -def __init__( - *args, - **kwargs -) -``` +- ### 模型介绍 -创建一个Embedding Module对象,默认无需参数。 + - PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) -**参数** -* `*args`: 用户额外指定的列表类型的参数。 -* `**kwargs`:用户额外指定的关键字字典类型的参数。 +## 二、安装 -关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) +- ### 1、环境依赖 + - paddlepaddle >= 2.0.0 -```python -def search( - words: Union[List[str], str, int], -) -``` + - paddlehub >= 2.0.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) -获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 +- ### 2、安装 -**参数** -* `words`: 需要获取的词向量的词、词列表或者词编号。 + - ```shell + $ hub install w2v_baidu_encyclopedia_target_word-wordLR_dim300 + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) +## 三、模型API -```python -def cosine_sim( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 +- ### 1、预测代码示例 -**参数** -* `word_a`: 需要计算余弦相似度的单词a。 -* `word_b`: 需要计算余弦相似度的单词b。 + - ```python + import paddlehub as hub + embedding = hub.Module(name='w2v_baidu_encyclopedia_target_word-wordLR_dim300') + # 获取单词的embedding + embedding.search("中国") + # 计算两个词向量的余弦相似度 + embedding.cosine_sim("中国", "美国") + # 计算两个词向量的内积 + embedding.dot("中国", "美国") + ``` -```python -def dot( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 +- ### 2、API -**参数** -* `word_a`: 需要计算内积的单词a。 -* `word_b`: 
需要计算内积的单词b。 + - ```python + def __init__( + *args, + **kwargs + ) + ``` + - 创建一个Embedding Module对象,默认无需参数。 -```python -def get_vocab_path() -``` -获取本地词表文件的路径信息。 + - **参数** + - `*args`: 用户额外指定的列表类型的参数。 + - `**kwargs`:用户额外指定的关键字字典类型的参数。 + - 关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -```python -def get_tokenizer(*args, **kwargs) -``` -获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -**参数** -* `*args`: 额外传递的列表形式的参数。 -* `**kwargs`: 额外传递的字典形式的参数。 + - ```python + def search( + words: Union[List[str], str, int], + ) + ``` -关于额外参数的详情,可查看[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/develop/PaddleNLP/paddlenlp/data/tokenizer.py) + - 获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 + - **参数** + - `words`: 需要获取的词向量的词、词列表或者词编号。 -更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -## 代码示例 + - ```python + def cosine_sim( + word_a: str, + word_b: str, + ) + ``` -```python -import paddlehub as hub -embedding = hub.Module(name='w2v_baidu_encyclopedia_target_word-wordLR_dim300') + - 计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 -# 获取单词的embedding -embedding.search("中国") -# 计算两个词向量的余弦相似度 -embedding.cosine_sim("中国", "美国") -# 计算两个词向量的内积 -embedding.dot("中国", "美国") -``` + - **参数** + - `word_a`: 需要计算余弦相似度的单词a。 + - `word_b`: 需要计算余弦相似度的单词b。 -## 部署服务 -通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 + - ```python + def dot( + word_a: str, + word_b: str, + ) + ``` -### Step1: 启动PaddleHub Serving + - 计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 -运行启动命令: + - **参数** + - `word_a`: 需要计算内积的单词a。 + - `word_b`: 需要计算内积的单词b。 -```shell -$ hub serving start -m w2v_baidu_encyclopedia_target_word-wordLR_dim300 -``` -这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + - ```python + def get_vocab_path() + ``` -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + - 获取本地词表文件的路径信息。 -### Step2: 发送预测请求 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + - ```python + def get_tokenizer(*args, **kwargs) + ``` -```python -import requests -import json + - 获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -# 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... 
]] -word_pairs = [["中国", "美国"], ["今天", "明天"]] -# 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 -data = {"data": word_pairs} -# 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip -url = "http://10.12.121.132:8866/predict/w2v_baidu_encyclopedia_target_word-wordLR_dim300" -# 指定post请求的headers为application/json方式 -headers = {"Content-Type": "application/json"} + - **参数** + - `*args`: 额外传递的列表形式的参数。 + - `**kwargs`: 额外传递的字典形式的参数。 -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -print(r.json()) -``` + - 关于额外参数的详情可参考[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/paddlenlp/data/tokenizer.py) -## 查看代码 + - 更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings -## 依赖 +## 四、部署服务 -paddlepaddle >= 2.0.0 +- 通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 -paddlehub >= 2.0.0 +- ### Step1: 启动PaddleHub Serving -## 更新历史 + - 运行启动命令: + + - ```shell + $ hub serving start -m w2v_baidu_encyclopedia_target_word-wordLR_dim300 + ``` + + - 这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步: 发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... ]] + word_pairs = [["中国", "美国"], ["今天", "明天"]] + # 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 + data = {"data": word_pairs} + # 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip + url = "http://127.0.0.1:8866/predict/w2v_baidu_encyclopedia_target_word-wordLR_dim300" + # 指定post请求的headers为application/json方式 + headers = {"Content-Type": "application/json"} + + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + ``` + + +## 五、更新历史 * 1.0.0 @@ -146,4 +174,7 @@ paddlehub >= 2.0.0 * 1.0.1 - 支持基于embedding的文本分类和序列标注finetune任务 + 优化模型 + - ```shell + $ hub install w2v_baidu_encyclopedia_target_word-wordLR_dim300==1.0.1 + ``` diff --git a/modules/text/embedding/w2v_baidu_encyclopedia_target_word-wordPosition_dim300/README.md b/modules/text/embedding/w2v_baidu_encyclopedia_target_word-wordPosition_dim300/README.md index 756a3bc4d5e08a86c6b9d16616d0e669a94ce3bc..8a02ec85aed7be1968f43cb5c844666ba75eb20e 100644 --- a/modules/text/embedding/w2v_baidu_encyclopedia_target_word-wordPosition_dim300/README.md +++ b/modules/text/embedding/w2v_baidu_encyclopedia_target_word-wordPosition_dim300/README.md @@ -1,144 +1,172 @@ -## 概述 -PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) +# w2v_baidu_encyclopedia_target_word-wordPosition_dim300 +|模型名称|w2v_baidu_encyclopedia_target_word-wordPosition_dim300| +| :--- | :---: | +|类别|文本-词嵌入| +|网络|w2v| +|数据集|baidu_encyclopedia| +|是否支持Fine-tuning|否| +|文件大小|679.32MB| +|词表大小|636038| +|最新更新日期|2021-04-28| +|数据指标|-| -## API +## 一、模型基本信息 -```python -def __init__( - *args, - **kwargs -) -``` +- ### 模型介绍 -创建一个Embedding Module对象,默认无需参数。 + - PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) -**参数** -* `*args`: 用户额外指定的列表类型的参数。 -* `**kwargs`:用户额外指定的关键字字典类型的参数。 +## 二、安装 
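+
+- 完成本节 1、2 两小节的安装后,可以用下面的示意代码快速体验模块:对若干词对分别计算余弦相似度与内积。示例只使用本文档介绍的 `cosine_sim()` 与 `dot()` 接口;注意词典外的词会被替换为 unknown_token,其结果不具参考意义。
+
+  - ```python
+    import paddlehub as hub
+
+    embedding = hub.Module(name='w2v_baidu_encyclopedia_target_word-wordPosition_dim300')
+
+    # 对若干词对分别计算余弦相似度与内积(词需在词表内,否则按 OOV 处理)
+    for word_a, word_b in [["中国", "美国"], ["今天", "明天"]]:
+        print(word_a, word_b,
+              embedding.cosine_sim(word_a, word_b),
+              embedding.dot(word_a, word_b))
+    ```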
-关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) +- ### 1、环境依赖 + - paddlepaddle >= 2.0.0 -```python -def search( - words: Union[List[str], str, int], -) -``` + - paddlehub >= 2.0.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) -获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 +- ### 2、安装 -**参数** -* `words`: 需要获取的词向量的词、词列表或者词编号。 + - ```shell + $ hub install w2v_baidu_encyclopedia_target_word-wordPosition_dim300 + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) +## 三、模型API -```python -def cosine_sim( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 +- ### 1、预测代码示例 -**参数** -* `word_a`: 需要计算余弦相似度的单词a。 -* `word_b`: 需要计算余弦相似度的单词b。 + - ```python + import paddlehub as hub + embedding = hub.Module(name='w2v_baidu_encyclopedia_target_word-wordPosition_dim300') + # 获取单词的embedding + embedding.search("中国") + # 计算两个词向量的余弦相似度 + embedding.cosine_sim("中国", "美国") + # 计算两个词向量的内积 + embedding.dot("中国", "美国") + ``` -```python -def dot( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 +- ### 2、API -**参数** -* `word_a`: 需要计算内积的单词a。 -* `word_b`: 需要计算内积的单词b。 + - ```python + def __init__( + *args, + **kwargs + ) + ``` + - 创建一个Embedding Module对象,默认无需参数。 -```python -def get_vocab_path() -``` -获取本地词表文件的路径信息。 + - **参数** + - `*args`: 用户额外指定的列表类型的参数。 + - `**kwargs`:用户额外指定的关键字字典类型的参数。 + - 关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -```python -def get_tokenizer(*args, **kwargs) -``` -获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -**参数** -* `*args`: 额外传递的列表形式的参数。 -* `**kwargs`: 额外传递的字典形式的参数。 + - ```python + def search( + words: Union[List[str], str, int], + ) + ``` -关于额外参数的详情,可查看[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/develop/PaddleNLP/paddlenlp/data/tokenizer.py) + - 获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 + - **参数** + - `words`: 需要获取的词向量的词、词列表或者词编号。 -更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -## 代码示例 + - ```python + def cosine_sim( + word_a: str, + word_b: str, + ) + ``` -```python -import paddlehub as hub -embedding = hub.Module(name='w2v_baidu_encyclopedia_target_word-wordPosition_dim300') + - 计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 -# 获取单词的embedding -embedding.search("中国") -# 计算两个词向量的余弦相似度 -embedding.cosine_sim("中国", "美国") -# 计算两个词向量的内积 -embedding.dot("中国", "美国") -``` + - **参数** + - `word_a`: 需要计算余弦相似度的单词a。 + - `word_b`: 需要计算余弦相似度的单词b。 -## 部署服务 -通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 + - ```python + def dot( + word_a: str, + word_b: str, + ) + ``` -### Step1: 启动PaddleHub Serving + - 计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 -运行启动命令: + - **参数** + - `word_a`: 需要计算内积的单词a。 + - `word_b`: 需要计算内积的单词b。 -```shell -$ hub serving start -m w2v_baidu_encyclopedia_target_word-wordPosition_dim300 -``` -这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + - ```python + def get_vocab_path() + ``` 
-**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + - 获取本地词表文件的路径信息。 -### Step2: 发送预测请求 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + - ```python + def get_tokenizer(*args, **kwargs) + ``` -```python -import requests -import json + - 获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -# 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... ]] -word_pairs = [["中国", "美国"], ["今天", "明天"]] -# 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 -data = {"data": word_pairs} -# 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip -url = "http://10.12.121.132:8866/predict/w2v_baidu_encyclopedia_target_word-wordPosition_dim300" -# 指定post请求的headers为application/json方式 -headers = {"Content-Type": "application/json"} + - **参数** + - `*args`: 额外传递的列表形式的参数。 + - `**kwargs`: 额外传递的字典形式的参数。 -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -print(r.json()) -``` + - 关于额外参数的详情可参考[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/paddlenlp/data/tokenizer.py) -## 查看代码 + - 更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings -## 依赖 +## 四、部署服务 -paddlepaddle >= 2.0.0 +- 通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 -paddlehub >= 2.0.0 +- ### Step1: 启动PaddleHub Serving -## 更新历史 + - 运行启动命令: + + - ```shell + $ hub serving start -m w2v_baidu_encyclopedia_target_word-wordPosition_dim300 + ``` + + - 这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步: 发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... 
]] + word_pairs = [["中国", "美国"], ["今天", "明天"]] + # 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 + data = {"data": word_pairs} + # 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip + url = "http://127.0.0.1:8866/predict/w2v_baidu_encyclopedia_target_word-wordPosition_dim300" + # 指定post请求的headers为application/json方式 + headers = {"Content-Type": "application/json"} + + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + ``` + + +## 五、更新历史 * 1.0.0 @@ -146,4 +174,7 @@ paddlehub >= 2.0.0 * 1.0.1 - 支持基于embedding的文本分类和序列标注finetune任务 + 优化模型 + - ```shell + $ hub install w2v_baidu_encyclopedia_target_word-wordPosition_dim300==1.0.1 + ``` diff --git a/modules/text/embedding/w2v_baidu_encyclopedia_target_word-word_dim300/README.md b/modules/text/embedding/w2v_baidu_encyclopedia_target_word-word_dim300/README.md index 9e37a31fd5935793dbd374b700bf574c5d8b6a3c..68b4e3f2b2814308f8080f4ac2f663e81ee8f957 100644 --- a/modules/text/embedding/w2v_baidu_encyclopedia_target_word-word_dim300/README.md +++ b/modules/text/embedding/w2v_baidu_encyclopedia_target_word-word_dim300/README.md @@ -1,144 +1,172 @@ -## 概述 -PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) +# w2v_baidu_encyclopedia_target_word-word_dim300 +|模型名称|w2v_baidu_encyclopedia_target_word-word_dim300| +| :--- | :---: | +|类别|文本-词嵌入| +|网络|w2v| +|数据集|baidu_encyclopedia| +|是否支持Fine-tuning|否| +|文件大小|678.21MB| +|词表大小|635965| +|最新更新日期|2021-04-28| +|数据指标|-| -## API +## 一、模型基本信息 -```python -def __init__( - *args, - **kwargs -) -``` +- ### 模型介绍 -创建一个Embedding Module对象,默认无需参数。 + - PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) -**参数** -* `*args`: 用户额外指定的列表类型的参数。 -* `**kwargs`:用户额外指定的关键字字典类型的参数。 +## 二、安装 -关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) +- ### 1、环境依赖 + - paddlepaddle >= 2.0.0 -```python -def search( - words: Union[List[str], str, int], -) -``` + - paddlehub >= 2.0.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) -获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 +- ### 2、安装 -**参数** -* `words`: 需要获取的词向量的词、词列表或者词编号。 + - ```shell + $ hub install w2v_baidu_encyclopedia_target_word-word_dim300 + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) +## 三、模型API -```python -def cosine_sim( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 +- ### 1、预测代码示例 -**参数** -* `word_a`: 需要计算余弦相似度的单词a。 -* `word_b`: 需要计算余弦相似度的单词b。 + - ```python + import paddlehub as hub + embedding = hub.Module(name='w2v_baidu_encyclopedia_target_word-word_dim300') + # 获取单词的embedding + embedding.search("中国") + # 计算两个词向量的余弦相似度 + embedding.cosine_sim("中国", "美国") + # 计算两个词向量的内积 + embedding.dot("中国", "美国") + ``` -```python -def dot( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 +- ### 2、API -**参数** -* `word_a`: 需要计算内积的单词a。 -* `word_b`: 需要计算内积的单词b。 + - 
```python + def __init__( + *args, + **kwargs + ) + ``` + - 创建一个Embedding Module对象,默认无需参数。 -```python -def get_vocab_path() -``` -获取本地词表文件的路径信息。 + - **参数** + - `*args`: 用户额外指定的列表类型的参数。 + - `**kwargs`:用户额外指定的关键字字典类型的参数。 + - 关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -```python -def get_tokenizer(*args, **kwargs) -``` -获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -**参数** -* `*args`: 额外传递的列表形式的参数。 -* `**kwargs`: 额外传递的字典形式的参数。 + - ```python + def search( + words: Union[List[str], str, int], + ) + ``` -关于额外参数的详情,可查看[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/develop/PaddleNLP/paddlenlp/data/tokenizer.py) + - 获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 + - **参数** + - `words`: 需要获取的词向量的词、词列表或者词编号。 -更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -## 代码示例 + - ```python + def cosine_sim( + word_a: str, + word_b: str, + ) + ``` -```python -import paddlehub as hub -embedding = hub.Module(name='w2v_baidu_encyclopedia_target_word-word_dim300') + - 计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 -# 获取单词的embedding -embedding.search("中国") -# 计算两个词向量的余弦相似度 -embedding.cosine_sim("中国", "美国") -# 计算两个词向量的内积 -embedding.dot("中国", "美国") -``` + - **参数** + - `word_a`: 需要计算余弦相似度的单词a。 + - `word_b`: 需要计算余弦相似度的单词b。 -## 部署服务 -通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 + - ```python + def dot( + word_a: str, + word_b: str, + ) + ``` -### Step1: 启动PaddleHub Serving + - 计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 -运行启动命令: + - **参数** + - `word_a`: 需要计算内积的单词a。 + - `word_b`: 需要计算内积的单词b。 -```shell -$ hub serving start -m w2v_baidu_encyclopedia_target_word-word_dim300 -``` -这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + - ```python + def get_vocab_path() + ``` -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + - 获取本地词表文件的路径信息。 -### Step2: 发送预测请求 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + - ```python + def get_tokenizer(*args, **kwargs) + ``` -```python -import requests -import json + - 获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -# 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... 
]] -word_pairs = [["中国", "美国"], ["今天", "明天"]] -# 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 -data = {"data": word_pairs} -# 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip -url = "http://10.12.121.132:8866/predict/w2v_baidu_encyclopedia_target_word-word_dim300" -# 指定post请求的headers为application/json方式 -headers = {"Content-Type": "application/json"} + - **参数** + - `*args`: 额外传递的列表形式的参数。 + - `**kwargs`: 额外传递的字典形式的参数。 -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -print(r.json()) -``` + - 关于额外参数的详情可参考[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/paddlenlp/data/tokenizer.py) -## 查看代码 + - 更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings -## 依赖 +## 四、部署服务 -paddlepaddle >= 2.0.0 +- 通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 -paddlehub >= 2.0.0 +- ### Step1: 启动PaddleHub Serving -## 更新历史 + - 运行启动命令: + + - ```shell + $ hub serving start -m w2v_baidu_encyclopedia_target_word-word_dim300 + ``` + + - 这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步: 发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... ]] + word_pairs = [["中国", "美国"], ["今天", "明天"]] + # 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 + data = {"data": word_pairs} + # 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip + url = "http://127.0.0.1:8866/predict/w2v_baidu_encyclopedia_target_word-word_dim300" + # 指定post请求的headers为application/json方式 + headers = {"Content-Type": "application/json"} + + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + ``` + + +## 五、更新历史 * 1.0.0 @@ -146,4 +174,7 @@ paddlehub >= 2.0.0 * 1.0.1 - 支持基于embedding的文本分类和序列标注finetune任务 + 优化模型 + - ```shell + $ hub install w2v_baidu_encyclopedia_target_word-word_dim300==1.0.1 + ``` diff --git a/modules/text/embedding/w2v_financial_target_bigram-char_dim300/README.md b/modules/text/embedding/w2v_financial_target_bigram-char_dim300/README.md index 0b14e5461c787e241acafeb0670bcd36a05d3dcd..8a80e7193cad58e5774a302b4b2b595f10d03d7b 100644 --- a/modules/text/embedding/w2v_financial_target_bigram-char_dim300/README.md +++ b/modules/text/embedding/w2v_financial_target_bigram-char_dim300/README.md @@ -1,144 +1,172 @@ -## 概述 -PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) +# w2v_financial_target_bigram-char_dim300 +|模型名称|w2v_financial_target_bigram-char_dim300| +| :--- | :---: | +|类别|文本-词嵌入| +|网络|w2v| +|数据集|financial| +|是否支持Fine-tuning|否| +|文件大小|499.52MB| +|词表大小|467163| +|最新更新日期|2021-04-28| +|数据指标|-| -## API +## 一、模型基本信息 -```python -def __init__( - *args, - **kwargs -) -``` +- ### 模型介绍 -创建一个Embedding Module对象,默认无需参数。 + - PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) -**参数** -* `*args`: 用户额外指定的列表类型的参数。 -* `**kwargs`:用户额外指定的关键字字典类型的参数。 +## 二、安装 
-关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) +- ### 1、环境依赖 + - paddlepaddle >= 2.0.0 -```python -def search( - words: Union[List[str], str, int], -) -``` + - paddlehub >= 2.0.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) -获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 +- ### 2、安装 -**参数** -* `words`: 需要获取的词向量的词、词列表或者词编号。 + - ```shell + $ hub install w2v_financial_target_bigram-char_dim300 + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) +## 三、模型API -```python -def cosine_sim( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 +- ### 1、预测代码示例 -**参数** -* `word_a`: 需要计算余弦相似度的单词a。 -* `word_b`: 需要计算余弦相似度的单词b。 + - ```python + import paddlehub as hub + embedding = hub.Module(name='w2v_financial_target_bigram-char_dim300') + # 获取单词的embedding + embedding.search("中国") + # 计算两个词向量的余弦相似度 + embedding.cosine_sim("中国", "美国") + # 计算两个词向量的内积 + embedding.dot("中国", "美国") + ``` -```python -def dot( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 +- ### 2、API -**参数** -* `word_a`: 需要计算内积的单词a。 -* `word_b`: 需要计算内积的单词b。 + - ```python + def __init__( + *args, + **kwargs + ) + ``` + - 创建一个Embedding Module对象,默认无需参数。 -```python -def get_vocab_path() -``` -获取本地词表文件的路径信息。 + - **参数** + - `*args`: 用户额外指定的列表类型的参数。 + - `**kwargs`:用户额外指定的关键字字典类型的参数。 + - 关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -```python -def get_tokenizer(*args, **kwargs) -``` -获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -**参数** -* `*args`: 额外传递的列表形式的参数。 -* `**kwargs`: 额外传递的字典形式的参数。 + - ```python + def search( + words: Union[List[str], str, int], + ) + ``` -关于额外参数的详情,可查看[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/develop/PaddleNLP/paddlenlp/data/tokenizer.py) + - 获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 + - **参数** + - `words`: 需要获取的词向量的词、词列表或者词编号。 -更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -## 代码示例 + - ```python + def cosine_sim( + word_a: str, + word_b: str, + ) + ``` -```python -import paddlehub as hub -embedding = hub.Module(name='w2v_financial_target_bigram-char_dim300') + - 计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 -# 获取单词的embedding -embedding.search("中国") -# 计算两个词向量的余弦相似度 -embedding.cosine_sim("中国", "美国") -# 计算两个词向量的内积 -embedding.dot("中国", "美国") -``` + - **参数** + - `word_a`: 需要计算余弦相似度的单词a。 + - `word_b`: 需要计算余弦相似度的单词b。 -## 部署服务 -通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 + - ```python + def dot( + word_a: str, + word_b: str, + ) + ``` -### Step1: 启动PaddleHub Serving + - 计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 -运行启动命令: + - **参数** + - `word_a`: 需要计算内积的单词a。 + - `word_b`: 需要计算内积的单词b。 -```shell -$ hub serving start -m w2v_financial_target_bigram-char_dim300 -``` -这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + - ```python + def get_vocab_path() + ``` -**NOTE:** 
如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + - 获取本地词表文件的路径信息。 -### Step2: 发送预测请求 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + - ```python + def get_tokenizer(*args, **kwargs) + ``` -```python -import requests -import json + - 获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -# 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... ]] -word_pairs = [["中国", "美国"], ["今天", "明天"]] -# 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 -data = {"data": word_pairs} -# 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip -url = "http://10.12.121.132:8866/predict/w2v_financial_target_bigram-char_dim300" -# 指定post请求的headers为application/json方式 -headers = {"Content-Type": "application/json"} + - **参数** + - `*args`: 额外传递的列表形式的参数。 + - `**kwargs`: 额外传递的字典形式的参数。 -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -print(r.json()) -``` + - 关于额外参数的详情可参考[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/paddlenlp/data/tokenizer.py) -## 查看代码 + - 更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings -## 依赖 +## 四、部署服务 -paddlepaddle >= 2.0.0 +- 通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 -paddlehub >= 2.0.0 +- ### Step1: 启动PaddleHub Serving -## 更新历史 + - 运行启动命令: + + - ```shell + $ hub serving start -m w2v_financial_target_bigram-char_dim300 + ``` + + - 这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步: 发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... 
]] + word_pairs = [["中国", "美国"], ["今天", "明天"]] + # 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 + data = {"data": word_pairs} + # 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip + url = "http://127.0.0.1:8866/predict/w2v_financial_target_bigram-char_dim300" + # 指定post请求的headers为application/json方式 + headers = {"Content-Type": "application/json"} + + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + ``` + + +## 五、更新历史 * 1.0.0 @@ -146,4 +174,7 @@ paddlehub >= 2.0.0 * 1.0.1 - 支持基于embedding的文本分类和序列标注finetune任务 + 优化模型 + - ```shell + $ hub install w2v_financial_target_bigram-char_dim300==1.0.1 + ``` diff --git a/modules/text/embedding/w2v_financial_target_word-bigram_dim300/README.md b/modules/text/embedding/w2v_financial_target_word-bigram_dim300/README.md index b979aea164f2191368e391fdaf84ac98db848bde..2a136a52a1dd3b94924aa5432560866f55b437c0 100644 --- a/modules/text/embedding/w2v_financial_target_word-bigram_dim300/README.md +++ b/modules/text/embedding/w2v_financial_target_word-bigram_dim300/README.md @@ -1,144 +1,172 @@ -## 概述 -PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) +# w2v_financial_target_word-bigram_dim300 +|模型名称|w2v_financial_target_word-bigram_dim300| +| :--- | :---: | +|类别|文本-词嵌入| +|网络|w2v| +|数据集|financial| +|是否支持Fine-tuning|否| +|文件大小|499.54MB| +|词表大小|467331| +|最新更新日期|2021-04-28| +|数据指标|-| -## API +## 一、模型基本信息 -```python -def __init__( - *args, - **kwargs -) -``` +- ### 模型介绍 -创建一个Embedding Module对象,默认无需参数。 + - PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) -**参数** -* `*args`: 用户额外指定的列表类型的参数。 -* `**kwargs`:用户额外指定的关键字字典类型的参数。 +## 二、安装 -关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) +- ### 1、环境依赖 + - paddlepaddle >= 2.0.0 -```python -def search( - words: Union[List[str], str, int], -) -``` + - paddlehub >= 2.0.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) -获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 +- ### 2、安装 -**参数** -* `words`: 需要获取的词向量的词、词列表或者词编号。 + - ```shell + $ hub install w2v_financial_target_word-bigram_dim300 + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) +## 三、模型API -```python -def cosine_sim( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 +- ### 1、预测代码示例 -**参数** -* `word_a`: 需要计算余弦相似度的单词a。 -* `word_b`: 需要计算余弦相似度的单词b。 + - ```python + import paddlehub as hub + embedding = hub.Module(name='w2v_financial_target_word-bigram_dim300') + # 获取单词的embedding + embedding.search("中国") + # 计算两个词向量的余弦相似度 + embedding.cosine_sim("中国", "美国") + # 计算两个词向量的内积 + embedding.dot("中国", "美国") + ``` -```python -def dot( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 +- ### 2、API -**参数** -* `word_a`: 需要计算内积的单词a。 -* `word_b`: 需要计算内积的单词b。 + - ```python + def __init__( + *args, + **kwargs + ) + ``` + - 创建一个Embedding Module对象,默认无需参数。 
-```python -def get_vocab_path() -``` -获取本地词表文件的路径信息。 + - **参数** + - `*args`: 用户额外指定的列表类型的参数。 + - `**kwargs`:用户额外指定的关键字字典类型的参数。 + - 关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -```python -def get_tokenizer(*args, **kwargs) -``` -获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -**参数** -* `*args`: 额外传递的列表形式的参数。 -* `**kwargs`: 额外传递的字典形式的参数。 + - ```python + def search( + words: Union[List[str], str, int], + ) + ``` -关于额外参数的详情,可查看[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/develop/PaddleNLP/paddlenlp/data/tokenizer.py) + - 获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 + - **参数** + - `words`: 需要获取的词向量的词、词列表或者词编号。 -更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -## 代码示例 + - ```python + def cosine_sim( + word_a: str, + word_b: str, + ) + ``` -```python -import paddlehub as hub -embedding = hub.Module(name='w2v_financial_target_word-bigram_dim300') + - 计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 -# 获取单词的embedding -embedding.search("中国") -# 计算两个词向量的余弦相似度 -embedding.cosine_sim("中国", "美国") -# 计算两个词向量的内积 -embedding.dot("中国", "美国") -``` + - **参数** + - `word_a`: 需要计算余弦相似度的单词a。 + - `word_b`: 需要计算余弦相似度的单词b。 -## 部署服务 -通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 + - ```python + def dot( + word_a: str, + word_b: str, + ) + ``` -### Step1: 启动PaddleHub Serving + - 计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 -运行启动命令: + - **参数** + - `word_a`: 需要计算内积的单词a。 + - `word_b`: 需要计算内积的单词b。 -```shell -$ hub serving start -m w2v_financial_target_word-bigram_dim300 -``` -这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + - ```python + def get_vocab_path() + ``` -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + - 获取本地词表文件的路径信息。 -### Step2: 发送预测请求 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + - ```python + def get_tokenizer(*args, **kwargs) + ``` -```python -import requests -import json + - 获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -# 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... 
]] -word_pairs = [["中国", "美国"], ["今天", "明天"]] -# 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 -data = {"data": word_pairs} -# 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip -url = "http://10.12.121.132:8866/predict/w2v_financial_target_word-bigram_dim300" -# 指定post请求的headers为application/json方式 -headers = {"Content-Type": "application/json"} + - **参数** + - `*args`: 额外传递的列表形式的参数。 + - `**kwargs`: 额外传递的字典形式的参数。 -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -print(r.json()) -``` + - 关于额外参数的详情可参考[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/paddlenlp/data/tokenizer.py) -## 查看代码 + - 更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings -## 依赖 +## 四、部署服务 -paddlepaddle >= 2.0.0 +- 通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 -paddlehub >= 2.0.0 +- ### Step1: 启动PaddleHub Serving -## 更新历史 + - 运行启动命令: + + - ```shell + $ hub serving start -m w2v_financial_target_word-bigram_dim300 + ``` + + - 这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步: 发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... ]] + word_pairs = [["中国", "美国"], ["今天", "明天"]] + # 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 + data = {"data": word_pairs} + # 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip + url = "http://127.0.0.1:8866/predict/w2v_financial_target_word-bigram_dim300" + # 指定post请求的headers为application/json方式 + headers = {"Content-Type": "application/json"} + + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + ``` + + +## 五、更新历史 * 1.0.0 @@ -146,4 +174,7 @@ paddlehub >= 2.0.0 * 1.0.1 - 支持基于embedding的文本分类和序列标注finetune任务 + 优化模型 + - ```shell + $ hub install w2v_financial_target_word-bigram_dim300==1.0.1 + ``` diff --git a/modules/text/embedding/w2v_financial_target_word-char_dim300/README.md b/modules/text/embedding/w2v_financial_target_word-char_dim300/README.md index f32a61e4d82e7f654882039491a142ce063d6198..0eb2271b30efe20c94fb94af69c2a51b9e6ad816 100644 --- a/modules/text/embedding/w2v_financial_target_word-char_dim300/README.md +++ b/modules/text/embedding/w2v_financial_target_word-char_dim300/README.md @@ -1,144 +1,172 @@ -## 概述 -PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) +# w2v_financial_target_word-char_dim300 +|模型名称|w2v_financial_target_word-char_dim300| +| :--- | :---: | +|类别|文本-词嵌入| +|网络|w2v| +|数据集|financial| +|是否支持Fine-tuning|否| +|文件大小|499.17MB| +|词表大小|467343| +|最新更新日期|2021-04-28| +|数据指标|-| -## API +## 一、模型基本信息 -```python -def __init__( - *args, - **kwargs -) -``` +- ### 模型介绍 -创建一个Embedding Module对象,默认无需参数。 + - PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) -**参数** -* `*args`: 用户额外指定的列表类型的参数。 -* `**kwargs`:用户额外指定的关键字字典类型的参数。 +## 二、安装 -关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) +- ### 1、环境依赖 + - 
paddlepaddle >= 2.0.0 -```python -def search( - words: Union[List[str], str, int], -) -``` + - paddlehub >= 2.0.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) -获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 +- ### 2、安装 -**参数** -* `words`: 需要获取的词向量的词、词列表或者词编号。 + - ```shell + $ hub install w2v_financial_target_word-char_dim300 + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) +## 三、模型API -```python -def cosine_sim( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 +- ### 1、预测代码示例 -**参数** -* `word_a`: 需要计算余弦相似度的单词a。 -* `word_b`: 需要计算余弦相似度的单词b。 + - ```python + import paddlehub as hub + embedding = hub.Module(name='w2v_financial_target_word-char_dim300') + # 获取单词的embedding + embedding.search("中国") + # 计算两个词向量的余弦相似度 + embedding.cosine_sim("中国", "美国") + # 计算两个词向量的内积 + embedding.dot("中国", "美国") + ``` -```python -def dot( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 +- ### 2、API -**参数** -* `word_a`: 需要计算内积的单词a。 -* `word_b`: 需要计算内积的单词b。 + - ```python + def __init__( + *args, + **kwargs + ) + ``` + - 创建一个Embedding Module对象,默认无需参数。 -```python -def get_vocab_path() -``` -获取本地词表文件的路径信息。 + - **参数** + - `*args`: 用户额外指定的列表类型的参数。 + - `**kwargs`:用户额外指定的关键字字典类型的参数。 + - 关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -```python -def get_tokenizer(*args, **kwargs) -``` -获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -**参数** -* `*args`: 额外传递的列表形式的参数。 -* `**kwargs`: 额外传递的字典形式的参数。 + - ```python + def search( + words: Union[List[str], str, int], + ) + ``` -关于额外参数的详情,可查看[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/develop/PaddleNLP/paddlenlp/data/tokenizer.py) + - 获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 + - **参数** + - `words`: 需要获取的词向量的词、词列表或者词编号。 -更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -## 代码示例 + - ```python + def cosine_sim( + word_a: str, + word_b: str, + ) + ``` -```python -import paddlehub as hub -embedding = hub.Module(name='w2v_financial_target_word-char_dim300') + - 计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 -# 获取单词的embedding -embedding.search("中国") -# 计算两个词向量的余弦相似度 -embedding.cosine_sim("中国", "美国") -# 计算两个词向量的内积 -embedding.dot("中国", "美国") -``` + - **参数** + - `word_a`: 需要计算余弦相似度的单词a。 + - `word_b`: 需要计算余弦相似度的单词b。 -## 部署服务 -通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 + - ```python + def dot( + word_a: str, + word_b: str, + ) + ``` -### Step1: 启动PaddleHub Serving + - 计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 -运行启动命令: + - **参数** + - `word_a`: 需要计算内积的单词a。 + - `word_b`: 需要计算内积的单词b。 -```shell -$ hub serving start -m w2v_financial_target_word-char_dim300 -``` -这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + - ```python + def get_vocab_path() + ``` -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + - 获取本地词表文件的路径信息。 -### Step2: 发送预测请求 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + - ```python + def get_tokenizer(*args, **kwargs) + ``` -```python -import 
requests -import json + - 获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -# 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... ]] -word_pairs = [["中国", "美国"], ["今天", "明天"]] -# 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 -data = {"data": word_pairs} -# 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip -url = "http://10.12.121.132:8866/predict/w2v_financial_target_word-char_dim300" -# 指定post请求的headers为application/json方式 -headers = {"Content-Type": "application/json"} + - **参数** + - `*args`: 额外传递的列表形式的参数。 + - `**kwargs`: 额外传递的字典形式的参数。 -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -print(r.json()) -``` + - 关于额外参数的详情可参考[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/paddlenlp/data/tokenizer.py) -## 查看代码 + - 更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings -## 依赖 +## 四、部署服务 -paddlepaddle >= 2.0.0 +- 通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 -paddlehub >= 2.0.0 +- ### Step1: 启动PaddleHub Serving -## 更新历史 + - 运行启动命令: + + - ```shell + $ hub serving start -m w2v_financial_target_word-char_dim300 + ``` + + - 这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步: 发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... ]] + word_pairs = [["中国", "美国"], ["今天", "明天"]] + # 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 + data = {"data": word_pairs} + # 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip + url = "http://127.0.0.1:8866/predict/w2v_financial_target_word-char_dim300" + # 指定post请求的headers为application/json方式 + headers = {"Content-Type": "application/json"} + + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + ``` + + +## 五、更新历史 * 1.0.0 @@ -146,4 +174,7 @@ paddlehub >= 2.0.0 * 1.0.1 - 支持基于embedding的文本分类和序列标注finetune任务 + 优化模型 + - ```shell + $ hub install w2v_financial_target_word-char_dim300==1.0.1 + ``` diff --git a/modules/text/embedding/w2v_financial_target_word-word_dim300/README.md b/modules/text/embedding/w2v_financial_target_word-word_dim300/README.md index b23097319bfa2e872e9a10871a6086ff835b5d17..81d7528489d1c00866e99936ffd9fc2562a0e81a 100644 --- a/modules/text/embedding/w2v_financial_target_word-word_dim300/README.md +++ b/modules/text/embedding/w2v_financial_target_word-word_dim300/README.md @@ -1,144 +1,172 @@ -## 概述 -PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) +# w2v_financial_target_word-word_dim300 +|模型名称|w2v_financial_target_word-word_dim300| +| :--- | :---: | +|类别|文本-词嵌入| +|网络|w2v| +|数据集|financial| +|是否支持Fine-tuning|否| +|文件大小|498.94MB| +|词表大小|467324| +|最新更新日期|2021-04-28| +|数据指标|-| -## API +## 一、模型基本信息 -```python -def __init__( - *args, - **kwargs -) -``` +- ### 模型介绍 -创建一个Embedding Module对象,默认无需参数。 + - PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) -**参数** -* `*args`: 用户额外指定的列表类型的参数。 -* `**kwargs`:用户额外指定的关键字字典类型的参数。 +## 二、安装 
-关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) +- ### 1、环境依赖 + - paddlepaddle >= 2.0.0 -```python -def search( - words: Union[List[str], str, int], -) -``` + - paddlehub >= 2.0.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) -获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 +- ### 2、安装 -**参数** -* `words`: 需要获取的词向量的词、词列表或者词编号。 + - ```shell + $ hub install w2v_financial_target_word-word_dim300 + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) +## 三、模型API -```python -def cosine_sim( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 +- ### 1、预测代码示例 -**参数** -* `word_a`: 需要计算余弦相似度的单词a。 -* `word_b`: 需要计算余弦相似度的单词b。 + - ```python + import paddlehub as hub + embedding = hub.Module(name='w2v_financial_target_word-word_dim300') + # 获取单词的embedding + embedding.search("中国") + # 计算两个词向量的余弦相似度 + embedding.cosine_sim("中国", "美国") + # 计算两个词向量的内积 + embedding.dot("中国", "美国") + ``` -```python -def dot( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 +- ### 2、API -**参数** -* `word_a`: 需要计算内积的单词a。 -* `word_b`: 需要计算内积的单词b。 + - ```python + def __init__( + *args, + **kwargs + ) + ``` + - 创建一个Embedding Module对象,默认无需参数。 -```python -def get_vocab_path() -``` -获取本地词表文件的路径信息。 + - **参数** + - `*args`: 用户额外指定的列表类型的参数。 + - `**kwargs`:用户额外指定的关键字字典类型的参数。 + - 关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -```python -def get_tokenizer(*args, **kwargs) -``` -获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -**参数** -* `*args`: 额外传递的列表形式的参数。 -* `**kwargs`: 额外传递的字典形式的参数。 + - ```python + def search( + words: Union[List[str], str, int], + ) + ``` -关于额外参数的详情,可查看[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/develop/PaddleNLP/paddlenlp/data/tokenizer.py) + - 获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 + - **参数** + - `words`: 需要获取的词向量的词、词列表或者词编号。 -更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -## 代码示例 + - ```python + def cosine_sim( + word_a: str, + word_b: str, + ) + ``` -```python -import paddlehub as hub -embedding = hub.Module(name='w2v_financial_target_word-word_dim300') + - 计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 -# 获取单词的embedding -embedding.search("中国") -# 计算两个词向量的余弦相似度 -embedding.cosine_sim("中国", "美国") -# 计算两个词向量的内积 -embedding.dot("中国", "美国") -``` + - **参数** + - `word_a`: 需要计算余弦相似度的单词a。 + - `word_b`: 需要计算余弦相似度的单词b。 -## 部署服务 -通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 + - ```python + def dot( + word_a: str, + word_b: str, + ) + ``` -### Step1: 启动PaddleHub Serving + - 计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 -运行启动命令: + - **参数** + - `word_a`: 需要计算内积的单词a。 + - `word_b`: 需要计算内积的单词b。 -```shell -$ hub serving start -m w2v_financial_target_word-word_dim300 -``` -这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + - ```python + def get_vocab_path() + ``` -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + - 
获取本地词表文件的路径信息。 -### Step2: 发送预测请求 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + - ```python + def get_tokenizer(*args, **kwargs) + ``` -```python -import requests -import json + - 获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -# 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... ]] -word_pairs = [["中国", "美国"], ["今天", "明天"]] -# 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 -data = {"data": word_pairs} -# 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip -url = "http://10.12.121.132:8866/predict/w2v_financial_target_word-word_dim300" -# 指定post请求的headers为application/json方式 -headers = {"Content-Type": "application/json"} + - **参数** + - `*args`: 额外传递的列表形式的参数。 + - `**kwargs`: 额外传递的字典形式的参数。 -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -print(r.json()) -``` + - 关于额外参数的详情可参考[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/paddlenlp/data/tokenizer.py) -## 查看代码 + - 更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings -## 依赖 +## 四、部署服务 -paddlepaddle >= 2.0.0 +- 通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 -paddlehub >= 2.0.0 +- ### Step1: 启动PaddleHub Serving -## 更新历史 + - 运行启动命令: + + - ```shell + $ hub serving start -m w2v_financial_target_word-word_dim300 + ``` + + - 这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步: 发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... ]] + word_pairs = [["中国", "美国"], ["今天", "明天"]] + # 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 + data = {"data": word_pairs} + # 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip + url = "http://127.0.0.1:8866/predict/w2v_financial_target_word-word_dim300" + # 指定post请求的headers为application/json方式 + headers = {"Content-Type": "application/json"} + + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + ``` + + +## 五、更新历史 * 1.0.0 @@ -146,4 +174,7 @@ paddlehub >= 2.0.0 * 1.0.1 - 支持基于embedding的文本分类和序列标注finetune任务 + 优化模型 + - ```shell + $ hub install w2v_financial_target_word-word_dim300==1.0.1 + ``` diff --git a/modules/text/embedding/w2v_literature_target_bigram-char_dim300/README.md b/modules/text/embedding/w2v_literature_target_bigram-char_dim300/README.md index 07e8cc252723f199a88512728e9b7f25e5b9fa37..f05cd2d63e4db4b791af1755161970971d561fb2 100644 --- a/modules/text/embedding/w2v_literature_target_bigram-char_dim300/README.md +++ b/modules/text/embedding/w2v_literature_target_bigram-char_dim300/README.md @@ -1,144 +1,172 @@ -## 概述 -PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) +# w2v_literature_target_bigram-char_dim300 +|模型名称|w2v_literature_target_bigram-char_dim300| +| :--- | :---: | +|类别|文本-词嵌入| +|网络|w2v| +|数据集|literature| +|是否支持Fine-tuning|否| +|文件大小|200.69MB| +|词表大小|187975| +|最新更新日期|2021-04-28| +|数据指标|-| -## API +## 一、模型基本信息 -```python -def __init__( - *args, - **kwargs -) -``` +- ### 模型介绍 -创建一个Embedding Module对象,默认无需参数。 + - 
PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) -**参数** -* `*args`: 用户额外指定的列表类型的参数。 -* `**kwargs`:用户额外指定的关键字字典类型的参数。 +## 二、安装 -关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) +- ### 1、环境依赖 + - paddlepaddle >= 2.0.0 -```python -def search( - words: Union[List[str], str, int], -) -``` + - paddlehub >= 2.0.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) -获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 +- ### 2、安装 -**参数** -* `words`: 需要获取的词向量的词、词列表或者词编号。 + - ```shell + $ hub install w2v_literature_target_bigram-char_dim300 + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) +## 三、模型API -```python -def cosine_sim( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 +- ### 1、预测代码示例 -**参数** -* `word_a`: 需要计算余弦相似度的单词a。 -* `word_b`: 需要计算余弦相似度的单词b。 + - ```python + import paddlehub as hub + embedding = hub.Module(name='w2v_literature_target_bigram-char_dim300') + # 获取单词的embedding + embedding.search("中国") + # 计算两个词向量的余弦相似度 + embedding.cosine_sim("中国", "美国") + # 计算两个词向量的内积 + embedding.dot("中国", "美国") + ``` -```python -def dot( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 +- ### 2、API -**参数** -* `word_a`: 需要计算内积的单词a。 -* `word_b`: 需要计算内积的单词b。 + - ```python + def __init__( + *args, + **kwargs + ) + ``` + - 创建一个Embedding Module对象,默认无需参数。 -```python -def get_vocab_path() -``` -获取本地词表文件的路径信息。 + - **参数** + - `*args`: 用户额外指定的列表类型的参数。 + - `**kwargs`:用户额外指定的关键字字典类型的参数。 + - 关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -```python -def get_tokenizer(*args, **kwargs) -``` -获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -**参数** -* `*args`: 额外传递的列表形式的参数。 -* `**kwargs`: 额外传递的字典形式的参数。 + - ```python + def search( + words: Union[List[str], str, int], + ) + ``` -关于额外参数的详情,可查看[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/develop/PaddleNLP/paddlenlp/data/tokenizer.py) + - 获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 + - **参数** + - `words`: 需要获取的词向量的词、词列表或者词编号。 -更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -## 代码示例 + - ```python + def cosine_sim( + word_a: str, + word_b: str, + ) + ``` -```python -import paddlehub as hub -embedding = hub.Module(name='w2v_literature_target_bigram-char_dim300') + - 计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 -# 获取单词的embedding -embedding.search("中国") -# 计算两个词向量的余弦相似度 -embedding.cosine_sim("中国", "美国") -# 计算两个词向量的内积 -embedding.dot("中国", "美国") -``` + - **参数** + - `word_a`: 需要计算余弦相似度的单词a。 + - `word_b`: 需要计算余弦相似度的单词b。 -## 部署服务 -通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 + - ```python + def dot( + word_a: str, + word_b: str, + ) + ``` -### Step1: 启动PaddleHub Serving + - 计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 -运行启动命令: + - 
**参数** + - `word_a`: 需要计算内积的单词a。 + - `word_b`: 需要计算内积的单词b。 -```shell -$ hub serving start -m w2v_literature_target_bigram-char_dim300 -``` -这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + - ```python + def get_vocab_path() + ``` -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + - 获取本地词表文件的路径信息。 -### Step2: 发送预测请求 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + - ```python + def get_tokenizer(*args, **kwargs) + ``` -```python -import requests -import json + - 获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -# 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... ]] -word_pairs = [["中国", "美国"], ["今天", "明天"]] -# 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 -data = {"data": word_pairs} -# 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip -url = "http://10.12.121.132:8866/predict/w2v_literature_target_bigram-char_dim300" -# 指定post请求的headers为application/json方式 -headers = {"Content-Type": "application/json"} + - **参数** + - `*args`: 额外传递的列表形式的参数。 + - `**kwargs`: 额外传递的字典形式的参数。 -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -print(r.json()) -``` + - 关于额外参数的详情可参考[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/paddlenlp/data/tokenizer.py) -## 查看代码 + - 更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings -## 依赖 +## 四、部署服务 -paddlepaddle >= 2.0.0 +- 通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 -paddlehub >= 2.0.0 +- ### Step1: 启动PaddleHub Serving -## 更新历史 + - 运行启动命令: + + - ```shell + $ hub serving start -m w2v_literature_target_bigram-char_dim300 + ``` + + - 这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步: 发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... 
]] + word_pairs = [["中国", "美国"], ["今天", "明天"]] + # 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 + data = {"data": word_pairs} + # 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip + url = "http://127.0.0.1:8866/predict/w2v_literature_target_bigram-char_dim300" + # 指定post请求的headers为application/json方式 + headers = {"Content-Type": "application/json"} + + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + ``` + + +## 五、更新历史 * 1.0.0 @@ -146,4 +174,7 @@ paddlehub >= 2.0.0 * 1.0.1 - 支持基于embedding的文本分类和序列标注finetune任务 + 优化模型 + - ```shell + $ hub install w2v_literature_target_bigram-char_dim300==1.0.1 + ``` diff --git a/modules/text/embedding/w2v_literature_target_word-bigram_dim300/README.md b/modules/text/embedding/w2v_literature_target_word-bigram_dim300/README.md index ef9fd0ff968b9b79c5f437dee2eda0d826f84630..2c7f7155f705ee4a63cea06d471677cfa3853e89 100644 --- a/modules/text/embedding/w2v_literature_target_word-bigram_dim300/README.md +++ b/modules/text/embedding/w2v_literature_target_word-bigram_dim300/README.md @@ -1,144 +1,172 @@ -## 概述 -PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) +# w2v_literature_target_word-bigram_dim300 +|模型名称|w2v_literature_target_word-bigram_dim300| +| :--- | :---: | +|类别|文本-词嵌入| +|网络|w2v| +|数据集|literature| +|是否支持Fine-tuning|否| +|文件大小|200.59MB| +|词表大小|187962| +|最新更新日期|2021-04-28| +|数据指标|-| -## API +## 一、模型基本信息 -```python -def __init__( - *args, - **kwargs -) -``` +- ### 模型介绍 -创建一个Embedding Module对象,默认无需参数。 + - PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) -**参数** -* `*args`: 用户额外指定的列表类型的参数。 -* `**kwargs`:用户额外指定的关键字字典类型的参数。 +## 二、安装 -关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) +- ### 1、环境依赖 + - paddlepaddle >= 2.0.0 -```python -def search( - words: Union[List[str], str, int], -) -``` + - paddlehub >= 2.0.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) -获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 +- ### 2、安装 -**参数** -* `words`: 需要获取的词向量的词、词列表或者词编号。 + - ```shell + $ hub install w2v_literature_target_word-bigram_dim300 + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) +## 三、模型API -```python -def cosine_sim( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 +- ### 1、预测代码示例 -**参数** -* `word_a`: 需要计算余弦相似度的单词a。 -* `word_b`: 需要计算余弦相似度的单词b。 + - ```python + import paddlehub as hub + embedding = hub.Module(name='w2v_literature_target_word-bigram_dim300') + # 获取单词的embedding + embedding.search("中国") + # 计算两个词向量的余弦相似度 + embedding.cosine_sim("中国", "美国") + # 计算两个词向量的内积 + embedding.dot("中国", "美国") + ``` -```python -def dot( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 +- ### 2、API -**参数** -* `word_a`: 需要计算内积的单词a。 -* `word_b`: 需要计算内积的单词b。 + - ```python + def __init__( + *args, + **kwargs + ) + ``` + - 创建一个Embedding 
Module对象,默认无需参数。 -```python -def get_vocab_path() -``` -获取本地词表文件的路径信息。 + - **参数** + - `*args`: 用户额外指定的列表类型的参数。 + - `**kwargs`:用户额外指定的关键字字典类型的参数。 + - 关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -```python -def get_tokenizer(*args, **kwargs) -``` -获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -**参数** -* `*args`: 额外传递的列表形式的参数。 -* `**kwargs`: 额外传递的字典形式的参数。 + - ```python + def search( + words: Union[List[str], str, int], + ) + ``` -关于额外参数的详情,可查看[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/develop/PaddleNLP/paddlenlp/data/tokenizer.py) + - 获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 + - **参数** + - `words`: 需要获取的词向量的词、词列表或者词编号。 -更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -## 代码示例 + - ```python + def cosine_sim( + word_a: str, + word_b: str, + ) + ``` -```python -import paddlehub as hub -embedding = hub.Module(name='w2v_literature_target_word-bigram_dim300') + - 计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 -# 获取单词的embedding -embedding.search("中国") -# 计算两个词向量的余弦相似度 -embedding.cosine_sim("中国", "美国") -# 计算两个词向量的内积 -embedding.dot("中国", "美国") -``` + - **参数** + - `word_a`: 需要计算余弦相似度的单词a。 + - `word_b`: 需要计算余弦相似度的单词b。 -## 部署服务 -通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 + - ```python + def dot( + word_a: str, + word_b: str, + ) + ``` -### Step1: 启动PaddleHub Serving + - 计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 -运行启动命令: + - **参数** + - `word_a`: 需要计算内积的单词a。 + - `word_b`: 需要计算内积的单词b。 -```shell -$ hub serving start -m w2v_literature_target_word-bigram_dim300 -``` -这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + - ```python + def get_vocab_path() + ``` -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + - 获取本地词表文件的路径信息。 -### Step2: 发送预测请求 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + - ```python + def get_tokenizer(*args, **kwargs) + ``` -```python -import requests -import json + - 获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -# 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... 
]] -word_pairs = [["中国", "美国"], ["今天", "明天"]] -# 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 -data = {"data": word_pairs} -# 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip -url = "http://10.12.121.132:8866/predict/w2v_literature_target_word-bigram_dim300" -# 指定post请求的headers为application/json方式 -headers = {"Content-Type": "application/json"} + - **参数** + - `*args`: 额外传递的列表形式的参数。 + - `**kwargs`: 额外传递的字典形式的参数。 -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -print(r.json()) -``` + - 关于额外参数的详情可参考[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/paddlenlp/data/tokenizer.py) -## 查看代码 + - 更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings -## 依赖 +## 四、部署服务 -paddlepaddle >= 2.0.0 +- 通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 -paddlehub >= 2.0.0 +- ### Step1: 启动PaddleHub Serving -## 更新历史 + - 运行启动命令: + + - ```shell + $ hub serving start -m w2v_literature_target_word-bigram_dim300 + ``` + + - 这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步: 发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... ]] + word_pairs = [["中国", "美国"], ["今天", "明天"]] + # 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 + data = {"data": word_pairs} + # 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip + url = "http://127.0.0.1:8866/predict/w2v_literature_target_word-bigram_dim300" + # 指定post请求的headers为application/json方式 + headers = {"Content-Type": "application/json"} + + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + ``` + + +## 五、更新历史 * 1.0.0 @@ -146,4 +174,7 @@ paddlehub >= 2.0.0 * 1.0.1 - 支持基于embedding的文本分类和序列标注finetune任务 + 优化模型 + - ```shell + $ hub install w2v_literature_target_word-bigram_dim300==1.0.1 + ``` diff --git a/modules/text/embedding/w2v_literature_target_word-char_dim300/README.md b/modules/text/embedding/w2v_literature_target_word-char_dim300/README.md index 6a0432315e5b1d88113f1f95a082e6975c9a21ae..ae45d41740384aa1afd9812ed43eee280f4c0d1e 100644 --- a/modules/text/embedding/w2v_literature_target_word-char_dim300/README.md +++ b/modules/text/embedding/w2v_literature_target_word-char_dim300/README.md @@ -1,144 +1,172 @@ -## 概述 -PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) +# w2v_literature_target_word-char_dim300 +|模型名称|w2v_literature_target_word-char_dim300| +| :--- | :---: | +|类别|文本-词嵌入| +|网络|w2v| +|数据集|literature| +|是否支持Fine-tuning|否| +|文件大小|200.44MB| +|词表大小|187980| +|最新更新日期|2021-04-28| +|数据指标|-| -## API +## 一、模型基本信息 -```python -def __init__( - *args, - **kwargs -) -``` +- ### 模型介绍 -创建一个Embedding Module对象,默认无需参数。 + - PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) -**参数** -* `*args`: 用户额外指定的列表类型的参数。 -* `**kwargs`:用户额外指定的关键字字典类型的参数。 +## 二、安装 -关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) +- ### 
1、环境依赖 + - paddlepaddle >= 2.0.0 -```python -def search( - words: Union[List[str], str, int], -) -``` + - paddlehub >= 2.0.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) -获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 +- ### 2、安装 -**参数** -* `words`: 需要获取的词向量的词、词列表或者词编号。 + - ```shell + $ hub install w2v_literature_target_word-char_dim300 + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) +## 三、模型API -```python -def cosine_sim( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 +- ### 1、预测代码示例 -**参数** -* `word_a`: 需要计算余弦相似度的单词a。 -* `word_b`: 需要计算余弦相似度的单词b。 + - ```python + import paddlehub as hub + embedding = hub.Module(name='w2v_literature_target_word-char_dim300') + # 获取单词的embedding + embedding.search("中国") + # 计算两个词向量的余弦相似度 + embedding.cosine_sim("中国", "美国") + # 计算两个词向量的内积 + embedding.dot("中国", "美国") + ``` -```python -def dot( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 +- ### 2、API -**参数** -* `word_a`: 需要计算内积的单词a。 -* `word_b`: 需要计算内积的单词b。 + - ```python + def __init__( + *args, + **kwargs + ) + ``` + - 创建一个Embedding Module对象,默认无需参数。 -```python -def get_vocab_path() -``` -获取本地词表文件的路径信息。 + - **参数** + - `*args`: 用户额外指定的列表类型的参数。 + - `**kwargs`:用户额外指定的关键字字典类型的参数。 + - 关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -```python -def get_tokenizer(*args, **kwargs) -``` -获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -**参数** -* `*args`: 额外传递的列表形式的参数。 -* `**kwargs`: 额外传递的字典形式的参数。 + - ```python + def search( + words: Union[List[str], str, int], + ) + ``` -关于额外参数的详情,可查看[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/develop/PaddleNLP/paddlenlp/data/tokenizer.py) + - 获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 + - **参数** + - `words`: 需要获取的词向量的词、词列表或者词编号。 -更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -## 代码示例 + - ```python + def cosine_sim( + word_a: str, + word_b: str, + ) + ``` -```python -import paddlehub as hub -embedding = hub.Module(name='w2v_literature_target_word-char_dim300') + - 计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 -# 获取单词的embedding -embedding.search("中国") -# 计算两个词向量的余弦相似度 -embedding.cosine_sim("中国", "美国") -# 计算两个词向量的内积 -embedding.dot("中国", "美国") -``` + - **参数** + - `word_a`: 需要计算余弦相似度的单词a。 + - `word_b`: 需要计算余弦相似度的单词b。 -## 部署服务 -通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 + - ```python + def dot( + word_a: str, + word_b: str, + ) + ``` -### Step1: 启动PaddleHub Serving + - 计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 -运行启动命令: + - **参数** + - `word_a`: 需要计算内积的单词a。 + - `word_b`: 需要计算内积的单词b。 -```shell -$ hub serving start -m w2v_literature_target_word-char_dim300 -``` -这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + - ```python + def get_vocab_path() + ``` -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + - 获取本地词表文件的路径信息。 -### Step2: 发送预测请求 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + - ```python + def get_tokenizer(*args, **kwargs) + ``` 
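
The API listing above gives call signatures only; the short sketch below strings them together for `w2v_literature_target_word-char_dim300`. Everything except the `cut` call comes straight from this README (`search` accepting a word, a word list, or a word id; `get_vocab_path`; `get_tokenizer`); treating the returned tokenizer as a paddlenlp `JiebaTokenizer` with a `cut` method is an assumption to verify against the installed paddlenlp version.

```python
# Minimal sketch of the APIs documented above (assumes the module is installed).
import paddlehub as hub

embedding = hub.Module(name='w2v_literature_target_word-char_dim300')

# search() accepts a single word, a list of words, or a word id from the vocab.
print(embedding.search("中国"))
print(embedding.search(["中国", "美国"]))
print(embedding.search(100))

# Path of the locally downloaded vocabulary file.
print(embedding.get_vocab_path())

# Tokenize a raw sentence before looking up vectors (Chinese models only).
# Assumption: the returned JiebaTokenizer exposes cut(), as in paddlenlp.
tokenizer = embedding.get_tokenizer()
tokens = tokenizer.cut("今天天气很好")
print(tokens)
```
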
-```python -import requests -import json + - 获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -# 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... ]] -word_pairs = [["中国", "美国"], ["今天", "明天"]] -# 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 -data = {"data": word_pairs} -# 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip -url = "http://10.12.121.132:8866/predict/w2v_literature_target_word-char_dim300" -# 指定post请求的headers为application/json方式 -headers = {"Content-Type": "application/json"} + - **参数** + - `*args`: 额外传递的列表形式的参数。 + - `**kwargs`: 额外传递的字典形式的参数。 -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -print(r.json()) -``` + - 关于额外参数的详情可参考[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/paddlenlp/data/tokenizer.py) -## 查看代码 + - 更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings -## 依赖 +## 四、部署服务 -paddlepaddle >= 2.0.0 +- 通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 -paddlehub >= 2.0.0 +- ### Step1: 启动PaddleHub Serving -## 更新历史 + - 运行启动命令: + + - ```shell + $ hub serving start -m w2v_literature_target_word-char_dim300 + ``` + + - 这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步: 发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... ]] + word_pairs = [["中国", "美国"], ["今天", "明天"]] + # 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 + data = {"data": word_pairs} + # 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip + url = "http://127.0.0.1:8866/predict/w2v_literature_target_word-char_dim300" + # 指定post请求的headers为application/json方式 + headers = {"Content-Type": "application/json"} + + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + ``` + + +## 五、更新历史 * 1.0.0 @@ -146,4 +174,7 @@ paddlehub >= 2.0.0 * 1.0.1 - 支持基于embedding的文本分类和序列标注finetune任务 + 优化模型 + - ```shell + $ hub install w2v_literature_target_word-char_dim300==1.0.1 + ``` diff --git a/modules/text/embedding/w2v_literature_target_word-word_dim300/README.md b/modules/text/embedding/w2v_literature_target_word-word_dim300/README.md index 67032fbb3c23365f1e4846889c1c136efdf04e02..3e9e7c6197af65052157bf8c7bfe565b34a13948 100644 --- a/modules/text/embedding/w2v_literature_target_word-word_dim300/README.md +++ b/modules/text/embedding/w2v_literature_target_word-word_dim300/README.md @@ -1,144 +1,172 @@ -## 概述 -PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) +# w2v_literature_target_word-word_dim300 +|模型名称|w2v_literature_target_word-word_dim300| +| :--- | :---: | +|类别|文本-词嵌入| +|网络|w2v| +|数据集|literature| +|是否支持Fine-tuning|否| +|文件大小|200.28MB| +|词表大小|187961| +|最新更新日期|2021-04-28| +|数据指标|-| -## API +## 一、模型基本信息 -```python -def __init__( - *args, - **kwargs -) -``` +- ### 模型介绍 -创建一个Embedding Module对象,默认无需参数。 + - PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) -**参数** -* `*args`: 用户额外指定的列表类型的参数。 -* 
`**kwargs`:用户额外指定的关键字字典类型的参数。 +## 二、安装 -关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) +- ### 1、环境依赖 + - paddlepaddle >= 2.0.0 -```python -def search( - words: Union[List[str], str, int], -) -``` + - paddlehub >= 2.0.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) -获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 +- ### 2、安装 -**参数** -* `words`: 需要获取的词向量的词、词列表或者词编号。 + - ```shell + $ hub install w2v_literature_target_word-word_dim300 + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) +## 三、模型API -```python -def cosine_sim( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 +- ### 1、预测代码示例 -**参数** -* `word_a`: 需要计算余弦相似度的单词a。 -* `word_b`: 需要计算余弦相似度的单词b。 + - ```python + import paddlehub as hub + embedding = hub.Module(name='w2v_literature_target_word-word_dim300') + # 获取单词的embedding + embedding.search("中国") + # 计算两个词向量的余弦相似度 + embedding.cosine_sim("中国", "美国") + # 计算两个词向量的内积 + embedding.dot("中国", "美国") + ``` -```python -def dot( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 +- ### 2、API -**参数** -* `word_a`: 需要计算内积的单词a。 -* `word_b`: 需要计算内积的单词b。 + - ```python + def __init__( + *args, + **kwargs + ) + ``` + - 创建一个Embedding Module对象,默认无需参数。 -```python -def get_vocab_path() -``` -获取本地词表文件的路径信息。 + - **参数** + - `*args`: 用户额外指定的列表类型的参数。 + - `**kwargs`:用户额外指定的关键字字典类型的参数。 + - 关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -```python -def get_tokenizer(*args, **kwargs) -``` -获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -**参数** -* `*args`: 额外传递的列表形式的参数。 -* `**kwargs`: 额外传递的字典形式的参数。 + - ```python + def search( + words: Union[List[str], str, int], + ) + ``` -关于额外参数的详情,可查看[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/develop/PaddleNLP/paddlenlp/data/tokenizer.py) + - 获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 + - **参数** + - `words`: 需要获取的词向量的词、词列表或者词编号。 -更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -## 代码示例 + - ```python + def cosine_sim( + word_a: str, + word_b: str, + ) + ``` -```python -import paddlehub as hub -embedding = hub.Module(name='w2v_literature_target_word-word_dim300') + - 计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 -# 获取单词的embedding -embedding.search("中国") -# 计算两个词向量的余弦相似度 -embedding.cosine_sim("中国", "美国") -# 计算两个词向量的内积 -embedding.dot("中国", "美国") -``` + - **参数** + - `word_a`: 需要计算余弦相似度的单词a。 + - `word_b`: 需要计算余弦相似度的单词b。 -## 部署服务 -通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 + - ```python + def dot( + word_a: str, + word_b: str, + ) + ``` -### Step1: 启动PaddleHub Serving + - 计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 -运行启动命令: + - **参数** + - `word_a`: 需要计算内积的单词a。 + - `word_b`: 需要计算内积的单词b。 -```shell -$ hub serving start -m w2v_literature_target_word-word_dim300 -``` -这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + - ```python + def get_vocab_path() + ``` -**NOTE:** 
如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + - 获取本地词表文件的路径信息。 -### Step2: 发送预测请求 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + - ```python + def get_tokenizer(*args, **kwargs) + ``` -```python -import requests -import json + - 获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -# 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... ]] -word_pairs = [["中国", "美国"], ["今天", "明天"]] -# 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 -data = {"data": word_pairs} -# 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip -url = "http://10.12.121.132:8866/predict/w2v_literature_target_word-word_dim300" -# 指定post请求的headers为application/json方式 -headers = {"Content-Type": "application/json"} + - **参数** + - `*args`: 额外传递的列表形式的参数。 + - `**kwargs`: 额外传递的字典形式的参数。 -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -print(r.json()) -``` + - 关于额外参数的详情可参考[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/paddlenlp/data/tokenizer.py) -## 查看代码 + - 更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings -## 依赖 +## 四、部署服务 -paddlepaddle >= 2.0.0 +- 通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 -paddlehub >= 2.0.0 +- ### Step1: 启动PaddleHub Serving -## 更新历史 + - 运行启动命令: + + - ```shell + $ hub serving start -m w2v_literature_target_word-word_dim300 + ``` + + - 这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步: 发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... 
]] + word_pairs = [["中国", "美国"], ["今天", "明天"]] + # 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 + data = {"data": word_pairs} + # 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip + url = "http://127.0.0.1:8866/predict/w2v_literature_target_word-word_dim300" + # 指定post请求的headers为application/json方式 + headers = {"Content-Type": "application/json"} + + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + ``` + + +## 五、更新历史 * 1.0.0 @@ -146,4 +174,7 @@ paddlehub >= 2.0.0 * 1.0.1 - 支持基于embedding的文本分类和序列标注finetune任务 + 优化模型 + - ```shell + $ hub install w2v_literature_target_word-word_dim300==1.0.1 + ``` diff --git a/modules/text/embedding/w2v_mixed-large_target_word-char_dim300/README.md b/modules/text/embedding/w2v_mixed-large_target_word-char_dim300/README.md index c0fa143c038dd844bcc326c36854c5d824267233..02e1c7e1d91270c54b6123f38c02e82b3613e7e6 100644 --- a/modules/text/embedding/w2v_mixed-large_target_word-char_dim300/README.md +++ b/modules/text/embedding/w2v_mixed-large_target_word-char_dim300/README.md @@ -1,144 +1,172 @@ -## 概述 -PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) +# w2v_mixed-large_target_word-char_dim300 +|模型名称|w2v_mixed-large_target_word-char_dim300| +| :--- | :---: | +|类别|文本-词嵌入| +|网络|w2v| +|数据集|mixed| +|是否支持Fine-tuning|否| +|文件大小|1.35GB| +|词表大小|1292552| +|最新更新日期|2021-04-28| +|数据指标|-| -## API +## 一、模型基本信息 -```python -def __init__( - *args, - **kwargs -) -``` +- ### 模型介绍 -创建一个Embedding Module对象,默认无需参数。 + - PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) -**参数** -* `*args`: 用户额外指定的列表类型的参数。 -* `**kwargs`:用户额外指定的关键字字典类型的参数。 +## 二、安装 -关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) +- ### 1、环境依赖 + - paddlepaddle >= 2.0.0 -```python -def search( - words: Union[List[str], str, int], -) -``` + - paddlehub >= 2.0.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) -获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 +- ### 2、安装 -**参数** -* `words`: 需要获取的词向量的词、词列表或者词编号。 + - ```shell + $ hub install w2v_mixed-large_target_word-char_dim300 + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) +## 三、模型API -```python -def cosine_sim( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 +- ### 1、预测代码示例 -**参数** -* `word_a`: 需要计算余弦相似度的单词a。 -* `word_b`: 需要计算余弦相似度的单词b。 + - ```python + import paddlehub as hub + embedding = hub.Module(name='w2v_mixed-large_target_word-char_dim300') + # 获取单词的embedding + embedding.search("中国") + # 计算两个词向量的余弦相似度 + embedding.cosine_sim("中国", "美国") + # 计算两个词向量的内积 + embedding.dot("中国", "美国") + ``` -```python -def dot( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 +- ### 2、API -**参数** -* `word_a`: 需要计算内积的单词a。 -* `word_b`: 需要计算内积的单词b。 + - ```python + def __init__( + *args, + **kwargs + ) + ``` + - 创建一个Embedding Module对象,默认无需参数。 -```python 
-def get_vocab_path() -``` -获取本地词表文件的路径信息。 + - **参数** + - `*args`: 用户额外指定的列表类型的参数。 + - `**kwargs`:用户额外指定的关键字字典类型的参数。 + - 关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -```python -def get_tokenizer(*args, **kwargs) -``` -获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -**参数** -* `*args`: 额外传递的列表形式的参数。 -* `**kwargs`: 额外传递的字典形式的参数。 + - ```python + def search( + words: Union[List[str], str, int], + ) + ``` -关于额外参数的详情,可查看[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/develop/PaddleNLP/paddlenlp/data/tokenizer.py) + - 获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 + - **参数** + - `words`: 需要获取的词向量的词、词列表或者词编号。 -更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -## 代码示例 + - ```python + def cosine_sim( + word_a: str, + word_b: str, + ) + ``` -```python -import paddlehub as hub -embedding = hub.Module(name='w2v_mixed-large_target_word-char_dim300') + - 计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 -# 获取单词的embedding -embedding.search("中国") -# 计算两个词向量的余弦相似度 -embedding.cosine_sim("中国", "美国") -# 计算两个词向量的内积 -embedding.dot("中国", "美国") -``` + - **参数** + - `word_a`: 需要计算余弦相似度的单词a。 + - `word_b`: 需要计算余弦相似度的单词b。 -## 部署服务 -通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 + - ```python + def dot( + word_a: str, + word_b: str, + ) + ``` -### Step1: 启动PaddleHub Serving + - 计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 -运行启动命令: + - **参数** + - `word_a`: 需要计算内积的单词a。 + - `word_b`: 需要计算内积的单词b。 -```shell -$ hub serving start -m w2v_mixed-large_target_word-char_dim300 -``` -这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + - ```python + def get_vocab_path() + ``` -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + - 获取本地词表文件的路径信息。 -### Step2: 发送预测请求 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + - ```python + def get_tokenizer(*args, **kwargs) + ``` -```python -import requests -import json + - 获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -# 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... 
]] -word_pairs = [["中国", "美国"], ["今天", "明天"]] -# 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 -data = {"data": word_pairs} -# 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip -url = "http://10.12.121.132:8866/predict/w2v_mixed-large_target_word-char_dim300" -# 指定post请求的headers为application/json方式 -headers = {"Content-Type": "application/json"} + - **参数** + - `*args`: 额外传递的列表形式的参数。 + - `**kwargs`: 额外传递的字典形式的参数。 -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -print(r.json()) -``` + - 关于额外参数的详情可参考[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/paddlenlp/data/tokenizer.py) -## 查看代码 + - 更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings -## 依赖 +## 四、部署服务 -paddlepaddle >= 2.0.0 +- 通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 -paddlehub >= 2.0.0 +- ### Step1: 启动PaddleHub Serving -## 更新历史 + - 运行启动命令: + + - ```shell + $ hub serving start -m w2v_mixed-large_target_word-char_dim300 + ``` + + - 这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步: 发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... ]] + word_pairs = [["中国", "美国"], ["今天", "明天"]] + # 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 + data = {"data": word_pairs} + # 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip + url = "http://127.0.0.1:8866/predict/w2v_mixed-large_target_word-char_dim300" + # 指定post请求的headers为application/json方式 + headers = {"Content-Type": "application/json"} + + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + ``` + + +## 五、更新历史 * 1.0.0 @@ -146,4 +174,7 @@ paddlehub >= 2.0.0 * 1.0.1 - 支持基于embedding的文本分类和序列标注finetune任务 + 优化模型 + - ```shell + $ hub install w2v_mixed-large_target_word-char_dim300==1.0.1 + ``` diff --git a/modules/text/embedding/w2v_mixed-large_target_word-word_dim300/README.md b/modules/text/embedding/w2v_mixed-large_target_word-word_dim300/README.md index 3f5812c6249b25a9bd91296bdfb6f20a8bceaf30..ed987eb670bbb4264a8d0ba91faa5200d2d47900 100644 --- a/modules/text/embedding/w2v_mixed-large_target_word-word_dim300/README.md +++ b/modules/text/embedding/w2v_mixed-large_target_word-word_dim300/README.md @@ -1,144 +1,172 @@ -## 概述 -PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) +# w2v_mixed-large_target_word-word_dim300 +|模型名称|w2v_mixed-large_target_word-word_dim300| +| :--- | :---: | +|类别|文本-词嵌入| +|网络|w2v| +|数据集|mixed| +|是否支持Fine-tuning|否| +|文件大小|1.35GB| +|词表大小|1292483| +|最新更新日期|2021-04-28| +|数据指标|-| -## API +## 一、模型基本信息 -```python -def __init__( - *args, - **kwargs -) -``` +- ### 模型介绍 -创建一个Embedding Module对象,默认无需参数。 + - PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) -**参数** -* `*args`: 用户额外指定的列表类型的参数。 -* `**kwargs`:用户额外指定的关键字字典类型的参数。 +## 二、安装 -关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) +- ### 1、环境依赖 + 
- paddlepaddle >= 2.0.0 -```python -def search( - words: Union[List[str], str, int], -) -``` + - paddlehub >= 2.0.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) -获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 +- ### 2、安装 -**参数** -* `words`: 需要获取的词向量的词、词列表或者词编号。 + - ```shell + $ hub install w2v_mixed-large_target_word-word_dim300 + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) +## 三、模型API -```python -def cosine_sim( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 +- ### 1、预测代码示例 -**参数** -* `word_a`: 需要计算余弦相似度的单词a。 -* `word_b`: 需要计算余弦相似度的单词b。 + - ```python + import paddlehub as hub + embedding = hub.Module(name='w2v_mixed-large_target_word-word_dim300') + # 获取单词的embedding + embedding.search("中国") + # 计算两个词向量的余弦相似度 + embedding.cosine_sim("中国", "美国") + # 计算两个词向量的内积 + embedding.dot("中国", "美国") + ``` -```python -def dot( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 +- ### 2、API -**参数** -* `word_a`: 需要计算内积的单词a。 -* `word_b`: 需要计算内积的单词b。 + - ```python + def __init__( + *args, + **kwargs + ) + ``` + - 创建一个Embedding Module对象,默认无需参数。 -```python -def get_vocab_path() -``` -获取本地词表文件的路径信息。 + - **参数** + - `*args`: 用户额外指定的列表类型的参数。 + - `**kwargs`:用户额外指定的关键字字典类型的参数。 + - 关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -```python -def get_tokenizer(*args, **kwargs) -``` -获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -**参数** -* `*args`: 额外传递的列表形式的参数。 -* `**kwargs`: 额外传递的字典形式的参数。 + - ```python + def search( + words: Union[List[str], str, int], + ) + ``` -关于额外参数的详情,可查看[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/develop/PaddleNLP/paddlenlp/data/tokenizer.py) + - 获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 + - **参数** + - `words`: 需要获取的词向量的词、词列表或者词编号。 -更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -## 代码示例 + - ```python + def cosine_sim( + word_a: str, + word_b: str, + ) + ``` -```python -import paddlehub as hub -embedding = hub.Module(name='w2v_mixed-large_target_word-word_dim300') + - 计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 -# 获取单词的embedding -embedding.search("中国") -# 计算两个词向量的余弦相似度 -embedding.cosine_sim("中国", "美国") -# 计算两个词向量的内积 -embedding.dot("中国", "美国") -``` + - **参数** + - `word_a`: 需要计算余弦相似度的单词a。 + - `word_b`: 需要计算余弦相似度的单词b。 -## 部署服务 -通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 + - ```python + def dot( + word_a: str, + word_b: str, + ) + ``` -### Step1: 启动PaddleHub Serving + - 计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 -运行启动命令: + - **参数** + - `word_a`: 需要计算内积的单词a。 + - `word_b`: 需要计算内积的单词b。 -```shell -$ hub serving start -m w2v_mixed-large_target_word-word_dim300 -``` -这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + - ```python + def get_vocab_path() + ``` -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + - 获取本地词表文件的路径信息。 -### Step2: 发送预测请求 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + - ```python + def get_tokenizer(*args, **kwargs) + ``` -```python 
-import requests -import json + - 获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -# 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... ]] -word_pairs = [["中国", "美国"], ["今天", "明天"]] -# 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 -data = {"data": word_pairs} -# 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip -url = "http://10.12.121.132:8866/predict/w2v_mixed-large_target_word-word_dim300" -# 指定post请求的headers为application/json方式 -headers = {"Content-Type": "application/json"} + - **参数** + - `*args`: 额外传递的列表形式的参数。 + - `**kwargs`: 额外传递的字典形式的参数。 -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -print(r.json()) -``` + - 关于额外参数的详情可参考[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/paddlenlp/data/tokenizer.py) -## 查看代码 + - 更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings -## 依赖 +## 四、部署服务 -paddlepaddle >= 2.0.0 +- 通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 -paddlehub >= 2.0.0 +- ### Step1: 启动PaddleHub Serving -## 更新历史 + - 运行启动命令: + + - ```shell + $ hub serving start -m w2v_mixed-large_target_word-word_dim300 + ``` + + - 这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步: 发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... ]] + word_pairs = [["中国", "美国"], ["今天", "明天"]] + # 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 + data = {"data": word_pairs} + # 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip + url = "http://127.0.0.1:8866/predict/w2v_mixed-large_target_word-word_dim300" + # 指定post请求的headers为application/json方式 + headers = {"Content-Type": "application/json"} + + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + ``` + + +## 五、更新历史 * 1.0.0 @@ -146,4 +174,7 @@ paddlehub >= 2.0.0 * 1.0.1 - 支持基于embedding的文本分类和序列标注finetune任务 + 优化模型 + - ```shell + $ hub install w2v_mixed-large_target_word-word_dim300==1.0.1 + ``` diff --git a/modules/text/embedding/w2v_people_daily_target_bigram-char_dim300/README.md b/modules/text/embedding/w2v_people_daily_target_bigram-char_dim300/README.md index 171c362ad04560e26b414e474d945cc20e660a66..c3fcc7226610be97196f8609fbde66aacc91730f 100644 --- a/modules/text/embedding/w2v_people_daily_target_bigram-char_dim300/README.md +++ b/modules/text/embedding/w2v_people_daily_target_bigram-char_dim300/README.md @@ -1,144 +1,172 @@ -## 概述 -PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) +# w2v_people_daily_target_bigram-char_dim300 +|模型名称|w2v_people_daily_target_bigram-char_dim300| +| :--- | :---: | +|类别|文本-词嵌入| +|网络|w2v| +|数据集|people_daily| +|是否支持Fine-tuning|否| +|文件大小|379.96MB| +|词表大小|356055| +|最新更新日期|2021-04-28| +|数据指标|-| -## API +## 一、模型基本信息 -```python -def __init__( - *args, - **kwargs -) -``` +- ### 模型介绍 -创建一个Embedding Module对象,默认无需参数。 + - PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) -**参数** -* `*args`: 用户额外指定的列表类型的参数。 -* 
`**kwargs`:用户额外指定的关键字字典类型的参数。 +## 二、安装 -关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) +- ### 1、环境依赖 + - paddlepaddle >= 2.0.0 -```python -def search( - words: Union[List[str], str, int], -) -``` + - paddlehub >= 2.0.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) -获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 +- ### 2、安装 -**参数** -* `words`: 需要获取的词向量的词、词列表或者词编号。 + - ```shell + $ hub install w2v_people_daily_target_bigram-char_dim300 + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) +## 三、模型API -```python -def cosine_sim( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 +- ### 1、预测代码示例 -**参数** -* `word_a`: 需要计算余弦相似度的单词a。 -* `word_b`: 需要计算余弦相似度的单词b。 + - ```python + import paddlehub as hub + embedding = hub.Module(name='w2v_people_daily_target_bigram-char_dim300') + # 获取单词的embedding + embedding.search("中国") + # 计算两个词向量的余弦相似度 + embedding.cosine_sim("中国", "美国") + # 计算两个词向量的内积 + embedding.dot("中国", "美国") + ``` -```python -def dot( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 +- ### 2、API -**参数** -* `word_a`: 需要计算内积的单词a。 -* `word_b`: 需要计算内积的单词b。 + - ```python + def __init__( + *args, + **kwargs + ) + ``` + - 创建一个Embedding Module对象,默认无需参数。 -```python -def get_vocab_path() -``` -获取本地词表文件的路径信息。 + - **参数** + - `*args`: 用户额外指定的列表类型的参数。 + - `**kwargs`:用户额外指定的关键字字典类型的参数。 + - 关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -```python -def get_tokenizer(*args, **kwargs) -``` -获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -**参数** -* `*args`: 额外传递的列表形式的参数。 -* `**kwargs`: 额外传递的字典形式的参数。 + - ```python + def search( + words: Union[List[str], str, int], + ) + ``` -关于额外参数的详情,可查看[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/develop/PaddleNLP/paddlenlp/data/tokenizer.py) + - 获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 + - **参数** + - `words`: 需要获取的词向量的词、词列表或者词编号。 -更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -## 代码示例 + - ```python + def cosine_sim( + word_a: str, + word_b: str, + ) + ``` -```python -import paddlehub as hub -embedding = hub.Module(name='w2v_people_daily_target_bigram-char_dim300') + - 计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 -# 获取单词的embedding -embedding.search("中国") -# 计算两个词向量的余弦相似度 -embedding.cosine_sim("中国", "美国") -# 计算两个词向量的内积 -embedding.dot("中国", "美国") -``` + - **参数** + - `word_a`: 需要计算余弦相似度的单词a。 + - `word_b`: 需要计算余弦相似度的单词b。 -## 部署服务 -通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 + - ```python + def dot( + word_a: str, + word_b: str, + ) + ``` -### Step1: 启动PaddleHub Serving + - 计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 -运行启动命令: + - **参数** + - `word_a`: 需要计算内积的单词a。 + - `word_b`: 需要计算内积的单词b。 -```shell -$ hub serving start -m w2v_people_daily_target_bigram-char_dim300 -``` -这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + - ```python + def get_vocab_path() + ``` -**NOTE:** 
如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + - 获取本地词表文件的路径信息。 -### Step2: 发送预测请求 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + - ```python + def get_tokenizer(*args, **kwargs) + ``` -```python -import requests -import json + - 获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -# 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... ]] -word_pairs = [["中国", "美国"], ["今天", "明天"]] -# 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 -data = {"data": word_pairs} -# 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip -url = "http://10.12.121.132:8866/predict/w2v_people_daily_target_bigram-char_dim300" -# 指定post请求的headers为application/json方式 -headers = {"Content-Type": "application/json"} + - **参数** + - `*args`: 额外传递的列表形式的参数。 + - `**kwargs`: 额外传递的字典形式的参数。 -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -print(r.json()) -``` + - 关于额外参数的详情可参考[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/paddlenlp/data/tokenizer.py) -## 查看代码 + - 更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings -## 依赖 +## 四、部署服务 -paddlepaddle >= 2.0.0 +- 通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 -paddlehub >= 2.0.0 +- ### Step1: 启动PaddleHub Serving -## 更新历史 + - 运行启动命令: + + - ```shell + $ hub serving start -m w2v_people_daily_target_bigram-char_dim300 + ``` + + - 这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步: 发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... 
]] + word_pairs = [["中国", "美国"], ["今天", "明天"]] + # 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 + data = {"data": word_pairs} + # 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip + url = "http://127.0.0.1:8866/predict/w2v_people_daily_target_bigram-char_dim300" + # 指定post请求的headers为application/json方式 + headers = {"Content-Type": "application/json"} + + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + ``` + + +## 五、更新历史 * 1.0.0 @@ -146,4 +174,7 @@ paddlehub >= 2.0.0 * 1.0.1 - 支持基于embedding的文本分类和序列标注finetune任务 + 优化模型 + - ```shell + $ hub install w2v_people_daily_target_bigram-char_dim300==1.0.1 + ``` diff --git a/modules/text/embedding/w2v_people_daily_target_word-bigram_dim300/README.md b/modules/text/embedding/w2v_people_daily_target_word-bigram_dim300/README.md index e2c5d4d6e492e4379379bbebd5aaaa7553147f8e..6f450cde7ef6621181f50f8e1c5b18ac5af1013b 100644 --- a/modules/text/embedding/w2v_people_daily_target_word-bigram_dim300/README.md +++ b/modules/text/embedding/w2v_people_daily_target_word-bigram_dim300/README.md @@ -1,144 +1,172 @@ -## 概述 -PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) +# w2v_people_daily_target_word-bigram_dim300 +|模型名称|w2v_people_daily_target_word-bigram_dim300| +| :--- | :---: | +|类别|文本-词嵌入| +|网络|w2v| +|数据集|people_daily| +|是否支持Fine-tuning|否| +|文件大小|379.68MB| +|词表大小|355991| +|最新更新日期|2021-04-28| +|数据指标|-| -## API +## 一、模型基本信息 -```python -def __init__( - *args, - **kwargs -) -``` +- ### 模型介绍 -创建一个Embedding Module对象,默认无需参数。 + - PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) -**参数** -* `*args`: 用户额外指定的列表类型的参数。 -* `**kwargs`:用户额外指定的关键字字典类型的参数。 +## 二、安装 -关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) +- ### 1、环境依赖 + - paddlepaddle >= 2.0.0 -```python -def search( - words: Union[List[str], str, int], -) -``` + - paddlehub >= 2.0.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) -获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 +- ### 2、安装 -**参数** -* `words`: 需要获取的词向量的词、词列表或者词编号。 + - ```shell + $ hub install w2v_people_daily_target_bigram-char_dim300 + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) +## 三、模型API -```python -def cosine_sim( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 +- ### 1、预测代码示例 -**参数** -* `word_a`: 需要计算余弦相似度的单词a。 -* `word_b`: 需要计算余弦相似度的单词b。 + - ```python + import paddlehub as hub + embedding = hub.Module(name='w2v_people_daily_target_bigram-char_dim300') + # 获取单词的embedding + embedding.search("中国") + # 计算两个词向量的余弦相似度 + embedding.cosine_sim("中国", "美国") + # 计算两个词向量的内积 + embedding.dot("中国", "美国") + ``` -```python -def dot( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 +- ### 2、API -**参数** -* `word_a`: 需要计算内积的单词a。 -* `word_b`: 需要计算内积的单词b。 + - ```python + def __init__( + *args, + **kwargs + ) + ``` + - 
创建一个Embedding Module对象,默认无需参数。 -```python -def get_vocab_path() -``` -获取本地词表文件的路径信息。 + - **参数** + - `*args`: 用户额外指定的列表类型的参数。 + - `**kwargs`:用户额外指定的关键字字典类型的参数。 + - 关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -```python -def get_tokenizer(*args, **kwargs) -``` -获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -**参数** -* `*args`: 额外传递的列表形式的参数。 -* `**kwargs`: 额外传递的字典形式的参数。 + - ```python + def search( + words: Union[List[str], str, int], + ) + ``` -关于额外参数的详情,可查看[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/develop/PaddleNLP/paddlenlp/data/tokenizer.py) + - 获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 + - **参数** + - `words`: 需要获取的词向量的词、词列表或者词编号。 -更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -## 代码示例 + - ```python + def cosine_sim( + word_a: str, + word_b: str, + ) + ``` -```python -import paddlehub as hub -embedding = hub.Module(name='w2v_people_daily_target_word-bigram_dim300') + - 计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 -# 获取单词的embedding -embedding.search("中国") -# 计算两个词向量的余弦相似度 -embedding.cosine_sim("中国", "美国") -# 计算两个词向量的内积 -embedding.dot("中国", "美国") -``` + - **参数** + - `word_a`: 需要计算余弦相似度的单词a。 + - `word_b`: 需要计算余弦相似度的单词b。 -## 部署服务 -通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 + - ```python + def dot( + word_a: str, + word_b: str, + ) + ``` -### Step1: 启动PaddleHub Serving + - 计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 -运行启动命令: + - **参数** + - `word_a`: 需要计算内积的单词a。 + - `word_b`: 需要计算内积的单词b。 -```shell -$ hub serving start -m w2v_people_daily_target_word-bigram_dim300 -``` -这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + - ```python + def get_vocab_path() + ``` -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + - 获取本地词表文件的路径信息。 -### Step2: 发送预测请求 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + - ```python + def get_tokenizer(*args, **kwargs) + ``` -```python -import requests -import json + - 获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -# 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... 
]] -word_pairs = [["中国", "美国"], ["今天", "明天"]] -# 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 -data = {"data": word_pairs} -# 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip -url = "http://10.12.121.132:8866/predict/w2v_people_daily_target_word-bigram_dim300" -# 指定post请求的headers为application/json方式 -headers = {"Content-Type": "application/json"} + - **参数** + - `*args`: 额外传递的列表形式的参数。 + - `**kwargs`: 额外传递的字典形式的参数。 -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -print(r.json()) -``` + - 关于额外参数的详情可参考[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/paddlenlp/data/tokenizer.py) -## 查看代码 + - 更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings -## 依赖 +## 四、部署服务 -paddlepaddle >= 2.0.0 +- 通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 -paddlehub >= 2.0.0 +- ### Step1: 启动PaddleHub Serving -## 更新历史 + - 运行启动命令: + + - ```shell + $ hub serving start -m w2v_people_daily_target_bigram-char_dim300 + ``` + + - 这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步: 发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... ]] + word_pairs = [["中国", "美国"], ["今天", "明天"]] + # 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 + data = {"data": word_pairs} + # 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip + url = "http://127.0.0.1:8866/predict/w2v_people_daily_target_bigram-char_dim300" + # 指定post请求的headers为application/json方式 + headers = {"Content-Type": "application/json"} + + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + ``` + + +## 五、更新历史 * 1.0.0 @@ -146,4 +174,7 @@ paddlehub >= 2.0.0 * 1.0.1 - 支持基于embedding的文本分类和序列标注finetune任务 + 优化模型 + - ```shell + $ hub install w2v_people_daily_target_bigram-char_dim300==1.0.1 + ``` diff --git a/modules/text/embedding/w2v_people_daily_target_word-char_dim300/README.md b/modules/text/embedding/w2v_people_daily_target_word-char_dim300/README.md index 97cb9491876b9e520caf7927e40c6672fc391ffe..249f683cbbd8500c4c0f4b976601151efaf42fcf 100644 --- a/modules/text/embedding/w2v_people_daily_target_word-char_dim300/README.md +++ b/modules/text/embedding/w2v_people_daily_target_word-char_dim300/README.md @@ -1,144 +1,172 @@ -## 概述 -PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) +# w2v_people_daily_target_word-char_dim300 +|模型名称|w2v_people_daily_target_word-char_dim300| +| :--- | :---: | +|类别|文本-词嵌入| +|网络|w2v| +|数据集|people_daily| +|是否支持Fine-tuning|否| +|文件大小|379.45MB| +|词表大小|355998| +|最新更新日期|2021-04-28| +|数据指标|-| -## API +## 一、模型基本信息 -```python -def __init__( - *args, - **kwargs -) -``` +- ### 模型介绍 -创建一个Embedding Module对象,默认无需参数。 + - PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) -**参数** -* `*args`: 用户额外指定的列表类型的参数。 -* `**kwargs`:用户额外指定的关键字字典类型的参数。 +## 二、安装 
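
After `hub install w2v_people_daily_target_word-char_dim300` finishes, a quick way to confirm that the module and its vocabulary downloaded correctly is to load it once and query a single word. The sketch below uses only the `hub.Module`, `get_vocab_path`, and `search` calls documented in this README, and assumes the install step above has already succeeded.

```python
# Post-install smoke test for w2v_people_daily_target_word-char_dim300.
import paddlehub as hub

embedding = hub.Module(name='w2v_people_daily_target_word-char_dim300')

# The local vocabulary path confirms the model files are in place.
print(embedding.get_vocab_path())

# Looking up one common word should return its 300-dimensional vector
# (dim300 per the module name).
print(embedding.search("中国"))
```
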
-关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) +- ### 1、环境依赖 + - paddlepaddle >= 2.0.0 -```python -def search( - words: Union[List[str], str, int], -) -``` + - paddlehub >= 2.0.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) -获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 +- ### 2、安装 -**参数** -* `words`: 需要获取的词向量的词、词列表或者词编号。 + - ```shell + $ hub install w2v_people_daily_target_word-char_dim300 + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) +## 三、模型API -```python -def cosine_sim( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 +- ### 1、预测代码示例 -**参数** -* `word_a`: 需要计算余弦相似度的单词a。 -* `word_b`: 需要计算余弦相似度的单词b。 + - ```python + import paddlehub as hub + embedding = hub.Module(name='w2v_people_daily_target_word-char_dim300') + # 获取单词的embedding + embedding.search("中国") + # 计算两个词向量的余弦相似度 + embedding.cosine_sim("中国", "美国") + # 计算两个词向量的内积 + embedding.dot("中国", "美国") + ``` -```python -def dot( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 +- ### 2、API -**参数** -* `word_a`: 需要计算内积的单词a。 -* `word_b`: 需要计算内积的单词b。 + - ```python + def __init__( + *args, + **kwargs + ) + ``` + - 创建一个Embedding Module对象,默认无需参数。 -```python -def get_vocab_path() -``` -获取本地词表文件的路径信息。 + - **参数** + - `*args`: 用户额外指定的列表类型的参数。 + - `**kwargs`:用户额外指定的关键字字典类型的参数。 + - 关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -```python -def get_tokenizer(*args, **kwargs) -``` -获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -**参数** -* `*args`: 额外传递的列表形式的参数。 -* `**kwargs`: 额外传递的字典形式的参数。 + - ```python + def search( + words: Union[List[str], str, int], + ) + ``` -关于额外参数的详情,可查看[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/develop/PaddleNLP/paddlenlp/data/tokenizer.py) + - 获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 + - **参数** + - `words`: 需要获取的词向量的词、词列表或者词编号。 -更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -## 代码示例 + - ```python + def cosine_sim( + word_a: str, + word_b: str, + ) + ``` -```python -import paddlehub as hub -embedding = hub.Module(name='w2v_people_daily_target_word-char_dim300') + - 计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 -# 获取单词的embedding -embedding.search("中国") -# 计算两个词向量的余弦相似度 -embedding.cosine_sim("中国", "美国") -# 计算两个词向量的内积 -embedding.dot("中国", "美国") -``` + - **参数** + - `word_a`: 需要计算余弦相似度的单词a。 + - `word_b`: 需要计算余弦相似度的单词b。 -## 部署服务 -通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 + - ```python + def dot( + word_a: str, + word_b: str, + ) + ``` -### Step1: 启动PaddleHub Serving + - 计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 -运行启动命令: + - **参数** + - `word_a`: 需要计算内积的单词a。 + - `word_b`: 需要计算内积的单词b。 -```shell -$ hub serving start -m w2v_people_daily_target_word-char_dim300 -``` -这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + - ```python + def get_vocab_path() + ``` -**NOTE:** 
如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + - 获取本地词表文件的路径信息。 -### Step2: 发送预测请求 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + - ```python + def get_tokenizer(*args, **kwargs) + ``` -```python -import requests -import json + - 获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -# 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... ]] -word_pairs = [["中国", "美国"], ["今天", "明天"]] -# 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 -data = {"data": word_pairs} -# 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip -url = "http://10.12.121.132:8866/predict/w2v_people_daily_target_word-char_dim300" -# 指定post请求的headers为application/json方式 -headers = {"Content-Type": "application/json"} + - **参数** + - `*args`: 额外传递的列表形式的参数。 + - `**kwargs`: 额外传递的字典形式的参数。 -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -print(r.json()) -``` + - 关于额外参数的详情可参考[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/paddlenlp/data/tokenizer.py) -## 查看代码 + - 更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings -## 依赖 +## 四、部署服务 -paddlepaddle >= 2.0.0 +- 通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 -paddlehub >= 2.0.0 +- ### Step1: 启动PaddleHub Serving -## 更新历史 + - 运行启动命令: + + - ```shell + $ hub serving start -m w2v_people_daily_target_word-char_dim300 + ``` + + - 这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步: 发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... 
]] + word_pairs = [["中国", "美国"], ["今天", "明天"]] + # 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 + data = {"data": word_pairs} + # 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip + url = "http://127.0.0.1:8866/predict/w2v_people_daily_target_word-char_dim300" + # 指定post请求的headers为application/json方式 + headers = {"Content-Type": "application/json"} + + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + ``` + + +## 五、更新历史 * 1.0.0 @@ -146,4 +174,7 @@ paddlehub >= 2.0.0 * 1.0.1 - 支持基于embedding的文本分类和序列标注finetune任务 + 优化模型 + - ```shell + $ hub install w2v_people_daily_target_word-char_dim300==1.0.1 + ``` diff --git a/modules/text/embedding/w2v_people_daily_target_word-word_dim300/README.md b/modules/text/embedding/w2v_people_daily_target_word-word_dim300/README.md index 9df53cd4f63da779b8519f5d3dcc453324ba82a4..ab23fd7b2b724436e707c6fc2e3d2e059cd81ed4 100644 --- a/modules/text/embedding/w2v_people_daily_target_word-word_dim300/README.md +++ b/modules/text/embedding/w2v_people_daily_target_word-word_dim300/README.md @@ -1,144 +1,172 @@ -## 概述 -PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) +# w2v_people_daily_target_word-word_dim300 +|模型名称|w2v_people_daily_target_word-word_dim300| +| :--- | :---: | +|类别|文本-词嵌入| +|网络|w2v| +|数据集|people_daily| +|是否支持Fine-tuning|否| +|文件大小|378.93MB| +|词表大小|355989| +|最新更新日期|2021-04-28| +|数据指标|-| -## API +## 一、模型基本信息 -```python -def __init__( - *args, - **kwargs -) -``` +- ### 模型介绍 -创建一个Embedding Module对象,默认无需参数。 + - PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) -**参数** -* `*args`: 用户额外指定的列表类型的参数。 -* `**kwargs`:用户额外指定的关键字字典类型的参数。 +## 二、安装 -关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) +- ### 1、环境依赖 + - paddlepaddle >= 2.0.0 -```python -def search( - words: Union[List[str], str, int], -) -``` + - paddlehub >= 2.0.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) -获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 +- ### 2、安装 -**参数** -* `words`: 需要获取的词向量的词、词列表或者词编号。 + - ```shell + $ hub install w2v_people_daily_target_word-word_dim300 + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) +## 三、模型API -```python -def cosine_sim( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 +- ### 1、预测代码示例 -**参数** -* `word_a`: 需要计算余弦相似度的单词a。 -* `word_b`: 需要计算余弦相似度的单词b。 + - ```python + import paddlehub as hub + embedding = hub.Module(name='w2v_people_daily_target_word-word_dim300') + # 获取单词的embedding + embedding.search("中国") + # 计算两个词向量的余弦相似度 + embedding.cosine_sim("中国", "美国") + # 计算两个词向量的内积 + embedding.dot("中国", "美国") + ``` -```python -def dot( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 +- ### 2、API -**参数** -* `word_a`: 需要计算内积的单词a。 -* `word_b`: 需要计算内积的单词b。 + - ```python + def __init__( + *args, + **kwargs + ) + ``` + - 创建一个Embedding 
Module对象,默认无需参数。 -```python -def get_vocab_path() -``` -获取本地词表文件的路径信息。 + - **参数** + - `*args`: 用户额外指定的列表类型的参数。 + - `**kwargs`:用户额外指定的关键字字典类型的参数。 + - 关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -```python -def get_tokenizer(*args, **kwargs) -``` -获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -**参数** -* `*args`: 额外传递的列表形式的参数。 -* `**kwargs`: 额外传递的字典形式的参数。 + - ```python + def search( + words: Union[List[str], str, int], + ) + ``` -关于额外参数的详情,可查看[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/develop/PaddleNLP/paddlenlp/data/tokenizer.py) + - 获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 + - **参数** + - `words`: 需要获取的词向量的词、词列表或者词编号。 -更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -## 代码示例 + - ```python + def cosine_sim( + word_a: str, + word_b: str, + ) + ``` -```python -import paddlehub as hub -embedding = hub.Module(name='w2v_people_daily_target_word-word_dim300') + - 计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 -# 获取单词的embedding -embedding.search("中国") -# 计算两个词向量的余弦相似度 -embedding.cosine_sim("中国", "美国") -# 计算两个词向量的内积 -embedding.dot("中国", "美国") -``` + - **参数** + - `word_a`: 需要计算余弦相似度的单词a。 + - `word_b`: 需要计算余弦相似度的单词b。 -## 部署服务 -通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 + - ```python + def dot( + word_a: str, + word_b: str, + ) + ``` -### Step1: 启动PaddleHub Serving + - 计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 -运行启动命令: + - **参数** + - `word_a`: 需要计算内积的单词a。 + - `word_b`: 需要计算内积的单词b。 -```shell -$ hub serving start -m w2v_people_daily_target_word-word_dim300 -``` -这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + - ```python + def get_vocab_path() + ``` -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + - 获取本地词表文件的路径信息。 -### Step2: 发送预测请求 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + - ```python + def get_tokenizer(*args, **kwargs) + ``` -```python -import requests -import json + - 获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -# 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... 
]] -word_pairs = [["中国", "美国"], ["今天", "明天"]] -# 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 -data = {"data": word_pairs} -# 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip -url = "http://10.12.121.132:8866/predict/w2v_people_daily_target_word-word_dim300" -# 指定post请求的headers为application/json方式 -headers = {"Content-Type": "application/json"} + - **参数** + - `*args`: 额外传递的列表形式的参数。 + - `**kwargs`: 额外传递的字典形式的参数。 -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -print(r.json()) -``` + - 关于额外参数的详情可参考[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/paddlenlp/data/tokenizer.py) -## 查看代码 + - 更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings -## 依赖 +## 四、部署服务 -paddlepaddle >= 2.0.0 +- 通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 -paddlehub >= 2.0.0 +- ### Step1: 启动PaddleHub Serving -## 更新历史 + - 运行启动命令: + + - ```shell + $ hub serving start -m w2v_people_daily_target_word-word_dim300 + ``` + + - 这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步: 发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... ]] + word_pairs = [["中国", "美国"], ["今天", "明天"]] + # 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 + data = {"data": word_pairs} + # 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip + url = "http://127.0.0.1:8866/predict/w2v_people_daily_target_word-word_dim300" + # 指定post请求的headers为application/json方式 + headers = {"Content-Type": "application/json"} + + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + ``` + + +## 五、更新历史 * 1.0.0 @@ -146,4 +174,7 @@ paddlehub >= 2.0.0 * 1.0.1 - 支持基于embedding的文本分类和序列标注finetune任务 + 优化模型 + - ```shell + $ hub install w2v_people_daily_target_word-word_dim300==1.0.1 + ``` diff --git a/modules/text/embedding/w2v_sikuquanshu_target_word-bigram_dim300/README.md b/modules/text/embedding/w2v_sikuquanshu_target_word-bigram_dim300/README.md index d2d00c45d1856d30096311673e7a1d9095da5576..c8983808b189e059b441229fb372c89e4158d3c0 100644 --- a/modules/text/embedding/w2v_sikuquanshu_target_word-bigram_dim300/README.md +++ b/modules/text/embedding/w2v_sikuquanshu_target_word-bigram_dim300/README.md @@ -1,144 +1,172 @@ -## 概述 -PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) +# w2v_sikuquanshu_target_word-bigram_dim300 +|模型名称|w2v_sikuquanshu_target_word-bigram_dim300| +| :--- | :---: | +|类别|文本-词嵌入| +|网络|w2v| +|数据集|sikuquanshu| +|是否支持Fine-tuning|否| +|文件大小|20.77MB| +|词表大小|19529| +|最新更新日期|2021-04-28| +|数据指标|-| -## API +## 一、模型基本信息 -```python -def __init__( - *args, - **kwargs -) -``` +- ### 模型介绍 -创建一个Embedding Module对象,默认无需参数。 + - PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) -**参数** -* `*args`: 用户额外指定的列表类型的参数。 -* `**kwargs`:用户额外指定的关键字字典类型的参数。 +## 二、安装 
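
`cosine_sim` in the API section is, by definition, the dot product of the two word vectors divided by the product of their norms; the sketch below reproduces it from raw `search` results so the two code paths can be compared. It assumes `search` on a single in-vocabulary word returns an array-like 300-d vector that `numpy.asarray` can consume, and the two sample words are only placeholders — swap in any pair that exists in this model's vocabulary.

```python
# Cross-check cosine_sim against a hand-computed cosine similarity.
# Assumptions: both words are in the vocabulary, and search() on a single
# word returns an array-like 300-d vector.
import numpy as np
import paddlehub as hub

embedding = hub.Module(name='w2v_sikuquanshu_target_word-bigram_dim300')

word_a, word_b = "天下", "君子"   # placeholder words; use any in-vocab pair
vec_a = np.asarray(embedding.search(word_a), dtype="float32").ravel()
vec_b = np.asarray(embedding.search(word_b), dtype="float32").ravel()

manual = float(np.dot(vec_a, vec_b) / (np.linalg.norm(vec_a) * np.linalg.norm(vec_b)))
print(manual)                                  # hand-computed cosine similarity
print(embedding.cosine_sim(word_a, word_b))    # module API; should agree closely
```
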
-关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) +- ### 1、环境依赖 + - paddlepaddle >= 2.0.0 -```python -def search( - words: Union[List[str], str, int], -) -``` + - paddlehub >= 2.0.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) -获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 +- ### 2、安装 -**参数** -* `words`: 需要获取的词向量的词、词列表或者词编号。 + - ```shell + $ hub install w2v_sikuquanshu_target_word-bigram_dim300 + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) +## 三、模型API -```python -def cosine_sim( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 +- ### 1、预测代码示例 -**参数** -* `word_a`: 需要计算余弦相似度的单词a。 -* `word_b`: 需要计算余弦相似度的单词b。 + - ```python + import paddlehub as hub + embedding = hub.Module(name='w2v_sikuquanshu_target_word-bigram_dim300') + # 获取单词的embedding + embedding.search("中国") + # 计算两个词向量的余弦相似度 + embedding.cosine_sim("中国", "美国") + # 计算两个词向量的内积 + embedding.dot("中国", "美国") + ``` -```python -def dot( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 +- ### 2、API -**参数** -* `word_a`: 需要计算内积的单词a。 -* `word_b`: 需要计算内积的单词b。 + - ```python + def __init__( + *args, + **kwargs + ) + ``` + - 创建一个Embedding Module对象,默认无需参数。 -```python -def get_vocab_path() -``` -获取本地词表文件的路径信息。 + - **参数** + - `*args`: 用户额外指定的列表类型的参数。 + - `**kwargs`:用户额外指定的关键字字典类型的参数。 + - 关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -```python -def get_tokenizer(*args, **kwargs) -``` -获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -**参数** -* `*args`: 额外传递的列表形式的参数。 -* `**kwargs`: 额外传递的字典形式的参数。 + - ```python + def search( + words: Union[List[str], str, int], + ) + ``` -关于额外参数的详情,可查看[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/develop/PaddleNLP/paddlenlp/data/tokenizer.py) + - 获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 + - **参数** + - `words`: 需要获取的词向量的词、词列表或者词编号。 -更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -## 代码示例 + - ```python + def cosine_sim( + word_a: str, + word_b: str, + ) + ``` -```python -import paddlehub as hub -embedding = hub.Module(name='w2v_sikuquanshu_target_word-bigram_dim300') + - 计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 -# 获取单词的embedding -embedding.search("中国") -# 计算两个词向量的余弦相似度 -embedding.cosine_sim("中国", "美国") -# 计算两个词向量的内积 -embedding.dot("中国", "美国") -``` + - **参数** + - `word_a`: 需要计算余弦相似度的单词a。 + - `word_b`: 需要计算余弦相似度的单词b。 -## 部署服务 -通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 + - ```python + def dot( + word_a: str, + word_b: str, + ) + ``` -### Step1: 启动PaddleHub Serving + - 计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 -运行启动命令: + - **参数** + - `word_a`: 需要计算内积的单词a。 + - `word_b`: 需要计算内积的单词b。 -```shell -$ hub serving start -m w2v_sikuquanshu_target_word-bigram_dim300 -``` -这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + - ```python + def get_vocab_path() + ``` -**NOTE:** 
如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + - 获取本地词表文件的路径信息。 -### Step2: 发送预测请求 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + - ```python + def get_tokenizer(*args, **kwargs) + ``` -```python -import requests -import json + - 获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -# 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... ]] -word_pairs = [["中国", "美国"], ["今天", "明天"]] -# 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 -data = {"data": word_pairs} -# 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip -url = "http://10.12.121.132:8866/predict/w2v_sikuquanshu_target_word-bigram_dim300" -# 指定post请求的headers为application/json方式 -headers = {"Content-Type": "application/json"} + - **参数** + - `*args`: 额外传递的列表形式的参数。 + - `**kwargs`: 额外传递的字典形式的参数。 -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -print(r.json()) -``` + - 关于额外参数的详情可参考[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/paddlenlp/data/tokenizer.py) -## 查看代码 + - 更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings -## 依赖 +## 四、部署服务 -paddlepaddle >= 2.0.0 +- 通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 -paddlehub >= 2.0.0 +- ### Step1: 启动PaddleHub Serving -## 更新历史 + - 运行启动命令: + + - ```shell + $ hub serving start -m w2v_sikuquanshu_target_word-bigram_dim300 + ``` + + - 这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步: 发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... 
]] + word_pairs = [["中国", "美国"], ["今天", "明天"]] + # 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 + data = {"data": word_pairs} + # 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip + url = "http://127.0.0.1:8866/predict/w2v_sikuquanshu_target_word-bigram_dim300" + # 指定post请求的headers为application/json方式 + headers = {"Content-Type": "application/json"} + + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + ``` + + +## 五、更新历史 * 1.0.0 @@ -146,4 +174,7 @@ paddlehub >= 2.0.0 * 1.0.1 - 支持基于embedding的文本分类和序列标注finetune任务 + 优化模型 + - ```shell + $ hub install w2v_sikuquanshu_target_word-bigram_dim300==1.0.1 + ``` diff --git a/modules/text/embedding/w2v_sikuquanshu_target_word-word_dim300/README.md b/modules/text/embedding/w2v_sikuquanshu_target_word-word_dim300/README.md index 5595323223664601a541d6cd8e2cc704f36dd5ef..32a5b6df8d95cae1d2cc924ccde6e2a31a71c8e2 100644 --- a/modules/text/embedding/w2v_sikuquanshu_target_word-word_dim300/README.md +++ b/modules/text/embedding/w2v_sikuquanshu_target_word-word_dim300/README.md @@ -1,144 +1,172 @@ -## 概述 -PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) +# w2v_sikuquanshu_target_word-word_dim300 +|模型名称|w2v_sikuquanshu_target_word-word_dim300| +| :--- | :---: | +|类别|文本-词嵌入| +|网络|w2v| +|数据集|sikuquanshu| +|是否支持Fine-tuning|否| +|文件大小|20.70MB| +|词表大小|19529| +|最新更新日期|2021-04-28| +|数据指标|-| -## API +## 一、模型基本信息 -```python -def __init__( - *args, - **kwargs -) -``` +- ### 模型介绍 -创建一个Embedding Module对象,默认无需参数。 + - PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) -**参数** -* `*args`: 用户额外指定的列表类型的参数。 -* `**kwargs`:用户额外指定的关键字字典类型的参数。 +## 二、安装 -关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) +- ### 1、环境依赖 + - paddlepaddle >= 2.0.0 -```python -def search( - words: Union[List[str], str, int], -) -``` + - paddlehub >= 2.0.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) -获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 +- ### 2、安装 -**参数** -* `words`: 需要获取的词向量的词、词列表或者词编号。 + - ```shell + $ hub install w2v_sikuquanshu_target_word-word_dim300 + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) +## 三、模型API -```python -def cosine_sim( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 +- ### 1、预测代码示例 -**参数** -* `word_a`: 需要计算余弦相似度的单词a。 -* `word_b`: 需要计算余弦相似度的单词b。 + - ```python + import paddlehub as hub + embedding = hub.Module(name='w2v_sikuquanshu_target_word-word_dim300') + # 获取单词的embedding + embedding.search("中国") + # 计算两个词向量的余弦相似度 + embedding.cosine_sim("中国", "美国") + # 计算两个词向量的内积 + embedding.dot("中国", "美国") + ``` -```python -def dot( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 +- ### 2、API -**参数** -* `word_a`: 需要计算内积的单词a。 -* `word_b`: 需要计算内积的单词b。 + - ```python + def __init__( + *args, + **kwargs + ) + ``` + - 创建一个Embedding Module对象,默认无需参数。 
-```python -def get_vocab_path() -``` -获取本地词表文件的路径信息。 + - **参数** + - `*args`: 用户额外指定的列表类型的参数。 + - `**kwargs`:用户额外指定的关键字字典类型的参数。 + - 关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -```python -def get_tokenizer(*args, **kwargs) -``` -获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -**参数** -* `*args`: 额外传递的列表形式的参数。 -* `**kwargs`: 额外传递的字典形式的参数。 + - ```python + def search( + words: Union[List[str], str, int], + ) + ``` -关于额外参数的详情,可查看[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/develop/PaddleNLP/paddlenlp/data/tokenizer.py) + - 获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 + - **参数** + - `words`: 需要获取的词向量的词、词列表或者词编号。 -更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -## 代码示例 + - ```python + def cosine_sim( + word_a: str, + word_b: str, + ) + ``` -```python -import paddlehub as hub -embedding = hub.Module(name='w2v_sikuquanshu_target_word-word_dim300') + - 计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 -# 获取单词的embedding -embedding.search("中国") -# 计算两个词向量的余弦相似度 -embedding.cosine_sim("中国", "美国") -# 计算两个词向量的内积 -embedding.dot("中国", "美国") -``` + - **参数** + - `word_a`: 需要计算余弦相似度的单词a。 + - `word_b`: 需要计算余弦相似度的单词b。 -## 部署服务 -通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 + - ```python + def dot( + word_a: str, + word_b: str, + ) + ``` -### Step1: 启动PaddleHub Serving + - 计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 -运行启动命令: + - **参数** + - `word_a`: 需要计算内积的单词a。 + - `word_b`: 需要计算内积的单词b。 -```shell -$ hub serving start -m w2v_sikuquanshu_target_word-word_dim300 -``` -这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + - ```python + def get_vocab_path() + ``` -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + - 获取本地词表文件的路径信息。 -### Step2: 发送预测请求 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + - ```python + def get_tokenizer(*args, **kwargs) + ``` -```python -import requests -import json + - 获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -# 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... 
]] -word_pairs = [["中国", "美国"], ["今天", "明天"]] -# 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 -data = {"data": word_pairs} -# 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip -url = "http://10.12.121.132:8866/predict/w2v_sikuquanshu_target_word-word_dim300" -# 指定post请求的headers为application/json方式 -headers = {"Content-Type": "application/json"} + - **参数** + - `*args`: 额外传递的列表形式的参数。 + - `**kwargs`: 额外传递的字典形式的参数。 -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -print(r.json()) -``` + - 关于额外参数的详情可参考[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/paddlenlp/data/tokenizer.py) -## 查看代码 + - 更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings -## 依赖 +## 四、部署服务 -paddlepaddle >= 2.0.0 +- 通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 -paddlehub >= 2.0.0 +- ### Step1: 启动PaddleHub Serving -## 更新历史 + - 运行启动命令: + + - ```shell + $ hub serving start -m w2v_sikuquanshu_target_word-word_dim300 + ``` + + - 这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步: 发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... ]] + word_pairs = [["中国", "美国"], ["今天", "明天"]] + # 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 + data = {"data": word_pairs} + # 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip + url = "http://127.0.0.1:8866/predict/w2v_sikuquanshu_target_word-word_dim300" + # 指定post请求的headers为application/json方式 + headers = {"Content-Type": "application/json"} + + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + ``` + + +## 五、更新历史 * 1.0.0 @@ -146,4 +174,7 @@ paddlehub >= 2.0.0 * 1.0.1 - 支持基于embedding的文本分类和序列标注finetune任务 + 优化模型 + - ```shell + $ hub install w2v_sikuquanshu_target_word-word_dim300==1.0.1 + ``` diff --git a/modules/text/embedding/w2v_sogou_target_bigram-char_dim300/README.md b/modules/text/embedding/w2v_sogou_target_bigram-char_dim300/README.md index b47d51bd1312eeea11fb5ca8c3f06b073f27b532..9e984821d5ce7a7110167af1cff10cf481b41f40 100644 --- a/modules/text/embedding/w2v_sogou_target_bigram-char_dim300/README.md +++ b/modules/text/embedding/w2v_sogou_target_bigram-char_dim300/README.md @@ -1,144 +1,172 @@ -## 概述 -PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) +# w2v_sogou_target_bigram-char_dim300 +|模型名称|w2v_sogou_target_bigram-char_dim300| +| :--- | :---: | +|类别|文本-词嵌入| +|网络|w2v| +|数据集|sogou| +|是否支持Fine-tuning|否| +|文件大小|389.81MB| +|词表大小|365112| +|最新更新日期|2021-04-28| +|数据指标|-| -## API +## 一、模型基本信息 -```python -def __init__( - *args, - **kwargs -) -``` +- ### 模型介绍 -创建一个Embedding Module对象,默认无需参数。 + - PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) -**参数** -* `*args`: 用户额外指定的列表类型的参数。 -* `**kwargs`:用户额外指定的关键字字典类型的参数。 +## 二、安装 -关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) +- ### 1、环境依赖 + - paddlepaddle >= 
2.0.0 -```python -def search( - words: Union[List[str], str, int], -) -``` + - paddlehub >= 2.0.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) -获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 +- ### 2、安装 -**参数** -* `words`: 需要获取的词向量的词、词列表或者词编号。 + - ```shell + $ hub install w2v_sogou_target_bigram-char_dim300 + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) +## 三、模型API -```python -def cosine_sim( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 +- ### 1、预测代码示例 -**参数** -* `word_a`: 需要计算余弦相似度的单词a。 -* `word_b`: 需要计算余弦相似度的单词b。 + - ```python + import paddlehub as hub + embedding = hub.Module(name='w2v_sogou_target_bigram-char_dim300') + # 获取单词的embedding + embedding.search("中国") + # 计算两个词向量的余弦相似度 + embedding.cosine_sim("中国", "美国") + # 计算两个词向量的内积 + embedding.dot("中国", "美国") + ``` -```python -def dot( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 +- ### 2、API -**参数** -* `word_a`: 需要计算内积的单词a。 -* `word_b`: 需要计算内积的单词b。 + - ```python + def __init__( + *args, + **kwargs + ) + ``` + - 创建一个Embedding Module对象,默认无需参数。 -```python -def get_vocab_path() -``` -获取本地词表文件的路径信息。 + - **参数** + - `*args`: 用户额外指定的列表类型的参数。 + - `**kwargs`:用户额外指定的关键字字典类型的参数。 + - 关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -```python -def get_tokenizer(*args, **kwargs) -``` -获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -**参数** -* `*args`: 额外传递的列表形式的参数。 -* `**kwargs`: 额外传递的字典形式的参数。 + - ```python + def search( + words: Union[List[str], str, int], + ) + ``` -关于额外参数的详情,可查看[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/develop/PaddleNLP/paddlenlp/data/tokenizer.py) + - 获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 + - **参数** + - `words`: 需要获取的词向量的词、词列表或者词编号。 -更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -## 代码示例 + - ```python + def cosine_sim( + word_a: str, + word_b: str, + ) + ``` -```python -import paddlehub as hub -embedding = hub.Module(name='w2v_sogou_target_bigram-char_dim300') + - 计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 -# 获取单词的embedding -embedding.search("中国") -# 计算两个词向量的余弦相似度 -embedding.cosine_sim("中国", "美国") -# 计算两个词向量的内积 -embedding.dot("中国", "美国") -``` + - **参数** + - `word_a`: 需要计算余弦相似度的单词a。 + - `word_b`: 需要计算余弦相似度的单词b。 -## 部署服务 -通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 + - ```python + def dot( + word_a: str, + word_b: str, + ) + ``` -### Step1: 启动PaddleHub Serving + - 计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 -运行启动命令: + - **参数** + - `word_a`: 需要计算内积的单词a。 + - `word_b`: 需要计算内积的单词b。 -```shell -$ hub serving start -m w2v_sogou_target_bigram-char_dim300 -``` -这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + - ```python + def get_vocab_path() + ``` -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + - 获取本地词表文件的路径信息。 -### Step2: 发送预测请求 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + - ```python + def get_tokenizer(*args, **kwargs) + ``` -```python -import requests -import json + - 
获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -# 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... ]] -word_pairs = [["中国", "美国"], ["今天", "明天"]] -# 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 -data = {"data": word_pairs} -# 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip -url = "http://10.12.121.132:8866/predict/w2v_sogou_target_bigram-char_dim300" -# 指定post请求的headers为application/json方式 -headers = {"Content-Type": "application/json"} + - **参数** + - `*args`: 额外传递的列表形式的参数。 + - `**kwargs`: 额外传递的字典形式的参数。 -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -print(r.json()) -``` + - 关于额外参数的详情可参考[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/paddlenlp/data/tokenizer.py) -## 查看代码 + - 更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings -## 依赖 +## 四、部署服务 -paddlepaddle >= 2.0.0 +- 通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 -paddlehub >= 2.0.0 +- ### Step1: 启动PaddleHub Serving -## 更新历史 + - 运行启动命令: + + - ```shell + $ hub serving start -m w2v_sogou_target_bigram-char_dim300 + ``` + + - 这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步: 发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... ]] + word_pairs = [["中国", "美国"], ["今天", "明天"]] + # 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 + data = {"data": word_pairs} + # 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip + url = "http://127.0.0.1:8866/predict/w2v_sogou_target_bigram-char_dim300" + # 指定post请求的headers为application/json方式 + headers = {"Content-Type": "application/json"} + + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + ``` + + +## 五、更新历史 * 1.0.0 @@ -146,4 +174,7 @@ paddlehub >= 2.0.0 * 1.0.1 - 支持基于embedding的文本分类和序列标注finetune任务 + 优化模型 + - ```shell + $ hub install w2v_sogou_target_bigram-char_dim300==1.0.1 + ``` diff --git a/modules/text/embedding/w2v_sogou_target_word-bigram_dim300/README.md b/modules/text/embedding/w2v_sogou_target_word-bigram_dim300/README.md index 48695c354cbe36aee204f92f6298045dc394a553..728221d2d28de4126184cdeaa90ba6ba0f09a09d 100644 --- a/modules/text/embedding/w2v_sogou_target_word-bigram_dim300/README.md +++ b/modules/text/embedding/w2v_sogou_target_word-bigram_dim300/README.md @@ -1,144 +1,172 @@ -## 概述 -PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) +# w2v_sogou_target_word-bigram_dim300 +|模型名称|w2v_sogou_target_word-bigram_dim300| +| :--- | :---: | +|类别|文本-词嵌入| +|网络|w2v| +|数据集|sogou| +|是否支持Fine-tuning|否| +|文件大小|388.66MB| +|词表大小|364994| +|最新更新日期|2021-04-28| +|数据指标|-| -## API +## 一、模型基本信息 -```python -def __init__( - *args, - **kwargs -) -``` +- ### 模型介绍 -创建一个Embedding Module对象,默认无需参数。 + - PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) -**参数** -* `*args`: 用户额外指定的列表类型的参数。 -* `**kwargs`:用户额外指定的关键字字典类型的参数。 +## 二、安装 
-关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) +- ### 1、环境依赖 + - paddlepaddle >= 2.0.0 -```python -def search( - words: Union[List[str], str, int], -) -``` + - paddlehub >= 2.0.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) -获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 +- ### 2、安装 -**参数** -* `words`: 需要获取的词向量的词、词列表或者词编号。 + - ```shell + $ hub install w2v_sogou_target_word-bigram_dim300 + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) +## 三、模型API -```python -def cosine_sim( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 +- ### 1、预测代码示例 -**参数** -* `word_a`: 需要计算余弦相似度的单词a。 -* `word_b`: 需要计算余弦相似度的单词b。 + - ```python + import paddlehub as hub + embedding = hub.Module(name='w2v_sogou_target_word-bigram_dim300') + # 获取单词的embedding + embedding.search("中国") + # 计算两个词向量的余弦相似度 + embedding.cosine_sim("中国", "美国") + # 计算两个词向量的内积 + embedding.dot("中国", "美国") + ``` -```python -def dot( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 +- ### 2、API -**参数** -* `word_a`: 需要计算内积的单词a。 -* `word_b`: 需要计算内积的单词b。 + - ```python + def __init__( + *args, + **kwargs + ) + ``` + - 创建一个Embedding Module对象,默认无需参数。 -```python -def get_vocab_path() -``` -获取本地词表文件的路径信息。 + - **参数** + - `*args`: 用户额外指定的列表类型的参数。 + - `**kwargs`:用户额外指定的关键字字典类型的参数。 + - 关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -```python -def get_tokenizer(*args, **kwargs) -``` -获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -**参数** -* `*args`: 额外传递的列表形式的参数。 -* `**kwargs`: 额外传递的字典形式的参数。 + - ```python + def search( + words: Union[List[str], str, int], + ) + ``` -关于额外参数的详情,可查看[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/develop/PaddleNLP/paddlenlp/data/tokenizer.py) + - 获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 + - **参数** + - `words`: 需要获取的词向量的词、词列表或者词编号。 -更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -## 代码示例 + - ```python + def cosine_sim( + word_a: str, + word_b: str, + ) + ``` -```python -import paddlehub as hub -embedding = hub.Module(name='w2v_sogou_target_word-bigram_dim300') + - 计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 -# 获取单词的embedding -embedding.search("中国") -# 计算两个词向量的余弦相似度 -embedding.cosine_sim("中国", "美国") -# 计算两个词向量的内积 -embedding.dot("中国", "美国") -``` + - **参数** + - `word_a`: 需要计算余弦相似度的单词a。 + - `word_b`: 需要计算余弦相似度的单词b。 -## 部署服务 -通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 + - ```python + def dot( + word_a: str, + word_b: str, + ) + ``` -### Step1: 启动PaddleHub Serving + - 计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 -运行启动命令: + - **参数** + - `word_a`: 需要计算内积的单词a。 + - `word_b`: 需要计算内积的单词b。 -```shell -$ hub serving start -m w2v_sogou_target_word-bigram_dim300 -``` -这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + - ```python + def get_vocab_path() + ``` -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + - 
获取本地词表文件的路径信息。 -### Step2: 发送预测请求 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + - ```python + def get_tokenizer(*args, **kwargs) + ``` -```python -import requests -import json + - 获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -# 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... ]] -word_pairs = [["中国", "美国"], ["今天", "明天"]] -# 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 -data = {"data": word_pairs} -# 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip -url = "http://10.12.121.132:8866/predict/w2v_sogou_target_word-bigram_dim300" -# 指定post请求的headers为application/json方式 -headers = {"Content-Type": "application/json"} + - **参数** + - `*args`: 额外传递的列表形式的参数。 + - `**kwargs`: 额外传递的字典形式的参数。 -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -print(r.json()) -``` + - 关于额外参数的详情可参考[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/paddlenlp/data/tokenizer.py) -## 查看代码 + - 更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings -## 依赖 +## 四、部署服务 -paddlepaddle >= 2.0.0 +- 通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 -paddlehub >= 2.0.0 +- ### Step1: 启动PaddleHub Serving -## 更新历史 + - 运行启动命令: + + - ```shell + $ hub serving start -m w2v_sogou_target_word-bigram_dim300 + ``` + + - 这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步: 发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... ]] + word_pairs = [["中国", "美国"], ["今天", "明天"]] + # 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 + data = {"data": word_pairs} + # 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip + url = "http://127.0.0.1:8866/predict/w2v_sogou_target_word-bigram_dim300" + # 指定post请求的headers为application/json方式 + headers = {"Content-Type": "application/json"} + + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + ``` + + +## 五、更新历史 * 1.0.0 @@ -146,4 +174,7 @@ paddlehub >= 2.0.0 * 1.0.1 - 支持基于embedding的文本分类和序列标注finetune任务 + 优化模型 + - ```shell + $ hub install w2v_sogou_target_word-bigram_dim300==1.0.1 + ``` diff --git a/modules/text/embedding/w2v_sogou_target_word-char_dim300/README.md b/modules/text/embedding/w2v_sogou_target_word-char_dim300/README.md index fcb06ce9e478529e219e64c105ba9f405534e4dd..575f6acde662dc71f70b9bb6d4075a1a9d1ec1af 100644 --- a/modules/text/embedding/w2v_sogou_target_word-char_dim300/README.md +++ b/modules/text/embedding/w2v_sogou_target_word-char_dim300/README.md @@ -1,144 +1,172 @@ -## 概述 -PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) +# w2v_sogou_target_word-char_dim300 +|模型名称|w2v_sogou_target_word-char_dim300| +| :--- | :---: | +|类别|文本-词嵌入| +|网络|w2v| +|数据集|sogou| +|是否支持Fine-tuning|否| +|文件大小|389.89MB| +|词表大小|365078| +|最新更新日期|2021-04-28| +|数据指标|-| -## API +## 一、模型基本信息 -```python -def __init__( - *args, - **kwargs -) -``` +- ### 模型介绍 -创建一个Embedding Module对象,默认无需参数。 + - 
PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) -**参数** -* `*args`: 用户额外指定的列表类型的参数。 -* `**kwargs`:用户额外指定的关键字字典类型的参数。 +## 二、安装 -关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) +- ### 1、环境依赖 + - paddlepaddle >= 2.0.0 -```python -def search( - words: Union[List[str], str, int], -) -``` + - paddlehub >= 2.0.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) -获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 +- ### 2、安装 -**参数** -* `words`: 需要获取的词向量的词、词列表或者词编号。 + - ```shell + $ hub install w2v_sogou_target_word-char_dim300 + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) +## 三、模型API -```python -def cosine_sim( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 +- ### 1、预测代码示例 -**参数** -* `word_a`: 需要计算余弦相似度的单词a。 -* `word_b`: 需要计算余弦相似度的单词b。 + - ```python + import paddlehub as hub + embedding = hub.Module(name='w2v_sogou_target_word-char_dim300') + # 获取单词的embedding + embedding.search("中国") + # 计算两个词向量的余弦相似度 + embedding.cosine_sim("中国", "美国") + # 计算两个词向量的内积 + embedding.dot("中国", "美国") + ``` -```python -def dot( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 +- ### 2、API -**参数** -* `word_a`: 需要计算内积的单词a。 -* `word_b`: 需要计算内积的单词b。 + - ```python + def __init__( + *args, + **kwargs + ) + ``` + - 创建一个Embedding Module对象,默认无需参数。 -```python -def get_vocab_path() -``` -获取本地词表文件的路径信息。 + - **参数** + - `*args`: 用户额外指定的列表类型的参数。 + - `**kwargs`:用户额外指定的关键字字典类型的参数。 + - 关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -```python -def get_tokenizer(*args, **kwargs) -``` -获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -**参数** -* `*args`: 额外传递的列表形式的参数。 -* `**kwargs`: 额外传递的字典形式的参数。 + - ```python + def search( + words: Union[List[str], str, int], + ) + ``` -关于额外参数的详情,可查看[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/develop/PaddleNLP/paddlenlp/data/tokenizer.py) + - 获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 + - **参数** + - `words`: 需要获取的词向量的词、词列表或者词编号。 -更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -## 代码示例 + - ```python + def cosine_sim( + word_a: str, + word_b: str, + ) + ``` -```python -import paddlehub as hub -embedding = hub.Module(name='w2v_sogou_target_word-char_dim300') + - 计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 -# 获取单词的embedding -embedding.search("中国") -# 计算两个词向量的余弦相似度 -embedding.cosine_sim("中国", "美国") -# 计算两个词向量的内积 -embedding.dot("中国", "美国") -``` + - **参数** + - `word_a`: 需要计算余弦相似度的单词a。 + - `word_b`: 需要计算余弦相似度的单词b。 -## 部署服务 -通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 + - ```python + def dot( + word_a: str, + word_b: str, + ) + ``` -### Step1: 启动PaddleHub Serving + - 计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 -运行启动命令: + - **参数** + - `word_a`: 
需要计算内积的单词a。 + - `word_b`: 需要计算内积的单词b。 -```shell -$ hub serving start -m w2v_sogou_target_word-char_dim300 -``` -这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + - ```python + def get_vocab_path() + ``` -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + - 获取本地词表文件的路径信息。 -### Step2: 发送预测请求 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + - ```python + def get_tokenizer(*args, **kwargs) + ``` -```python -import requests -import json + - 获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -# 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... ]] -word_pairs = [["中国", "美国"], ["今天", "明天"]] -# 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 -data = {"data": word_pairs} -# 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip -url = "http://10.12.121.132:8866/predict/w2v_sogou_target_word-char_dim300" -# 指定post请求的headers为application/json方式 -headers = {"Content-Type": "application/json"} + - **参数** + - `*args`: 额外传递的列表形式的参数。 + - `**kwargs`: 额外传递的字典形式的参数。 -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -print(r.json()) -``` + - 关于额外参数的详情可参考[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/paddlenlp/data/tokenizer.py) -## 查看代码 + - 更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings -## 依赖 +## 四、部署服务 -paddlepaddle >= 2.0.0 +- 通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 -paddlehub >= 2.0.0 +- ### Step1: 启动PaddleHub Serving -## 更新历史 + - 运行启动命令: + + - ```shell + $ hub serving start -m w2v_sogou_target_word-char_dim300 + ``` + + - 这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步: 发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... 
]] + word_pairs = [["中国", "美国"], ["今天", "明天"]] + # 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 + data = {"data": word_pairs} + # 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip + url = "http://127.0.0.1:8866/predict/w2v_sogou_target_word-char_dim300" + # 指定post请求的headers为application/json方式 + headers = {"Content-Type": "application/json"} + + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + ``` + + +## 五、更新历史 * 1.0.0 @@ -146,4 +174,7 @@ paddlehub >= 2.0.0 * 1.0.1 - 支持基于embedding的文本分类和序列标注finetune任务 + 优化模型 + - ```shell + $ hub install w2v_sogou_target_word-char_dim300==1.0.1 + ``` diff --git a/modules/text/embedding/w2v_sogou_target_word-word_dim300/README.md b/modules/text/embedding/w2v_sogou_target_word-word_dim300/README.md index bc264a7280c7a8195742b428e37dfa73de3a92ca..f1d86bea2512b79535d9e52d702aace58ab99596 100644 --- a/modules/text/embedding/w2v_sogou_target_word-word_dim300/README.md +++ b/modules/text/embedding/w2v_sogou_target_word-word_dim300/README.md @@ -1,144 +1,172 @@ -## 概述 -PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) +# w2v_sogou_target_word-word_dim300 +|模型名称|w2v_sogou_target_word-word_dim300| +| :--- | :---: | +|类别|文本-词嵌入| +|网络|w2v| +|数据集|sogou| +|是否支持Fine-tuning|否| +|文件大小|388.66MB| +|词表大小|364992| +|最新更新日期|2021-04-28| +|数据指标|-| -## API +## 一、模型基本信息 -```python -def __init__( - *args, - **kwargs -) -``` +- ### 模型介绍 -创建一个Embedding Module对象,默认无需参数。 + - PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) -**参数** -* `*args`: 用户额外指定的列表类型的参数。 -* `**kwargs`:用户额外指定的关键字字典类型的参数。 +## 二、安装 -关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) +- ### 1、环境依赖 + - paddlepaddle >= 2.0.0 -```python -def search( - words: Union[List[str], str, int], -) -``` + - paddlehub >= 2.0.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) -获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 +- ### 2、安装 -**参数** -* `words`: 需要获取的词向量的词、词列表或者词编号。 + - ```shell + $ hub install w2v_sogou_target_word-word_dim300 + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) +## 三、模型API -```python -def cosine_sim( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 +- ### 1、预测代码示例 -**参数** -* `word_a`: 需要计算余弦相似度的单词a。 -* `word_b`: 需要计算余弦相似度的单词b。 + - ```python + import paddlehub as hub + embedding = hub.Module(name='w2v_sogou_target_word-word_dim300') + # 获取单词的embedding + embedding.search("中国") + # 计算两个词向量的余弦相似度 + embedding.cosine_sim("中国", "美国") + # 计算两个词向量的内积 + embedding.dot("中国", "美国") + ``` -```python -def dot( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 +- ### 2、API -**参数** -* `word_a`: 需要计算内积的单词a。 -* `word_b`: 需要计算内积的单词b。 + - ```python + def __init__( + *args, + **kwargs + ) + ``` + - 创建一个Embedding Module对象,默认无需参数。 -```python -def get_vocab_path() -``` -获取本地词表文件的路径信息。 + - **参数** + - 
`*args`: 用户额外指定的列表类型的参数。 + - `**kwargs`:用户额外指定的关键字字典类型的参数。 + - 关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -```python -def get_tokenizer(*args, **kwargs) -``` -获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -**参数** -* `*args`: 额外传递的列表形式的参数。 -* `**kwargs`: 额外传递的字典形式的参数。 + - ```python + def search( + words: Union[List[str], str, int], + ) + ``` -关于额外参数的详情,可查看[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/develop/PaddleNLP/paddlenlp/data/tokenizer.py) + - 获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 + - **参数** + - `words`: 需要获取的词向量的词、词列表或者词编号。 -更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -## 代码示例 + - ```python + def cosine_sim( + word_a: str, + word_b: str, + ) + ``` -```python -import paddlehub as hub -embedding = hub.Module(name='w2v_sogou_target_word-word_dim300') + - 计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 -# 获取单词的embedding -embedding.search("中国") -# 计算两个词向量的余弦相似度 -embedding.cosine_sim("中国", "美国") -# 计算两个词向量的内积 -embedding.dot("中国", "美国") -``` + - **参数** + - `word_a`: 需要计算余弦相似度的单词a。 + - `word_b`: 需要计算余弦相似度的单词b。 -## 部署服务 -通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 + - ```python + def dot( + word_a: str, + word_b: str, + ) + ``` -### Step1: 启动PaddleHub Serving + - 计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 -运行启动命令: + - **参数** + - `word_a`: 需要计算内积的单词a。 + - `word_b`: 需要计算内积的单词b。 -```shell -$ hub serving start -m w2v_sogou_target_word-word_dim300 -``` -这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + - ```python + def get_vocab_path() + ``` -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + - 获取本地词表文件的路径信息。 -### Step2: 发送预测请求 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + - ```python + def get_tokenizer(*args, **kwargs) + ``` -```python -import requests -import json + - 获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -# 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... 
]] -word_pairs = [["中国", "美国"], ["今天", "明天"]] -# 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 -data = {"data": word_pairs} -# 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip -url = "http://10.12.121.132:8866/predict/w2v_sogou_target_word-word_dim300" -# 指定post请求的headers为application/json方式 -headers = {"Content-Type": "application/json"} + - **参数** + - `*args`: 额外传递的列表形式的参数。 + - `**kwargs`: 额外传递的字典形式的参数。 -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -print(r.json()) -``` + - 关于额外参数的详情可参考[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/paddlenlp/data/tokenizer.py) -## 查看代码 + - 更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings -## 依赖 +## 四、部署服务 -paddlepaddle >= 2.0.0 +- 通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 -paddlehub >= 2.0.0 +- ### Step1: 启动PaddleHub Serving -## 更新历史 + - 运行启动命令: + + - ```shell + $ hub serving start -m w2v_sogou_target_word-word_dim300 + ``` + + - 这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步: 发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... ]] + word_pairs = [["中国", "美国"], ["今天", "明天"]] + # 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 + data = {"data": word_pairs} + # 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip + url = "http://127.0.0.1:8866/predict/w2v_sogou_target_word-word_dim300" + # 指定post请求的headers为application/json方式 + headers = {"Content-Type": "application/json"} + + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + ``` + + +## 五、更新历史 * 1.0.0 @@ -146,4 +174,7 @@ paddlehub >= 2.0.0 * 1.0.1 - 支持基于embedding的文本分类和序列标注finetune任务 + 优化模型 + - ```shell + $ hub install w2v_sogou_target_word-word_dim300==1.0.1 + ``` diff --git a/modules/text/embedding/w2v_weibo_target_bigram-char_dim300/README.md b/modules/text/embedding/w2v_weibo_target_bigram-char_dim300/README.md index fc4262a138624e3efe7655dce765ae415cfcdd56..8c7cff9fdee886deb434566859f34cd5790e72b8 100644 --- a/modules/text/embedding/w2v_weibo_target_bigram-char_dim300/README.md +++ b/modules/text/embedding/w2v_weibo_target_bigram-char_dim300/README.md @@ -1,144 +1,172 @@ -## 概述 -PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) +# w2v_weibo_target_bigram-char_dim300 +|模型名称|w2v_weibo_target_bigram-char_dim300| +| :--- | :---: | +|类别|文本-词嵌入| +|网络|w2v| +|数据集|weibo| +|是否支持Fine-tuning|否| +|文件大小|208.24MB| +|词表大小|195199| +|最新更新日期|2021-04-28| +|数据指标|-| -## API +## 一、模型基本信息 -```python -def __init__( - *args, - **kwargs -) -``` +- ### 模型介绍 -创建一个Embedding Module对象,默认无需参数。 + - PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) -**参数** -* `*args`: 用户额外指定的列表类型的参数。 -* `**kwargs`:用户额外指定的关键字字典类型的参数。 +## 二、安装 -关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) +- ### 1、环境依赖 + - paddlepaddle >= 2.0.0 -```python -def 
search( - words: Union[List[str], str, int], -) -``` + - paddlehub >= 2.0.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) -获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 +- ### 2、安装 -**参数** -* `words`: 需要获取的词向量的词、词列表或者词编号。 + - ```shell + $ hub install w2v_weibo_target_bigram-char_dim300 + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) +## 三、模型API -```python -def cosine_sim( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 +- ### 1、预测代码示例 -**参数** -* `word_a`: 需要计算余弦相似度的单词a。 -* `word_b`: 需要计算余弦相似度的单词b。 + - ```python + import paddlehub as hub + embedding = hub.Module(name='w2v_weibo_target_bigram-char_dim300') + # 获取单词的embedding + embedding.search("中国") + # 计算两个词向量的余弦相似度 + embedding.cosine_sim("中国", "美国") + # 计算两个词向量的内积 + embedding.dot("中国", "美国") + ``` -```python -def dot( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 +- ### 2、API -**参数** -* `word_a`: 需要计算内积的单词a。 -* `word_b`: 需要计算内积的单词b。 + - ```python + def __init__( + *args, + **kwargs + ) + ``` + - 创建一个Embedding Module对象,默认无需参数。 -```python -def get_vocab_path() -``` -获取本地词表文件的路径信息。 + - **参数** + - `*args`: 用户额外指定的列表类型的参数。 + - `**kwargs`:用户额外指定的关键字字典类型的参数。 + - 关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -```python -def get_tokenizer(*args, **kwargs) -``` -获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -**参数** -* `*args`: 额外传递的列表形式的参数。 -* `**kwargs`: 额外传递的字典形式的参数。 + - ```python + def search( + words: Union[List[str], str, int], + ) + ``` -关于额外参数的详情,可查看[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/develop/PaddleNLP/paddlenlp/data/tokenizer.py) + - 获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 + - **参数** + - `words`: 需要获取的词向量的词、词列表或者词编号。 -更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -## 代码示例 + - ```python + def cosine_sim( + word_a: str, + word_b: str, + ) + ``` -```python -import paddlehub as hub -embedding = hub.Module(name='w2v_weibo_target_bigram-char_dim300') + - 计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 -# 获取单词的embedding -embedding.search("中国") -# 计算两个词向量的余弦相似度 -embedding.cosine_sim("中国", "美国") -# 计算两个词向量的内积 -embedding.dot("中国", "美国") -``` + - **参数** + - `word_a`: 需要计算余弦相似度的单词a。 + - `word_b`: 需要计算余弦相似度的单词b。 -## 部署服务 -通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 + - ```python + def dot( + word_a: str, + word_b: str, + ) + ``` -### Step1: 启动PaddleHub Serving + - 计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 -运行启动命令: + - **参数** + - `word_a`: 需要计算内积的单词a。 + - `word_b`: 需要计算内积的单词b。 -```shell -$ hub serving start -m w2v_weibo_target_bigram-char_dim300 -``` -这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + - ```python + def get_vocab_path() + ``` -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + - 获取本地词表文件的路径信息。 -### Step2: 发送预测请求 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + - ```python + def get_tokenizer(*args, **kwargs) + ``` -```python -import requests -import json + - 
获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -# 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... ]] -word_pairs = [["中国", "美国"], ["今天", "明天"]] -# 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 -data = {"data": word_pairs} -# 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip -url = "http://10.12.121.132:8866/predict/w2v_weibo_target_bigram-char_dim300" -# 指定post请求的headers为application/json方式 -headers = {"Content-Type": "application/json"} + - **参数** + - `*args`: 额外传递的列表形式的参数。 + - `**kwargs`: 额外传递的字典形式的参数。 -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -print(r.json()) -``` + - 关于额外参数的详情可参考[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/paddlenlp/data/tokenizer.py) -## 查看代码 + - 更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings -## 依赖 +## 四、部署服务 -paddlepaddle >= 2.0.0 +- 通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 -paddlehub >= 2.0.0 +- ### Step1: 启动PaddleHub Serving -## 更新历史 + - 运行启动命令: + + - ```shell + $ hub serving start -m w2v_weibo_target_bigram-char_dim300 + ``` + + - 这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步: 发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... ]] + word_pairs = [["中国", "美国"], ["今天", "明天"]] + # 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 + data = {"data": word_pairs} + # 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip + url = "http://127.0.0.1:8866/predict/w2v_weibo_target_bigram-char_dim300" + # 指定post请求的headers为application/json方式 + headers = {"Content-Type": "application/json"} + + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + ``` + + +## 五、更新历史 * 1.0.0 @@ -146,4 +174,7 @@ paddlehub >= 2.0.0 * 1.0.1 - 支持基于embedding的文本分类和序列标注finetune任务 + 优化模型 + - ```shell + $ hub install w2v_weibo_target_bigram-char_dim300==1.0.1 + ``` diff --git a/modules/text/embedding/w2v_weibo_target_word-bigram_dim300/README.md b/modules/text/embedding/w2v_weibo_target_word-bigram_dim300/README.md index f20eaceeaa3d5ab5f75138e0f0a9d424fecd6c46..75e8defba0978722a4e67fd33bb39676edd235f6 100644 --- a/modules/text/embedding/w2v_weibo_target_word-bigram_dim300/README.md +++ b/modules/text/embedding/w2v_weibo_target_word-bigram_dim300/README.md @@ -1,144 +1,172 @@ -## 概述 -PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) +# w2v_weibo_target_word-bigram_dim300 +|模型名称|w2v_weibo_target_word-bigram_dim300| +| :--- | :---: | +|类别|文本-词嵌入| +|网络|w2v| +|数据集|weibo| +|是否支持Fine-tuning|否| +|文件大小|208.19MB| +|词表大小|195204| +|最新更新日期|2021-04-28| +|数据指标|-| -## API +## 一、模型基本信息 -```python -def __init__( - *args, - **kwargs -) -``` +- ### 模型介绍 -创建一个Embedding Module对象,默认无需参数。 + - PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) -**参数** -* `*args`: 用户额外指定的列表类型的参数。 -* `**kwargs`:用户额外指定的关键字字典类型的参数。 +## 二、安装 
-关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) +- ### 1、环境依赖 + - paddlepaddle >= 2.0.0 -```python -def search( - words: Union[List[str], str, int], -) -``` + - paddlehub >= 2.0.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) -获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 +- ### 2、安装 -**参数** -* `words`: 需要获取的词向量的词、词列表或者词编号。 + - ```shell + $ hub install w2v_weibo_target_word-bigram_dim300 + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) +## 三、模型API -```python -def cosine_sim( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 +- ### 1、预测代码示例 -**参数** -* `word_a`: 需要计算余弦相似度的单词a。 -* `word_b`: 需要计算余弦相似度的单词b。 + - ```python + import paddlehub as hub + embedding = hub.Module(name='w2v_weibo_target_word-bigram_dim300') + # 获取单词的embedding + embedding.search("中国") + # 计算两个词向量的余弦相似度 + embedding.cosine_sim("中国", "美国") + # 计算两个词向量的内积 + embedding.dot("中国", "美国") + ``` -```python -def dot( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 +- ### 2、API -**参数** -* `word_a`: 需要计算内积的单词a。 -* `word_b`: 需要计算内积的单词b。 + - ```python + def __init__( + *args, + **kwargs + ) + ``` + - 创建一个Embedding Module对象,默认无需参数。 -```python -def get_vocab_path() -``` -获取本地词表文件的路径信息。 + - **参数** + - `*args`: 用户额外指定的列表类型的参数。 + - `**kwargs`:用户额外指定的关键字字典类型的参数。 + - 关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -```python -def get_tokenizer(*args, **kwargs) -``` -获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -**参数** -* `*args`: 额外传递的列表形式的参数。 -* `**kwargs`: 额外传递的字典形式的参数。 + - ```python + def search( + words: Union[List[str], str, int], + ) + ``` -关于额外参数的详情,可查看[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/develop/PaddleNLP/paddlenlp/data/tokenizer.py) + - 获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 + - **参数** + - `words`: 需要获取的词向量的词、词列表或者词编号。 -更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -## 代码示例 + - ```python + def cosine_sim( + word_a: str, + word_b: str, + ) + ``` -```python -import paddlehub as hub -embedding = hub.Module(name='w2v_weibo_target_word-bigram_dim300') + - 计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 -# 获取单词的embedding -embedding.search("中国") -# 计算两个词向量的余弦相似度 -embedding.cosine_sim("中国", "美国") -# 计算两个词向量的内积 -embedding.dot("中国", "美国") -``` + - **参数** + - `word_a`: 需要计算余弦相似度的单词a。 + - `word_b`: 需要计算余弦相似度的单词b。 -## 部署服务 -通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 + - ```python + def dot( + word_a: str, + word_b: str, + ) + ``` -### Step1: 启动PaddleHub Serving + - 计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 -运行启动命令: + - **参数** + - `word_a`: 需要计算内积的单词a。 + - `word_b`: 需要计算内积的单词b。 -```shell -$ hub serving start -m w2v_weibo_target_word-bigram_dim300 -``` -这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + - ```python + def get_vocab_path() + ``` -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + - 
获取本地词表文件的路径信息。 -### Step2: 发送预测请求 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + - ```python + def get_tokenizer(*args, **kwargs) + ``` -```python -import requests -import json + - 获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -# 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... ]] -word_pairs = [["中国", "美国"], ["今天", "明天"]] -# 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 -data = {"data": word_pairs} -# 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip -url = "http://10.12.121.132:8866/predict/w2v_weibo_target_word-bigram_dim300" -# 指定post请求的headers为application/json方式 -headers = {"Content-Type": "application/json"} + - **参数** + - `*args`: 额外传递的列表形式的参数。 + - `**kwargs`: 额外传递的字典形式的参数。 -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -print(r.json()) -``` + - 关于额外参数的详情可参考[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/paddlenlp/data/tokenizer.py) -## 查看代码 + - 更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings -## 依赖 +## 四、部署服务 -paddlepaddle >= 2.0.0 +- 通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 -paddlehub >= 2.0.0 +- ### Step1: 启动PaddleHub Serving -## 更新历史 + - 运行启动命令: + + - ```shell + $ hub serving start -m w2v_weibo_target_word-bigram_dim300 + ``` + + - 这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步: 发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... ]] + word_pairs = [["中国", "美国"], ["今天", "明天"]] + # 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 + data = {"data": word_pairs} + # 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip + url = "http://127.0.0.1:8866/predict/w2v_weibo_target_word-bigram_dim300" + # 指定post请求的headers为application/json方式 + headers = {"Content-Type": "application/json"} + + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + ``` + + +## 五、更新历史 * 1.0.0 @@ -146,4 +174,7 @@ paddlehub >= 2.0.0 * 1.0.1 - 支持基于embedding的文本分类和序列标注finetune任务 + 优化模型 + - ```shell + $ hub install w2v_weibo_target_word-bigram_dim300==1.0.1 + ``` diff --git a/modules/text/embedding/w2v_weibo_target_word-char_dim300/README.md b/modules/text/embedding/w2v_weibo_target_word-char_dim300/README.md index d6393e34a5519521bd59dc2d4e090b9444abbac0..bac3d11a807309353138e4ac3a5932b3cf8b7ac0 100644 --- a/modules/text/embedding/w2v_weibo_target_word-char_dim300/README.md +++ b/modules/text/embedding/w2v_weibo_target_word-char_dim300/README.md @@ -1,144 +1,172 @@ -## 概述 -PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) +# w2v_weibo_target_word-char_dim300 +|模型名称|w2v_weibo_target_word-char_dim300| +| :--- | :---: | +|类别|文本-词嵌入| +|网络|w2v| +|数据集|weibo| +|是否支持Fine-tuning|否| +|文件大小|208.03MB| +|词表大小|195204| +|最新更新日期|2021-04-28| +|数据指标|-| -## API +## 一、模型基本信息 -```python -def __init__( - *args, - **kwargs -) -``` +- ### 模型介绍 -创建一个Embedding Module对象,默认无需参数。 + - 
PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) -**参数** -* `*args`: 用户额外指定的列表类型的参数。 -* `**kwargs`:用户额外指定的关键字字典类型的参数。 +## 二、安装 -关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) +- ### 1、环境依赖 + - paddlepaddle >= 2.0.0 -```python -def search( - words: Union[List[str], str, int], -) -``` + - paddlehub >= 2.0.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) -获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 +- ### 2、安装 -**参数** -* `words`: 需要获取的词向量的词、词列表或者词编号。 + - ```shell + $ hub install w2v_weibo_target_word-char_dim300 + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) +## 三、模型API -```python -def cosine_sim( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 +- ### 1、预测代码示例 -**参数** -* `word_a`: 需要计算余弦相似度的单词a。 -* `word_b`: 需要计算余弦相似度的单词b。 + - ```python + import paddlehub as hub + embedding = hub.Module(name='w2v_weibo_target_word-char_dim300') + # 获取单词的embedding + embedding.search("中国") + # 计算两个词向量的余弦相似度 + embedding.cosine_sim("中国", "美国") + # 计算两个词向量的内积 + embedding.dot("中国", "美国") + ``` -```python -def dot( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 +- ### 2、API -**参数** -* `word_a`: 需要计算内积的单词a。 -* `word_b`: 需要计算内积的单词b。 + - ```python + def __init__( + *args, + **kwargs + ) + ``` + - 创建一个Embedding Module对象,默认无需参数。 -```python -def get_vocab_path() -``` -获取本地词表文件的路径信息。 + - **参数** + - `*args`: 用户额外指定的列表类型的参数。 + - `**kwargs`:用户额外指定的关键字字典类型的参数。 + - 关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -```python -def get_tokenizer(*args, **kwargs) -``` -获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -**参数** -* `*args`: 额外传递的列表形式的参数。 -* `**kwargs`: 额外传递的字典形式的参数。 + - ```python + def search( + words: Union[List[str], str, int], + ) + ``` -关于额外参数的详情,可查看[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/develop/PaddleNLP/paddlenlp/data/tokenizer.py) + - 获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 + - **参数** + - `words`: 需要获取的词向量的词、词列表或者词编号。 -更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -## 代码示例 + - ```python + def cosine_sim( + word_a: str, + word_b: str, + ) + ``` -```python -import paddlehub as hub -embedding = hub.Module(name='w2v_weibo_target_word-char_dim300') + - 计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 -# 获取单词的embedding -embedding.search("中国") -# 计算两个词向量的余弦相似度 -embedding.cosine_sim("中国", "美国") -# 计算两个词向量的内积 -embedding.dot("中国", "美国") -``` + - **参数** + - `word_a`: 需要计算余弦相似度的单词a。 + - `word_b`: 需要计算余弦相似度的单词b。 -## 部署服务 -通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 + - ```python + def dot( + word_a: str, + word_b: str, + ) + ``` -### Step1: 启动PaddleHub Serving + - 计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 -运行启动命令: + - **参数** + - `word_a`: 
需要计算内积的单词a。 + - `word_b`: 需要计算内积的单词b。 -```shell -$ hub serving start -m w2v_weibo_target_word-char_dim300 -``` -这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + - ```python + def get_vocab_path() + ``` -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + - 获取本地词表文件的路径信息。 -### Step2: 发送预测请求 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + - ```python + def get_tokenizer(*args, **kwargs) + ``` -```python -import requests -import json + - 获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -# 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... ]] -word_pairs = [["中国", "美国"], ["今天", "明天"]] -# 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 -data = {"data": word_pairs} -# 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip -url = "http://10.12.121.132:8866/predict/w2v_weibo_target_word-char_dim300" -# 指定post请求的headers为application/json方式 -headers = {"Content-Type": "application/json"} + - **参数** + - `*args`: 额外传递的列表形式的参数。 + - `**kwargs`: 额外传递的字典形式的参数。 -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -print(r.json()) -``` + - 关于额外参数的详情可参考[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/paddlenlp/data/tokenizer.py) -## 查看代码 + - 更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings -## 依赖 +## 四、部署服务 -paddlepaddle >= 2.0.0 +- 通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 -paddlehub >= 2.0.0 +- ### Step1: 启动PaddleHub Serving -## 更新历史 + - 运行启动命令: + + - ```shell + $ hub serving start -m w2v_weibo_target_word-char_dim300 + ``` + + - 这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步: 发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... 
]] + word_pairs = [["中国", "美国"], ["今天", "明天"]] + # 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 + data = {"data": word_pairs} + # 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip + url = "http://127.0.0.1:8866/predict/w2v_weibo_target_word-char_dim300" + # 指定post请求的headers为application/json方式 + headers = {"Content-Type": "application/json"} + + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + ``` + + +## 五、更新历史 * 1.0.0 @@ -146,4 +174,7 @@ paddlehub >= 2.0.0 * 1.0.1 - 支持基于embedding的文本分类和序列标注finetune任务 + 优化模型 + - ```shell + $ hub install w2v_weibo_target_word-char_dim300==1.0.1 + ``` diff --git a/modules/text/embedding/w2v_weibo_target_word-word_dim300/README.md b/modules/text/embedding/w2v_weibo_target_word-word_dim300/README.md index ea1f5971831cdc2a4adc8f583fff712fc86ff1f6..eed0224a7df26470fd6eb5db387a59c7740f34ce 100644 --- a/modules/text/embedding/w2v_weibo_target_word-word_dim300/README.md +++ b/modules/text/embedding/w2v_weibo_target_word-word_dim300/README.md @@ -1,144 +1,172 @@ -## 概述 -PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) +# w2v_weibo_target_word-word_dim300 +|模型名称|w2v_weibo_target_word-word_dim300| +| :--- | :---: | +|类别|文本-词嵌入| +|网络|w2v| +|数据集|weibo| +|是否支持Fine-tuning|否| +|文件大小|207.94MB| +|词表大小|195204| +|最新更新日期|2021-04-28| +|数据指标|-| -## API +## 一、模型基本信息 -```python -def __init__( - *args, - **kwargs -) -``` +- ### 模型介绍 -创建一个Embedding Module对象,默认无需参数。 + - PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) -**参数** -* `*args`: 用户额外指定的列表类型的参数。 -* `**kwargs`:用户额外指定的关键字字典类型的参数。 +## 二、安装 -关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) +- ### 1、环境依赖 + - paddlepaddle >= 2.0.0 -```python -def search( - words: Union[List[str], str, int], -) -``` + - paddlehub >= 2.0.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) -获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 +- ### 2、安装 -**参数** -* `words`: 需要获取的词向量的词、词列表或者词编号。 + - ```shell + $ hub install w2v_weibo_target_word-word_dim300 + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) +## 三、模型API -```python -def cosine_sim( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 +- ### 1、预测代码示例 -**参数** -* `word_a`: 需要计算余弦相似度的单词a。 -* `word_b`: 需要计算余弦相似度的单词b。 + - ```python + import paddlehub as hub + embedding = hub.Module(name='w2v_weibo_target_word-word_dim300') + # 获取单词的embedding + embedding.search("中国") + # 计算两个词向量的余弦相似度 + embedding.cosine_sim("中国", "美国") + # 计算两个词向量的内积 + embedding.dot("中国", "美国") + ``` -```python -def dot( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 +- ### 2、API -**参数** -* `word_a`: 需要计算内积的单词a。 -* `word_b`: 需要计算内积的单词b。 + - ```python + def __init__( + *args, + **kwargs + ) + ``` + - 创建一个Embedding Module对象,默认无需参数。 -```python -def get_vocab_path() -``` -获取本地词表文件的路径信息。 + - **参数** + - 
`*args`: 用户额外指定的列表类型的参数。 + - `**kwargs`:用户额外指定的关键字字典类型的参数。 + - 关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -```python -def get_tokenizer(*args, **kwargs) -``` -获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -**参数** -* `*args`: 额外传递的列表形式的参数。 -* `**kwargs`: 额外传递的字典形式的参数。 + - ```python + def search( + words: Union[List[str], str, int], + ) + ``` -关于额外参数的详情,可查看[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/develop/PaddleNLP/paddlenlp/data/tokenizer.py) + - 获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 + - **参数** + - `words`: 需要获取的词向量的词、词列表或者词编号。 -更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -## 代码示例 + - ```python + def cosine_sim( + word_a: str, + word_b: str, + ) + ``` -```python -import paddlehub as hub -embedding = hub.Module(name='w2v_weibo_target_word-word_dim300') + - 计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 -# 获取单词的embedding -embedding.search("中国") -# 计算两个词向量的余弦相似度 -embedding.cosine_sim("中国", "美国") -# 计算两个词向量的内积 -embedding.dot("中国", "美国") -``` + - **参数** + - `word_a`: 需要计算余弦相似度的单词a。 + - `word_b`: 需要计算余弦相似度的单词b。 -## 部署服务 -通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 + - ```python + def dot( + word_a: str, + word_b: str, + ) + ``` -### Step1: 启动PaddleHub Serving + - 计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 -运行启动命令: + - **参数** + - `word_a`: 需要计算内积的单词a。 + - `word_b`: 需要计算内积的单词b。 -```shell -$ hub serving start -m w2v_weibo_target_word-word_dim300 -``` -这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + - ```python + def get_vocab_path() + ``` -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + - 获取本地词表文件的路径信息。 -### Step2: 发送预测请求 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + - ```python + def get_tokenizer(*args, **kwargs) + ``` -```python -import requests -import json + - 获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -# 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... 
]] -word_pairs = [["中国", "美国"], ["今天", "明天"]] -# 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 -data = {"data": word_pairs} -# 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip -url = "http://10.12.121.132:8866/predict/w2v_weibo_target_word-word_dim300" -# 指定post请求的headers为application/json方式 -headers = {"Content-Type": "application/json"} + - **参数** + - `*args`: 额外传递的列表形式的参数。 + - `**kwargs`: 额外传递的字典形式的参数。 -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -print(r.json()) -``` + - 关于额外参数的详情可参考[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/paddlenlp/data/tokenizer.py) -## 查看代码 + - 更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings -## 依赖 +## 四、部署服务 -paddlepaddle >= 2.0.0 +- 通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 -paddlehub >= 2.0.0 +- ### Step1: 启动PaddleHub Serving -## 更新历史 + - 运行启动命令: + + - ```shell + $ hub serving start -m w2v_weibo_target_word-word_dim300 + ``` + + - 这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步: 发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... ]] + word_pairs = [["中国", "美国"], ["今天", "明天"]] + # 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 + data = {"data": word_pairs} + # 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip + url = "http://127.0.0.1:8866/predict/w2v_weibo_target_word-word_dim300" + # 指定post请求的headers为application/json方式 + headers = {"Content-Type": "application/json"} + + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + ``` + + +## 五、更新历史 * 1.0.0 @@ -146,4 +174,7 @@ paddlehub >= 2.0.0 * 1.0.1 - 支持基于embedding的文本分类和序列标注finetune任务 + 优化模型 + - ```shell + $ hub install w2v_weibo_target_word-word_dim300==1.0.1 + ``` diff --git a/modules/text/embedding/w2v_wiki_target_bigram-char_dim300/README.md b/modules/text/embedding/w2v_wiki_target_bigram-char_dim300/README.md index 007012f4145180baf874d25c36e84ab5800fc936..b655a0384196134bf9f9d0e6092c8b5f776c119b 100644 --- a/modules/text/embedding/w2v_wiki_target_bigram-char_dim300/README.md +++ b/modules/text/embedding/w2v_wiki_target_bigram-char_dim300/README.md @@ -1,144 +1,172 @@ -## 概述 -PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) +# w2v_wiki_target_bigram-char_dim300 +|模型名称|w2v_wiki_target_bigram-char_dim300| +| :--- | :---: | +|类别|文本-词嵌入| +|网络|w2v| +|数据集|wiki| +|是否支持Fine-tuning|否| +|文件大小|375.98MB| +|词表大小|352274| +|最新更新日期|2021-04-28| +|数据指标|-| -## API +## 一、模型基本信息 -```python -def __init__( - *args, - **kwargs -) -``` +- ### 模型介绍 -创建一个Embedding Module对象,默认无需参数。 + - PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) -**参数** -* `*args`: 用户额外指定的列表类型的参数。 -* `**kwargs`:用户额外指定的关键字字典类型的参数。 +## 二、安装 -关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) +- ### 1、环境依赖 + - paddlepaddle >= 2.0.0 -```python -def search( - 
words: Union[List[str], str, int], -) -``` + - paddlehub >= 2.0.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) -获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 +- ### 2、安装 -**参数** -* `words`: 需要获取的词向量的词、词列表或者词编号。 + - ```shell + $ hub install w2v_wiki_target_bigram-char_dim300 + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) +## 三、模型API -```python -def cosine_sim( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 +- ### 1、预测代码示例 -**参数** -* `word_a`: 需要计算余弦相似度的单词a。 -* `word_b`: 需要计算余弦相似度的单词b。 + - ```python + import paddlehub as hub + embedding = hub.Module(name='w2v_wiki_target_bigram-char_dim300') + # 获取单词的embedding + embedding.search("中国") + # 计算两个词向量的余弦相似度 + embedding.cosine_sim("中国", "美国") + # 计算两个词向量的内积 + embedding.dot("中国", "美国") + ``` -```python -def dot( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 +- ### 2、API -**参数** -* `word_a`: 需要计算内积的单词a。 -* `word_b`: 需要计算内积的单词b。 + - ```python + def __init__( + *args, + **kwargs + ) + ``` + - 创建一个Embedding Module对象,默认无需参数。 -```python -def get_vocab_path() -``` -获取本地词表文件的路径信息。 + - **参数** + - `*args`: 用户额外指定的列表类型的参数。 + - `**kwargs`:用户额外指定的关键字字典类型的参数。 + - 关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -```python -def get_tokenizer(*args, **kwargs) -``` -获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -**参数** -* `*args`: 额外传递的列表形式的参数。 -* `**kwargs`: 额外传递的字典形式的参数。 + - ```python + def search( + words: Union[List[str], str, int], + ) + ``` -关于额外参数的详情,可查看[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/develop/PaddleNLP/paddlenlp/data/tokenizer.py) + - 获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 + - **参数** + - `words`: 需要获取的词向量的词、词列表或者词编号。 -更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -## 代码示例 + - ```python + def cosine_sim( + word_a: str, + word_b: str, + ) + ``` -```python -import paddlehub as hub -embedding = hub.Module(name='w2v_wiki_target_bigram-char_dim300') + - 计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 -# 获取单词的embedding -embedding.search("中国") -# 计算两个词向量的余弦相似度 -embedding.cosine_sim("中国", "美国") -# 计算两个词向量的内积 -embedding.dot("中国", "美国") -``` + - **参数** + - `word_a`: 需要计算余弦相似度的单词a。 + - `word_b`: 需要计算余弦相似度的单词b。 -## 部署服务 -通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 + - ```python + def dot( + word_a: str, + word_b: str, + ) + ``` -### Step1: 启动PaddleHub Serving + - 计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 -运行启动命令: + - **参数** + - `word_a`: 需要计算内积的单词a。 + - `word_b`: 需要计算内积的单词b。 -```shell -$ hub serving start -m w2v_wiki_target_bigram-char_dim300 -``` -这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + - ```python + def get_vocab_path() + ``` -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + - 获取本地词表文件的路径信息。 -### Step2: 发送预测请求 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + - ```python + def get_tokenizer(*args, **kwargs) + ``` -```python -import requests -import json + - 
获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -# 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... ]] -word_pairs = [["中国", "美国"], ["今天", "明天"]] -# 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 -data = {"data": word_pairs} -# 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip -url = "http://10.12.121.132:8866/predict/w2v_wiki_target_bigram-char_dim300" -# 指定post请求的headers为application/json方式 -headers = {"Content-Type": "application/json"} + - **参数** + - `*args`: 额外传递的列表形式的参数。 + - `**kwargs`: 额外传递的字典形式的参数。 -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -print(r.json()) -``` + - 关于额外参数的详情可参考[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/paddlenlp/data/tokenizer.py) -## 查看代码 + - 更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings -## 依赖 +## 四、部署服务 -paddlepaddle >= 2.0.0 +- 通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 -paddlehub >= 2.0.0 +- ### Step1: 启动PaddleHub Serving -## 更新历史 + - 运行启动命令: + + - ```shell + $ hub serving start -m w2v_wiki_target_bigram-char_dim300 + ``` + + - 这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步: 发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... ]] + word_pairs = [["中国", "美国"], ["今天", "明天"]] + # 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 + data = {"data": word_pairs} + # 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip + url = "http://127.0.0.1:8866/predict/w2v_wiki_target_bigram-char_dim300" + # 指定post请求的headers为application/json方式 + headers = {"Content-Type": "application/json"} + + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + ``` + + +## 五、更新历史 * 1.0.0 @@ -146,4 +174,7 @@ paddlehub >= 2.0.0 * 1.0.1 - 支持基于embedding的文本分类和序列标注finetune任务 + 优化模型 + - ```shell + $ hub install w2v_wiki_target_bigram-char_dim300==1.0.1 + ``` diff --git a/modules/text/embedding/w2v_wiki_target_word-bigram_dim300/README.md b/modules/text/embedding/w2v_wiki_target_word-bigram_dim300/README.md index 53bd2c35901b5bd808d58976a91b6eea60341cdf..1c9eb46750652d8087243fe7bfa3c0f411dbd193 100644 --- a/modules/text/embedding/w2v_wiki_target_word-bigram_dim300/README.md +++ b/modules/text/embedding/w2v_wiki_target_word-bigram_dim300/README.md @@ -1,144 +1,172 @@ -## 概述 -PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) +# w2v_wiki_target_word-bigram_dim300 +|模型名称|w2v_wiki_target_word-bigram_dim300| +| :--- | :---: | +|类别|文本-词嵌入| +|网络|w2v| +|数据集|wiki| +|是否支持Fine-tuning|否| +|文件大小|375.72MB| +|词表大小|352219| +|最新更新日期|2021-04-28| +|数据指标|-| -## API +## 一、模型基本信息 -```python -def __init__( - *args, - **kwargs -) -``` +- ### 模型介绍 -创建一个Embedding Module对象,默认无需参数。 + - PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) -**参数** -* `*args`: 用户额外指定的列表类型的参数。 -* `**kwargs`:用户额外指定的关键字字典类型的参数。 +## 二、安装 
-关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) +- ### 1、环境依赖 + - paddlepaddle >= 2.0.0 -```python -def search( - words: Union[List[str], str, int], -) -``` + - paddlehub >= 2.0.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) -获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 +- ### 2、安装 -**参数** -* `words`: 需要获取的词向量的词、词列表或者词编号。 + - ```shell + $ hub install w2v_wiki_target_word-bigram_dim300 + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) +## 三、模型API -```python -def cosine_sim( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 +- ### 1、预测代码示例 -**参数** -* `word_a`: 需要计算余弦相似度的单词a。 -* `word_b`: 需要计算余弦相似度的单词b。 + - ```python + import paddlehub as hub + embedding = hub.Module(name='w2v_wiki_target_word-bigram_dim300') + # 获取单词的embedding + embedding.search("中国") + # 计算两个词向量的余弦相似度 + embedding.cosine_sim("中国", "美国") + # 计算两个词向量的内积 + embedding.dot("中国", "美国") + ``` -```python -def dot( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 +- ### 2、API -**参数** -* `word_a`: 需要计算内积的单词a。 -* `word_b`: 需要计算内积的单词b。 + - ```python + def __init__( + *args, + **kwargs + ) + ``` + - 创建一个Embedding Module对象,默认无需参数。 -```python -def get_vocab_path() -``` -获取本地词表文件的路径信息。 + - **参数** + - `*args`: 用户额外指定的列表类型的参数。 + - `**kwargs`:用户额外指定的关键字字典类型的参数。 + - 关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -```python -def get_tokenizer(*args, **kwargs) -``` -获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -**参数** -* `*args`: 额外传递的列表形式的参数。 -* `**kwargs`: 额外传递的字典形式的参数。 + - ```python + def search( + words: Union[List[str], str, int], + ) + ``` -关于额外参数的详情,可查看[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/develop/PaddleNLP/paddlenlp/data/tokenizer.py) + - 获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 + - **参数** + - `words`: 需要获取的词向量的词、词列表或者词编号。 -更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -## 代码示例 + - ```python + def cosine_sim( + word_a: str, + word_b: str, + ) + ``` -```python -import paddlehub as hub -embedding = hub.Module(name='w2v_wiki_target_word-bigram_dim300') + - 计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 -# 获取单词的embedding -embedding.search("中国") -# 计算两个词向量的余弦相似度 -embedding.cosine_sim("中国", "美国") -# 计算两个词向量的内积 -embedding.dot("中国", "美国") -``` + - **参数** + - `word_a`: 需要计算余弦相似度的单词a。 + - `word_b`: 需要计算余弦相似度的单词b。 -## 部署服务 -通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 + - ```python + def dot( + word_a: str, + word_b: str, + ) + ``` -### Step1: 启动PaddleHub Serving + - 计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 -运行启动命令: + - **参数** + - `word_a`: 需要计算内积的单词a。 + - `word_b`: 需要计算内积的单词b。 -```shell -$ hub serving start -m w2v_wiki_target_word-bigram_dim300 -``` -这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + - ```python + def get_vocab_path() + ``` -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + - 
获取本地词表文件的路径信息。 -### Step2: 发送预测请求 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + - ```python + def get_tokenizer(*args, **kwargs) + ``` -```python -import requests -import json + - 获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -# 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... ]] -word_pairs = [["中国", "美国"], ["今天", "明天"]] -# 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 -data = {"data": word_pairs} -# 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip -url = "http://10.12.121.132:8866/predict/w2v_wiki_target_word-bigram_dim300" -# 指定post请求的headers为application/json方式 -headers = {"Content-Type": "application/json"} + - **参数** + - `*args`: 额外传递的列表形式的参数。 + - `**kwargs`: 额外传递的字典形式的参数。 -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -print(r.json()) -``` + - 关于额外参数的详情可参考[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/paddlenlp/data/tokenizer.py) -## 查看代码 + - 更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings -## 依赖 +## 四、部署服务 -paddlepaddle >= 2.0.0 +- 通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 -paddlehub >= 2.0.0 +- ### Step1: 启动PaddleHub Serving -## 更新历史 + - 运行启动命令: + + - ```shell + $ hub serving start -m w2v_wiki_target_word-bigram_dim300 + ``` + + - 这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步: 发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... ]] + word_pairs = [["中国", "美国"], ["今天", "明天"]] + # 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 + data = {"data": word_pairs} + # 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip + url = "http://127.0.0.1:8866/predict/w2v_wiki_target_word-bigram_dim300" + # 指定post请求的headers为application/json方式 + headers = {"Content-Type": "application/json"} + + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + ``` + + +## 五、更新历史 * 1.0.0 @@ -146,4 +174,7 @@ paddlehub >= 2.0.0 * 1.0.1 - 支持基于embedding的文本分类和序列标注finetune任务 + 优化模型 + - ```shell + $ hub install w2v_wiki_target_word-bigram_dim300==1.0.1 + ``` diff --git a/modules/text/embedding/w2v_wiki_target_word-char_dim300/README.md b/modules/text/embedding/w2v_wiki_target_word-char_dim300/README.md index 0c8d321ffa9b2f7ac43cf0078b2bda0468472755..0c203ab9e8be286ccbc544ddbebfdc256ab52c06 100644 --- a/modules/text/embedding/w2v_wiki_target_word-char_dim300/README.md +++ b/modules/text/embedding/w2v_wiki_target_word-char_dim300/README.md @@ -1,144 +1,172 @@ -## 概述 -PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) +# w2v_wiki_target_word-char_dim300 +|模型名称|w2v_wiki_target_word-char_dim300| +| :--- | :---: | +|类别|文本-词嵌入| +|网络|w2v| +|数据集|wiki| +|是否支持Fine-tuning|否| +|文件大小|375.52MB| +|词表大小|352223| +|最新更新日期|2021-04-28| +|数据指标|-| -## API +## 一、模型基本信息 -```python -def __init__( - *args, - **kwargs -) -``` +- ### 模型介绍 -创建一个Embedding Module对象,默认无需参数。 + - 
PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) -**参数** -* `*args`: 用户额外指定的列表类型的参数。 -* `**kwargs`:用户额外指定的关键字字典类型的参数。 +## 二、安装 -关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) +- ### 1、环境依赖 + - paddlepaddle >= 2.0.0 -```python -def search( - words: Union[List[str], str, int], -) -``` + - paddlehub >= 2.0.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) -获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 +- ### 2、安装 -**参数** -* `words`: 需要获取的词向量的词、词列表或者词编号。 + - ```shell + $ hub install w2v_wiki_target_word-char_dim300 + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) +## 三、模型API -```python -def cosine_sim( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 +- ### 1、预测代码示例 -**参数** -* `word_a`: 需要计算余弦相似度的单词a。 -* `word_b`: 需要计算余弦相似度的单词b。 + - ```python + import paddlehub as hub + embedding = hub.Module(name='w2v_wiki_target_word-char_dim300') + # 获取单词的embedding + embedding.search("中国") + # 计算两个词向量的余弦相似度 + embedding.cosine_sim("中国", "美国") + # 计算两个词向量的内积 + embedding.dot("中国", "美国") + ``` -```python -def dot( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 +- ### 2、API -**参数** -* `word_a`: 需要计算内积的单词a。 -* `word_b`: 需要计算内积的单词b。 + - ```python + def __init__( + *args, + **kwargs + ) + ``` + - 创建一个Embedding Module对象,默认无需参数。 -```python -def get_vocab_path() -``` -获取本地词表文件的路径信息。 + - **参数** + - `*args`: 用户额外指定的列表类型的参数。 + - `**kwargs`:用户额外指定的关键字字典类型的参数。 + - 关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -```python -def get_tokenizer(*args, **kwargs) -``` -获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -**参数** -* `*args`: 额外传递的列表形式的参数。 -* `**kwargs`: 额外传递的字典形式的参数。 + - ```python + def search( + words: Union[List[str], str, int], + ) + ``` -关于额外参数的详情,可查看[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/develop/PaddleNLP/paddlenlp/data/tokenizer.py) + - 获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 + - **参数** + - `words`: 需要获取的词向量的词、词列表或者词编号。 -更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -## 代码示例 + - ```python + def cosine_sim( + word_a: str, + word_b: str, + ) + ``` -```python -import paddlehub as hub -embedding = hub.Module(name='w2v_wiki_target_word-char_dim300') + - 计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 -# 获取单词的embedding -embedding.search("中国") -# 计算两个词向量的余弦相似度 -embedding.cosine_sim("中国", "美国") -# 计算两个词向量的内积 -embedding.dot("中国", "美国") -``` + - **参数** + - `word_a`: 需要计算余弦相似度的单词a。 + - `word_b`: 需要计算余弦相似度的单词b。 -## 部署服务 -通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 + - ```python + def dot( + word_a: str, + word_b: str, + ) + ``` -### Step1: 启动PaddleHub Serving + - 计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 -运行启动命令: + - **参数** + - `word_a`: 
需要计算内积的单词a。 + - `word_b`: 需要计算内积的单词b。 -```shell -$ hub serving start -m w2v_wiki_target_word-char_dim300 -``` -这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + - ```python + def get_vocab_path() + ``` -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + - 获取本地词表文件的路径信息。 -### Step2: 发送预测请求 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + - ```python + def get_tokenizer(*args, **kwargs) + ``` -```python -import requests -import json + - 获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -# 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... ]] -word_pairs = [["中国", "美国"], ["今天", "明天"]] -# 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 -data = {"data": word_pairs} -# 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip -url = "http://10.12.121.132:8866/predict/w2v_wiki_target_word-char_dim300" -# 指定post请求的headers为application/json方式 -headers = {"Content-Type": "application/json"} + - **参数** + - `*args`: 额外传递的列表形式的参数。 + - `**kwargs`: 额外传递的字典形式的参数。 -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -print(r.json()) -``` + - 关于额外参数的详情可参考[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/paddlenlp/data/tokenizer.py) -## 查看代码 + - 更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings -## 依赖 +## 四、部署服务 -paddlepaddle >= 2.0.0 +- 通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 -paddlehub >= 2.0.0 +- ### Step1: 启动PaddleHub Serving -## 更新历史 + - 运行启动命令: + + - ```shell + $ hub serving start -m w2v_wiki_target_word-char_dim300 + ``` + + - 这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步: 发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... 
]] + word_pairs = [["中国", "美国"], ["今天", "明天"]] + # 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 + data = {"data": word_pairs} + # 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip + url = "http://127.0.0.1:8866/predict/w2v_wiki_target_word-char_dim300" + # 指定post请求的headers为application/json方式 + headers = {"Content-Type": "application/json"} + + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + ``` + + +## 五、更新历史 * 1.0.0 @@ -146,4 +174,7 @@ paddlehub >= 2.0.0 * 1.0.1 - 支持基于embedding的文本分类和序列标注finetune任务 + 优化模型 + - ```shell + $ hub install w2v_wiki_target_word-char_dim300==1.0.1 + ``` diff --git a/modules/text/embedding/w2v_wiki_target_word-word_dim300/README.md b/modules/text/embedding/w2v_wiki_target_word-word_dim300/README.md index 9d8b9204abf1ffe3dcd296d16aa62b57e5f79b17..3b8bed093910506e64d55c18cc8b3dce891b32ca 100644 --- a/modules/text/embedding/w2v_wiki_target_word-word_dim300/README.md +++ b/modules/text/embedding/w2v_wiki_target_word-word_dim300/README.md @@ -1,144 +1,172 @@ -## 概述 -PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) +# w2v_wiki_target_word-word_dim300 +|模型名称|w2v_wiki_target_word-word_dim300| +| :--- | :---: | +|类别|文本-词嵌入| +|网络|w2v| +|数据集|wiki| +|是否支持Fine-tuning|否| +|文件大小|374.95MB| +|词表大小|352219| +|最新更新日期|2021-04-28| +|数据指标|-| -## API +## 一、模型基本信息 -```python -def __init__( - *args, - **kwargs -) -``` +- ### 模型介绍 -创建一个Embedding Module对象,默认无需参数。 + - PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) -**参数** -* `*args`: 用户额外指定的列表类型的参数。 -* `**kwargs`:用户额外指定的关键字字典类型的参数。 +## 二、安装 -关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) +- ### 1、环境依赖 + - paddlepaddle >= 2.0.0 -```python -def search( - words: Union[List[str], str, int], -) -``` + - paddlehub >= 2.0.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) -获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 +- ### 2、安装 -**参数** -* `words`: 需要获取的词向量的词、词列表或者词编号。 + - ```shell + $ hub install w2v_wiki_target_word-word_dim300 + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) +## 三、模型API -```python -def cosine_sim( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 +- ### 1、预测代码示例 -**参数** -* `word_a`: 需要计算余弦相似度的单词a。 -* `word_b`: 需要计算余弦相似度的单词b。 + - ```python + import paddlehub as hub + embedding = hub.Module(name='w2v_wiki_target_word-word_dim300') + # 获取单词的embedding + embedding.search("中国") + # 计算两个词向量的余弦相似度 + embedding.cosine_sim("中国", "美国") + # 计算两个词向量的内积 + embedding.dot("中国", "美国") + ``` -```python -def dot( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 +- ### 2、API -**参数** -* `word_a`: 需要计算内积的单词a。 -* `word_b`: 需要计算内积的单词b。 + - ```python + def __init__( + *args, + **kwargs + ) + ``` + - 创建一个Embedding Module对象,默认无需参数。 -```python -def get_vocab_path() -``` -获取本地词表文件的路径信息。 + - **参数** + - `*args`: 
用户额外指定的列表类型的参数。 + - `**kwargs`:用户额外指定的关键字字典类型的参数。 + - 关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -```python -def get_tokenizer(*args, **kwargs) -``` -获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -**参数** -* `*args`: 额外传递的列表形式的参数。 -* `**kwargs`: 额外传递的字典形式的参数。 + - ```python + def search( + words: Union[List[str], str, int], + ) + ``` -关于额外参数的详情,可查看[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/develop/PaddleNLP/paddlenlp/data/tokenizer.py) + - 获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 + - **参数** + - `words`: 需要获取的词向量的词、词列表或者词编号。 -更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -## 代码示例 + - ```python + def cosine_sim( + word_a: str, + word_b: str, + ) + ``` -```python -import paddlehub as hub -embedding = hub.Module(name='w2v_wiki_target_word-word_dim300') + - 计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 -# 获取单词的embedding -embedding.search("中国") -# 计算两个词向量的余弦相似度 -embedding.cosine_sim("中国", "美国") -# 计算两个词向量的内积 -embedding.dot("中国", "美国") -``` + - **参数** + - `word_a`: 需要计算余弦相似度的单词a。 + - `word_b`: 需要计算余弦相似度的单词b。 -## 部署服务 -通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 + - ```python + def dot( + word_a: str, + word_b: str, + ) + ``` -### Step1: 启动PaddleHub Serving + - 计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 -运行启动命令: + - **参数** + - `word_a`: 需要计算内积的单词a。 + - `word_b`: 需要计算内积的单词b。 -```shell -$ hub serving start -m w2v_wiki_target_word-word_dim300 -``` -这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + - ```python + def get_vocab_path() + ``` -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + - 获取本地词表文件的路径信息。 -### Step2: 发送预测请求 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + - ```python + def get_tokenizer(*args, **kwargs) + ``` -```python -import requests -import json + - 获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -# 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... 
]] -word_pairs = [["中国", "美国"], ["今天", "明天"]] -# 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 -data = {"data": word_pairs} -# 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip -url = "http://10.12.121.132:8866/predict/w2v_wiki_target_word-word_dim300" -# 指定post请求的headers为application/json方式 -headers = {"Content-Type": "application/json"} + - **参数** + - `*args`: 额外传递的列表形式的参数。 + - `**kwargs`: 额外传递的字典形式的参数。 -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -print(r.json()) -``` + - 关于额外参数的详情可参考[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/paddlenlp/data/tokenizer.py) -## 查看代码 + - 更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings -## 依赖 +## 四、部署服务 -paddlepaddle >= 2.0.0 +- 通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 -paddlehub >= 2.0.0 +- ### Step1: 启动PaddleHub Serving -## 更新历史 + - 运行启动命令: + + - ```shell + $ hub serving start -m w2v_wiki_target_word-word_dim300 + ``` + + - 这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步: 发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... ]] + word_pairs = [["中国", "美国"], ["今天", "明天"]] + # 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 + data = {"data": word_pairs} + # 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip + url = "http://127.0.0.1:8866/predict/w2v_wiki_target_word-word_dim300" + # 指定post请求的headers为application/json方式 + headers = {"Content-Type": "application/json"} + + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + ``` + + +## 五、更新历史 * 1.0.0 @@ -146,4 +174,7 @@ paddlehub >= 2.0.0 * 1.0.1 - 支持基于embedding的文本分类和序列标注finetune任务 + 优化模型 + - ```shell + $ hub install w2v_wiki_target_word-word_dim300==1.0.1 + ``` diff --git a/modules/text/embedding/w2v_zhihu_target_bigram-char_dim300/README.md b/modules/text/embedding/w2v_zhihu_target_bigram-char_dim300/README.md index 5dceb66ac6a25dd6fecf9e50e63a36d2e029504d..edb05155462bec1ca774aaf57b31fb47774d18da 100644 --- a/modules/text/embedding/w2v_zhihu_target_bigram-char_dim300/README.md +++ b/modules/text/embedding/w2v_zhihu_target_bigram-char_dim300/README.md @@ -1,144 +1,172 @@ -## 概述 -PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) +# w2v_zhihu_target_bigram-char_dim300 +|模型名称|w2v_zhihu_target_bigram-char_dim300| +| :--- | :---: | +|类别|文本-词嵌入| +|网络|w2v| +|数据集|zhihu| +|是否支持Fine-tuning|否| +|文件大小|277.35MB| +|词表大小|259755| +|最新更新日期|2021-04-28| +|数据指标|-| -## API +## 一、模型基本信息 -```python -def __init__( - *args, - **kwargs -) -``` +- ### 模型介绍 -创建一个Embedding Module对象,默认无需参数。 + - PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) -**参数** -* `*args`: 用户额外指定的列表类型的参数。 -* `**kwargs`:用户额外指定的关键字字典类型的参数。 +## 二、安装 -关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) +- ### 1、环境依赖 + - paddlepaddle >= 2.0.0 -```python -def search( - 
words: Union[List[str], str, int], -) -``` + - paddlehub >= 2.0.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) -获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 +- ### 2、安装 -**参数** -* `words`: 需要获取的词向量的词、词列表或者词编号。 + - ```shell + $ hub install w2v_zhihu_target_bigram-char_dim300 + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) +## 三、模型API -```python -def cosine_sim( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 +- ### 1、预测代码示例 -**参数** -* `word_a`: 需要计算余弦相似度的单词a。 -* `word_b`: 需要计算余弦相似度的单词b。 + - ```python + import paddlehub as hub + embedding = hub.Module(name='w2v_zhihu_target_bigram-char_dim300') + # 获取单词的embedding + embedding.search("中国") + # 计算两个词向量的余弦相似度 + embedding.cosine_sim("中国", "美国") + # 计算两个词向量的内积 + embedding.dot("中国", "美国") + ``` -```python -def dot( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 +- ### 2、API -**参数** -* `word_a`: 需要计算内积的单词a。 -* `word_b`: 需要计算内积的单词b。 + - ```python + def __init__( + *args, + **kwargs + ) + ``` + - 创建一个Embedding Module对象,默认无需参数。 -```python -def get_vocab_path() -``` -获取本地词表文件的路径信息。 + - **参数** + - `*args`: 用户额外指定的列表类型的参数。 + - `**kwargs`:用户额外指定的关键字字典类型的参数。 + - 关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -```python -def get_tokenizer(*args, **kwargs) -``` -获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -**参数** -* `*args`: 额外传递的列表形式的参数。 -* `**kwargs`: 额外传递的字典形式的参数。 + - ```python + def search( + words: Union[List[str], str, int], + ) + ``` -关于额外参数的详情,可查看[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/develop/PaddleNLP/paddlenlp/data/tokenizer.py) + - 获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 + - **参数** + - `words`: 需要获取的词向量的词、词列表或者词编号。 -更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -## 代码示例 + - ```python + def cosine_sim( + word_a: str, + word_b: str, + ) + ``` -```python -import paddlehub as hub -embedding = hub.Module(name='w2v_zhihu_target_bigram-char_dim300') + - 计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 -# 获取单词的embedding -embedding.search("中国") -# 计算两个词向量的余弦相似度 -embedding.cosine_sim("中国", "美国") -# 计算两个词向量的内积 -embedding.dot("中国", "美国") -``` + - **参数** + - `word_a`: 需要计算余弦相似度的单词a。 + - `word_b`: 需要计算余弦相似度的单词b。 -## 部署服务 -通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 + - ```python + def dot( + word_a: str, + word_b: str, + ) + ``` -### Step1: 启动PaddleHub Serving + - 计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 -运行启动命令: + - **参数** + - `word_a`: 需要计算内积的单词a。 + - `word_b`: 需要计算内积的单词b。 -```shell -$ hub serving start -m w2v_zhihu_target_bigram-char_dim300 -``` -这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + - ```python + def get_vocab_path() + ``` -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + - 获取本地词表文件的路径信息。 -### Step2: 发送预测请求 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + - ```python + def get_tokenizer(*args, **kwargs) + ``` -```python -import requests -import json + - 
获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -# 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... ]] -word_pairs = [["中国", "美国"], ["今天", "明天"]] -# 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 -data = {"data": word_pairs} -# 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip -url = "http://10.12.121.132:8866/predict/w2v_zhihu_target_bigram-char_dim300" -# 指定post请求的headers为application/json方式 -headers = {"Content-Type": "application/json"} + - **参数** + - `*args`: 额外传递的列表形式的参数。 + - `**kwargs`: 额外传递的字典形式的参数。 -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -print(r.json()) -``` + - 关于额外参数的详情可参考[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/paddlenlp/data/tokenizer.py) -## 查看代码 + - 更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings -## 依赖 +## 四、部署服务 -paddlepaddle >= 2.0.0 +- 通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 -paddlehub >= 2.0.0 +- ### Step1: 启动PaddleHub Serving -## 更新历史 + - 运行启动命令: + + - ```shell + $ hub serving start -m w2v_zhihu_target_bigram-char_dim300 + ``` + + - 这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步: 发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... ]] + word_pairs = [["中国", "美国"], ["今天", "明天"]] + # 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 + data = {"data": word_pairs} + # 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip + url = "http://127.0.0.1:8866/predict/w2v_zhihu_target_bigram-char_dim300" + # 指定post请求的headers为application/json方式 + headers = {"Content-Type": "application/json"} + + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + ``` + + +## 五、更新历史 * 1.0.0 @@ -146,4 +174,7 @@ paddlehub >= 2.0.0 * 1.0.1 - 支持基于embedding的文本分类和序列标注finetune任务 + 优化模型 + - ```shell + $ hub install w2v_zhihu_target_bigram-char_dim300==1.0.1 + ``` diff --git a/modules/text/embedding/w2v_zhihu_target_word-bigram_dim300/README.md b/modules/text/embedding/w2v_zhihu_target_word-bigram_dim300/README.md index 5a3226bf7f2dbe9bee6dd68eb445f5f31fa216b5..5f0613f7b75b75b2c8b3b97f2135e96e7342db6e 100644 --- a/modules/text/embedding/w2v_zhihu_target_word-bigram_dim300/README.md +++ b/modules/text/embedding/w2v_zhihu_target_word-bigram_dim300/README.md @@ -1,144 +1,172 @@ -## 概述 -PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) +# w2v_zhihu_target_word-bigram_dim300 +|模型名称|w2v_zhihu_target_word-bigram_dim300| +| :--- | :---: | +|类别|文本-词嵌入| +|网络|w2v| +|数据集|zhihu| +|是否支持Fine-tuning|否| +|文件大小|277.53MB| +|词表大小|259885| +|最新更新日期|2021-04-28| +|数据指标|-| -## API +## 一、模型基本信息 -```python -def __init__( - *args, - **kwargs -) -``` +- ### 模型介绍 -创建一个Embedding Module对象,默认无需参数。 + - PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) -**参数** -* `*args`: 用户额外指定的列表类型的参数。 -* `**kwargs`:用户额外指定的关键字字典类型的参数。 +## 二、安装 
-关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings)
+- ### 1、环境依赖
+  - paddlepaddle >= 2.0.0

-```python
-def search(
-    words: Union[List[str], str, int],
-)
-```
+  - paddlehub >= 2.0.0    | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst)

-获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。
+- ### 2、安装

-**参数**
-* `words`: 需要获取的词向量的词、词列表或者词编号。
+  - ```shell
+    $ hub install w2v_zhihu_target_word-bigram_dim300
+    ```
+  - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md)
+    | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md)
+## 三、模型API

-```python
-def cosine_sim(
-    word_a: str,
-    word_b: str,
-)
-```
-计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。
+- ### 1、预测代码示例

-**参数**
-* `word_a`: 需要计算余弦相似度的单词a。
-* `word_b`: 需要计算余弦相似度的单词b。
+  - ```python
+    import paddlehub as hub
+    embedding = hub.Module(name='w2v_zhihu_target_word-bigram_dim300')
+    # 获取单词的embedding
+    embedding.search("中国")
+    # 计算两个词向量的余弦相似度
+    embedding.cosine_sim("中国", "美国")
+    # 计算两个词向量的内积
+    embedding.dot("中国", "美国")
+    ```

-```python
-def dot(
-    word_a: str,
-    word_b: str,
-)
-```
-计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。
+- ### 2、API

-**参数**
-* `word_a`: 需要计算内积的单词a。
-* `word_b`: 需要计算内积的单词b。
+  - ```python
+    def __init__(
+      *args,
+      **kwargs
+    )
+    ```
+  - 创建一个Embedding Module对象,默认无需参数。

-```python
-def get_vocab_path()
-```
-获取本地词表文件的路径信息。
+  - **参数**
+    - `*args`: 用户额外指定的列表类型的参数。
+    - `**kwargs`:用户额外指定的关键字字典类型的参数。
+    - 关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings)

-```python
-def get_tokenizer(*args, **kwargs)
-```
-获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。
-**参数**
-* `*args`: 额外传递的列表形式的参数。
-* `**kwargs`: 额外传递的字典形式的参数。
+  - ```python
+    def search(
+      words: Union[List[str], str, int],
+    )
+    ```

-关于额外参数的详情,可查看[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/develop/PaddleNLP/paddlenlp/data/tokenizer.py)
+  - 获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。
+  - **参数**
+    - `words`: 需要获取的词向量的词、词列表或者词编号。

-更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings)
-## 代码示例
+  - ```python
+    def cosine_sim(
+      word_a: str,
+      word_b: str,
+    )
+    ```

-```python
-import paddlehub as hub
-embedding = hub.Module(name='w2v_zhihu_target_word-bigram_dim300')
+  - 计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。

-# 获取单词的embedding
-embedding.search("中国")
-# 计算两个词向量的余弦相似度
-embedding.cosine_sim("中国", "美国")
-# 计算两个词向量的内积
-embedding.dot("中国", "美国")
-```
+  - **参数**
+    - `word_a`: 需要计算余弦相似度的单词a。
+    - `word_b`: 需要计算余弦相似度的单词b。

-## 部署服务
-通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。
+  - ```python
+    def dot(
+      word_a: str,
+      word_b: str,
+    )
+    ```

-### Step1: 启动PaddleHub Serving
+  - 计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。

-运行启动命令:
+  - **参数**
+    - `word_a`: 需要计算内积的单词a。
+    - `word_b`: 需要计算内积的单词b。

-```shell
-$ hub serving start -m w2v_zhihu_target_word-bigram_dim300
-```
-这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。
+  - ```python
+    def get_vocab_path()
+    ```

-**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。
+  - 获取本地词表文件的路径信息。

-### Step2: 发送预测请求
-配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果
+  - ```python
+    def get_tokenizer(*args, **kwargs)
+    ```

-```python
-import requests
-import json
+  - 获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。

-# 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... ]]
-word_pairs = [["中国", "美国"], ["今天", "明天"]]
-# 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算
-data = {"data": word_pairs}
-# 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip
-url = "http://10.12.121.132:8866/predict/w2v_zhihu_target_word-bigram_dim300"
-# 指定post请求的headers为application/json方式
-headers = {"Content-Type": "application/json"}
+  - **参数**
+    - `*args`: 额外传递的列表形式的参数。
+    - `**kwargs`: 额外传递的字典形式的参数。

-r = requests.post(url=url, headers=headers, data=json.dumps(data))
-print(r.json())
-```
+  - 关于额外参数的详情可参考[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/paddlenlp/data/tokenizer.py)

-## 查看代码
+  - 更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings)
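+
+  - 下面再给出一个把 `get_vocab_path`、`get_tokenizer` 和 `search` 串联使用的示意代码(仅供参考):先对一句话切词,再批量查询词向量。其中 `tokenizer.cut` 为 paddlenlp 中 JiebaTokenizer 提供的切词接口,返回值的具体形状请以实际安装的 paddlehub / paddlenlp 版本为准。
+
+  - ```python
+    import paddlehub as hub
+
+    embedding = hub.Module(name='w2v_zhihu_target_word-bigram_dim300')
+
+    # 查看本地词表文件所在路径
+    print(embedding.get_vocab_path())
+
+    # 获取中文切词器(JiebaTokenizer),先切词再查询词向量
+    tokenizer = embedding.get_tokenizer()
+    words = tokenizer.cut("今天天气不错")   # 假设返回切分后的词列表
+    vectors = embedding.search(words)       # 传入List[str],批量获取embedding
+    print(len(words), len(vectors))
+    ```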

-https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings
-## 依赖
+
+## 四、部署服务

-paddlepaddle >= 2.0.0
+- 通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。

-paddlehub >= 2.0.0
+- ### Step1: 启动PaddleHub Serving

-## 更新历史
+  - 运行启动命令:
+
+  - ```shell
+    $ hub serving start -m w2v_zhihu_target_word-bigram_dim300
+    ```
+
+  - 这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。
+
+  - **NOTE:**  如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。
+
+- ### 第二步: 发送预测请求
+
+  - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果
+
+  - ```python
+    import requests
+    import json
+
+    # 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... ]]
+    word_pairs = [["中国", "美国"], ["今天", "明天"]]
+    # 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算
+    data = {"data": word_pairs}
+    # 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip
+    url = "http://127.0.0.1:8866/predict/w2v_zhihu_target_word-bigram_dim300"
+    # 指定post请求的headers为application/json方式
+    headers = {"Content-Type": "application/json"}
+
+    r = requests.post(url=url, headers=headers, data=json.dumps(data))
+    print(r.json())
+    ```
+
+
+## 五、更新历史

 * 1.0.0

@@ -146,4 +174,7 @@ paddlehub >= 2.0.0

 * 1.0.1

-  支持基于embedding的文本分类和序列标注finetune任务
+  优化模型
+  - ```shell
+    $ hub install w2v_zhihu_target_word-bigram_dim300==1.0.1
+    ```
diff --git a/modules/text/embedding/w2v_zhihu_target_word-char_dim300/README.md b/modules/text/embedding/w2v_zhihu_target_word-char_dim300/README.md
index e41c6c8ce646878a13e4df9d78aee64196b5c3cc..b07c6dddae8a12b482935b1380ae271d55019480 100644
--- a/modules/text/embedding/w2v_zhihu_target_word-char_dim300/README.md
+++ b/modules/text/embedding/w2v_zhihu_target_word-char_dim300/README.md
@@ -1,144 +1,172 @@
-## 概述
-PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md)
+# w2v_zhihu_target_word-char_dim300
+|模型名称|w2v_zhihu_target_word-char_dim300|
+| :--- | :---: |
+|类别|文本-词嵌入|
+|网络|w2v|
+|数据集|zhihu|
+|是否支持Fine-tuning|否|
+|文件大小|277.40MB|
+|词表大小|259940|
+|最新更新日期|2021-04-28|
+|数据指标|-|

-## API

-```python
-def __init__(
-    *args,
-    **kwargs
-)
-```
+## 一、模型基本信息

-创建一个Embedding Module对象,默认无需参数。
+- ### 模型介绍

+  - 
PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) -**参数** -* `*args`: 用户额外指定的列表类型的参数。 -* `**kwargs`:用户额外指定的关键字字典类型的参数。 +## 二、安装 -关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) +- ### 1、环境依赖 + - paddlepaddle >= 2.0.0 -```python -def search( - words: Union[List[str], str, int], -) -``` + - paddlehub >= 2.0.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) -获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 +- ### 2、安装 -**参数** -* `words`: 需要获取的词向量的词、词列表或者词编号。 + - ```shell + $ hub install w2v_zhihu_target_word-char_dim300 + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) +## 三、模型API -```python -def cosine_sim( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 +- ### 1、预测代码示例 -**参数** -* `word_a`: 需要计算余弦相似度的单词a。 -* `word_b`: 需要计算余弦相似度的单词b。 + - ```python + import paddlehub as hub + embedding = hub.Module(name='w2v_zhihu_target_word-char_dim300') + # 获取单词的embedding + embedding.search("中国") + # 计算两个词向量的余弦相似度 + embedding.cosine_sim("中国", "美国") + # 计算两个词向量的内积 + embedding.dot("中国", "美国") + ``` -```python -def dot( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 +- ### 2、API -**参数** -* `word_a`: 需要计算内积的单词a。 -* `word_b`: 需要计算内积的单词b。 + - ```python + def __init__( + *args, + **kwargs + ) + ``` + - 创建一个Embedding Module对象,默认无需参数。 -```python -def get_vocab_path() -``` -获取本地词表文件的路径信息。 + - **参数** + - `*args`: 用户额外指定的列表类型的参数。 + - `**kwargs`:用户额外指定的关键字字典类型的参数。 + - 关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -```python -def get_tokenizer(*args, **kwargs) -``` -获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -**参数** -* `*args`: 额外传递的列表形式的参数。 -* `**kwargs`: 额外传递的字典形式的参数。 + - ```python + def search( + words: Union[List[str], str, int], + ) + ``` -关于额外参数的详情,可查看[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/develop/PaddleNLP/paddlenlp/data/tokenizer.py) + - 获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 + - **参数** + - `words`: 需要获取的词向量的词、词列表或者词编号。 -更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -## 代码示例 + - ```python + def cosine_sim( + word_a: str, + word_b: str, + ) + ``` -```python -import paddlehub as hub -embedding = hub.Module(name='w2v_zhihu_target_word-char_dim300') + - 计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 -# 获取单词的embedding -embedding.search("中国") -# 计算两个词向量的余弦相似度 -embedding.cosine_sim("中国", "美国") -# 计算两个词向量的内积 -embedding.dot("中国", "美国") -``` + - **参数** + - `word_a`: 需要计算余弦相似度的单词a。 + - `word_b`: 需要计算余弦相似度的单词b。 -## 部署服务 -通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 + - ```python + def dot( + word_a: str, + word_b: str, + ) + ``` -### Step1: 启动PaddleHub Serving + - 计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 -运行启动命令: + - **参数** + - `word_a`: 
需要计算内积的单词a。 + - `word_b`: 需要计算内积的单词b。 -```shell -$ hub serving start -m w2v_zhihu_target_word-char_dim300 -``` -这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + - ```python + def get_vocab_path() + ``` -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + - 获取本地词表文件的路径信息。 -### Step2: 发送预测请求 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + - ```python + def get_tokenizer(*args, **kwargs) + ``` -```python -import requests -import json + - 获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -# 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... ]] -word_pairs = [["中国", "美国"], ["今天", "明天"]] -# 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 -data = {"data": word_pairs} -# 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip -url = "http://10.12.121.132:8866/predict/w2v_zhihu_target_word-char_dim300" -# 指定post请求的headers为application/json方式 -headers = {"Content-Type": "application/json"} + - **参数** + - `*args`: 额外传递的列表形式的参数。 + - `**kwargs`: 额外传递的字典形式的参数。 -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -print(r.json()) -``` + - 关于额外参数的详情可参考[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/paddlenlp/data/tokenizer.py) -## 查看代码 + - 更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings -## 依赖 +## 四、部署服务 -paddlepaddle >= 2.0.0 +- 通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 -paddlehub >= 2.0.0 +- ### Step1: 启动PaddleHub Serving -## 更新历史 + - 运行启动命令: + + - ```shell + $ hub serving start -m w2v_zhihu_target_word-char_dim300 + ``` + + - 这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步: 发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... 
]] + word_pairs = [["中国", "美国"], ["今天", "明天"]] + # 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 + data = {"data": word_pairs} + # 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip + url = "http://127.0.0.1:8866/predict/w2v_zhihu_target_word-char_dim300" + # 指定post请求的headers为application/json方式 + headers = {"Content-Type": "application/json"} + + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + ``` + + +## 五、更新历史 * 1.0.0 @@ -146,4 +174,7 @@ paddlehub >= 2.0.0 * 1.0.1 - 支持基于embedding的文本分类和序列标注finetune任务 + 优化模型 + - ```shell + $ hub install w2v_zhihu_target_word-char_dim300==1.0.1 + ``` diff --git a/modules/text/embedding/w2v_zhihu_target_word-word_dim300/README.md b/modules/text/embedding/w2v_zhihu_target_word-word_dim300/README.md index c4b3370417df369a00070ebde93fe1e5cd88e022..c95a42f34516355bae7eccee5e52954096ca7b89 100644 --- a/modules/text/embedding/w2v_zhihu_target_word-word_dim300/README.md +++ b/modules/text/embedding/w2v_zhihu_target_word-word_dim300/README.md @@ -1,144 +1,172 @@ -## 概述 -PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) +# w2v_zhihu_target_word-word_dim300 +|模型名称|w2v_zhihu_target_word-word_dim300| +| :--- | :---: | +|类别|文本-词嵌入| +|网络|w2v| +|数据集|zhihu| +|是否支持Fine-tuning|否| +|文件大小|276.98MB| +|词表大小|259871| +|最新更新日期|2021-04-28| +|数据指标|-| -## API +## 一、模型基本信息 -```python -def __init__( - *args, - **kwargs -) -``` +- ### 模型介绍 -创建一个Embedding Module对象,默认无需参数。 + - PaddleHub提供多个开源的预训练Embedding模型。这些Embedding模型可根据不同语料、不同训练方式和不同的维度进行区分,关于模型的具体信息可参考PaddleNLP的文档:[Embedding模型汇总](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/docs/embeddings.md) -**参数** -* `*args`: 用户额外指定的列表类型的参数。 -* `**kwargs`:用户额外指定的关键字字典类型的参数。 +## 二、安装 -关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) +- ### 1、环境依赖 + - paddlepaddle >= 2.0.0 -```python -def search( - words: Union[List[str], str, int], -) -``` + - paddlehub >= 2.0.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) -获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 +- ### 2、安装 -**参数** -* `words`: 需要获取的词向量的词、词列表或者词编号。 + - ```shell + $ hub install w2v_zhihu_target_word-word_dim300 + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) +## 三、模型API -```python -def cosine_sim( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 +- ### 1、预测代码示例 -**参数** -* `word_a`: 需要计算余弦相似度的单词a。 -* `word_b`: 需要计算余弦相似度的单词b。 + - ```python + import paddlehub as hub + embedding = hub.Module(name='w2v_zhihu_target_word-word_dim300') + # 获取单词的embedding + embedding.search("中国") + # 计算两个词向量的余弦相似度 + embedding.cosine_sim("中国", "美国") + # 计算两个词向量的内积 + embedding.dot("中国", "美国") + ``` -```python -def dot( - word_a: str, - word_b: str, -) -``` -计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 +- ### 2、API -**参数** -* `word_a`: 需要计算内积的单词a。 -* `word_b`: 需要计算内积的单词b。 + - ```python + def __init__( + *args, + **kwargs + ) + ``` + - 创建一个Embedding Module对象,默认无需参数。 -```python -def get_vocab_path() -``` -获取本地词表文件的路径信息。 + - **参数** + - 
`*args`: 用户额外指定的列表类型的参数。 + - `**kwargs`:用户额外指定的关键字字典类型的参数。 + - 关于额外参数的详情可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -```python -def get_tokenizer(*args, **kwargs) -``` -获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -**参数** -* `*args`: 额外传递的列表形式的参数。 -* `**kwargs`: 额外传递的字典形式的参数。 + - ```python + def search( + words: Union[List[str], str, int], + ) + ``` -关于额外参数的详情,可查看[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/develop/PaddleNLP/paddlenlp/data/tokenizer.py) + - 获取一个或多个词的embedding。输入可以是`str`、`List[str]`和`int`类型,分别代表获取一个词,多个词和指定词编号的embedding,词的编号和模型的词典相关,词典可通过模型实例的`vocab`属性获取。 + - **参数** + - `words`: 需要获取的词向量的词、词列表或者词编号。 -更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -## 代码示例 + - ```python + def cosine_sim( + word_a: str, + word_b: str, + ) + ``` -```python -import paddlehub as hub -embedding = hub.Module(name='w2v_zhihu_target_word-word_dim300') + - 计算两个词embedding的余弦相似度。需要注意的是`word_a`和`word_b`都需要是词典里的单词,否则将会被认为是OOV(Out-Of-Vocabulary),同时被替换为`unknown_token`。 -# 获取单词的embedding -embedding.search("中国") -# 计算两个词向量的余弦相似度 -embedding.cosine_sim("中国", "美国") -# 计算两个词向量的内积 -embedding.dot("中国", "美国") -``` + - **参数** + - `word_a`: 需要计算余弦相似度的单词a。 + - `word_b`: 需要计算余弦相似度的单词b。 -## 部署服务 -通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 + - ```python + def dot( + word_a: str, + word_b: str, + ) + ``` -### Step1: 启动PaddleHub Serving + - 计算两个词embedding的内积。对于输入单词同样需要注意OOV问题。 -运行启动命令: + - **参数** + - `word_a`: 需要计算内积的单词a。 + - `word_b`: 需要计算内积的单词b。 -```shell -$ hub serving start -m w2v_zhihu_target_word-word_dim300 -``` -这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + - ```python + def get_vocab_path() + ``` -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + - 获取本地词表文件的路径信息。 -### Step2: 发送预测请求 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + - ```python + def get_tokenizer(*args, **kwargs) + ``` -```python -import requests -import json + - 获取当前模型的tokenizer,返回一个JiebaTokenizer的实例,当前只支持中文embedding模型。 -# 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... 
]] -word_pairs = [["中国", "美国"], ["今天", "明天"]] -# 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 -data = {"data": word_pairs} -# 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip -url = "http://10.12.121.132:8866/predict/w2v_zhihu_target_word-word_dim300" -# 指定post请求的headers为application/json方式 -headers = {"Content-Type": "application/json"} + - **参数** + - `*args`: 额外传递的列表形式的参数。 + - `**kwargs`: 额外传递的字典形式的参数。 -r = requests.post(url=url, headers=headers, data=json.dumps(data)) -print(r.json()) -``` + - 关于额外参数的详情可参考[paddlenlp.data.tokenizer.JiebaTokenizer](https://github.com/PaddlePaddle/models/blob/release/2.0-beta/PaddleNLP/paddlenlp/data/tokenizer.py) -## 查看代码 + - 更多api详情和用法可参考[paddlenlp.embeddings](https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings) -https://github.com/PaddlePaddle/models/tree/release/2.0-beta/PaddleNLP/paddlenlp/embeddings -## 依赖 +## 四、部署服务 -paddlepaddle >= 2.0.0 +- 通过PaddleHub Serving,可以部署一个在线获取两个词向量的余弦相似度的服务。 -paddlehub >= 2.0.0 +- ### Step1: 启动PaddleHub Serving -## 更新历史 + - 运行启动命令: + + - ```shell + $ hub serving start -m w2v_zhihu_target_word-word_dim300 + ``` + + - 这样就完成了一个获取词向量的余弦相似度服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步: 发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 指定用于计算余弦相似度的单词对[[word_a, word_b], [word_a, word_b], ... ]] + word_pairs = [["中国", "美国"], ["今天", "明天"]] + # 以key的方式指定word_pairs传入预测方法的时的参数,此例中为"data",对于每一对单词,调用cosine_sim进行余弦相似度的计算 + data = {"data": word_pairs} + # 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip + url = "http://127.0.0.1:8866/predict/w2v_zhihu_target_word-word_dim300" + # 指定post请求的headers为application/json方式 + headers = {"Content-Type": "application/json"} + + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + ``` + + +## 五、更新历史 * 1.0.0 @@ -146,4 +174,7 @@ paddlehub >= 2.0.0 * 1.0.1 - 支持基于embedding的文本分类和序列标注finetune任务 + 优化模型 + - ```shell + $ hub install w2v_zhihu_target_word-word_dim300==1.0.1 + ``` diff --git a/modules/text/lexical_analysis/lac/module.py b/modules/text/lexical_analysis/lac/module.py index fb460ba58c2621274b591ea8cb84a33dfe3565d4..40136fe63b8150434d323fd951ba5235b74c9f8b 100644 --- a/modules/text/lexical_analysis/lac/module.py +++ b/modules/text/lexical_analysis/lac/module.py @@ -13,7 +13,10 @@ import six import numpy as np import paddle.fluid as fluid -from paddle.fluid.core import PaddleTensor, AnalysisConfig, create_paddle_predictor + +from paddle.inference import Config +from paddle.inference import create_predictor + import paddlehub as hub from paddlehub.common.logger import logger from paddlehub.common.paddle_helper import add_vars_prefix @@ -62,26 +65,86 @@ class LAC(hub.Module): self._set_config() + def _get_device_id(self, places): + try: + places = os.environ[places] + id = int(places) + except: + id = -1 + return id + def _set_config(self): """ predictor config setting """ - cpu_config = AnalysisConfig(self.pretrained_model_path) + + # create default cpu predictor + cpu_config = Config(self.pretrained_model_path) cpu_config.disable_glog_info() cpu_config.disable_gpu() - self.cpu_predictor = create_paddle_predictor(cpu_config) - - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - use_gpu = True - except: - use_gpu = False - if use_gpu: - gpu_config = AnalysisConfig(self.pretrained_model_path) + self.cpu_predictor = create_predictor(cpu_config) + + # create 
predictors using various types of devices + + # npu + npu_id = self._get_device_id("FLAGS_selected_npus") + if npu_id != -1: + # use npu + npu_config = Config(self.pretrained_model_path) + npu_config.disable_glog_info() + npu_config.enable_npu(device_id=npu_id) + self.npu_predictor = create_predictor(npu_config) + + # gpu + gpu_id = self._get_device_id("CUDA_VISIBLE_DEVICES") + if gpu_id != -1: + # use gpu + gpu_config = Config(self.pretrained_model_path) gpu_config.disable_glog_info() - gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=0) - self.gpu_predictor = create_paddle_predictor(gpu_config) + gpu_config.enable_use_gpu(memory_pool_init_size_mb=500, device_id=gpu_id) + self.gpu_predictor = create_predictor(gpu_config) + + # xpu + xpu_id = self._get_device_id("XPU_VISIBLE_DEVICES") + if xpu_id != -1: + # use xpu + xpu_config = Config(self.pretrained_model_path) + xpu_config.disable_glog_info() + xpu_config.enable_xpu(100) + self.xpu_predictor = create_predictor(xpu_config) + + def _internal_predict(self, predictor, texts): + """ + Tranform the texts(list) to Tensor and then do "real predict" + Args: + texts(list): texts + Returns: + result(PaddleInferTensor): predict output + """ + + # texts to data and lod + lod = [0] + data = [] + for i, text in enumerate(texts): + text_inds = word_to_ids(text, self.word2id_dict, self.word_replace_dict, oov_id=self.oov_id) + data += text_inds + lod.append(len(text_inds) + lod[i]) + + # get predictor tensor + input_names = predictor.get_input_names() + input_tensor = predictor.get_input_handle(input_names[0]) + + # set data, shape and lod + input_tensor.copy_from_cpu(np.array(data).astype('int64')) + input_tensor.reshape([lod[-1], 1]) + input_tensor.set_lod([lod]) + + # real predict + predictor.run() + output_names = predictor.get_output_names() + output_handle = predictor.get_output_handle(output_names[0]) + + return output_handle def context(self, trainable=False): """ @@ -167,26 +230,6 @@ class LAC(hub.Module): texts = unicode_texts return texts - def texts2tensor(self, texts): - """ - Tranform the texts(list) to PaddleTensor - Args: - texts(list): texts - Returns: - tensor(PaddleTensor): tensor with texts data - """ - lod = [0] - data = [] - for i, text in enumerate(texts): - text_inds = word_to_ids(text, self.word2id_dict, self.word_replace_dict, oov_id=self.oov_id) - data += text_inds - lod.append(len(text_inds) + lod[i]) - tensor = PaddleTensor(np.array(data).astype('int64')) - tensor.name = "words" - tensor.lod = [lod] - tensor.shape = [lod[-1], 1] - return tensor - def _get_index(self, data_list, item=""): """ find all indexes of item in data_list @@ -198,7 +241,7 @@ class LAC(hub.Module): return res @serving - def cut(self, text, use_gpu=False, batch_size=1, return_tag=True): + def cut(self, text, use_gpu=False, batch_size=1, return_tag=True, use_device=None): """ The main function that segments an entire text that contains Chinese characters into separated words. @@ -207,20 +250,32 @@ class LAC(hub.Module): use_gpu(bool): whether use gpu to predict or not batch_size(int): the program deals once with one batch return_tag: Whether to get tag or not. + use_device (str): use cpu, gpu, xpu or npu, overwrites use_gpu flag. Returns: results(dict or list): The word segmentation result of the input text, whose key is 'word', if text is a list. If text is a str, the word segmentation result (list) is obtained. 
""" - if use_gpu: - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - except: - raise RuntimeError( - "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." - ) + + # real predictor to use + if use_device is not None: + if use_device == "cpu": + predictor = self.cpu_predictor + elif use_device == "xpu": + predictor = self.xpu_predictor + elif use_device == "npu": + predictor = self.npu_predictor + elif use_device == "gpu": + predictor = self.gpu_predictor + else: + raise Exception("Unsupported device: " + use_device) + else: + # use_device is not set, therefore follow use_gpu + if use_gpu: + predictor = self.gpu_predictor + else: + predictor = self.cpu_predictor if isinstance(text, list) and len(text) != 0: @@ -240,13 +295,8 @@ class LAC(hub.Module): batch_data = predicted_data[start_idx:] start_idx = start_idx + batch_size - tensor_words = self.texts2tensor(batch_data) - - if use_gpu: - batch_out = self.gpu_predictor.run([tensor_words]) - else: - batch_out = self.cpu_predictor.run([tensor_words]) - batch_result = parse_result(batch_data, batch_out[0], self.id2label_dict, interventer=self.custom) + batch_out = self._internal_predict(predictor, batch_data) + batch_result = parse_result(batch_data, batch_out, self.id2label_dict, interventer=self.custom) results += batch_result for index in empty_str_indexes: @@ -259,13 +309,8 @@ class LAC(hub.Module): return results elif isinstance(text, str) and text != "": - tensor_words = self.texts2tensor([text]) - - if use_gpu: - batch_out = self.gpu_predictor.run([tensor_words]) - else: - batch_out = self.cpu_predictor.run([tensor_words]) - batch_result = parse_result([text], batch_out[0], self.id2label_dict, interventer=self.custom) + batch_out = self._internal_predict(predictor, [text]) + batch_result = parse_result([text], batch_out, self.id2label_dict, interventer=self.custom) return batch_result[0]['word'] elif text == "": @@ -273,7 +318,7 @@ class LAC(hub.Module): else: raise TypeError("The input data is inconsistent with expectations.") - def lexical_analysis(self, texts=[], data={}, use_gpu=False, batch_size=1, return_tag=True): + def lexical_analysis(self, texts=[], data={}, use_gpu=False, batch_size=1, return_tag=True, use_device=None): """ Get the word segmentation results with the texts as input @@ -283,19 +328,30 @@ class LAC(hub.Module): use_gpu(bool): whether use gpu to predict or not batch_size(int): the program deals once with one batch return_tag: Whether to get tag or not. + use_device (str): use cpu, gpu, xpu or npu, overwrites use_gpu flag. Returns: results(list): the word segmentation results """ - if use_gpu: - try: - _places = os.environ["CUDA_VISIBLE_DEVICES"] - int(_places[0]) - except: - raise RuntimeError( - "Environment Variable CUDA_VISIBLE_DEVICES is not set correctly. If you wanna use gpu, please set CUDA_VISIBLE_DEVICES as cuda_device_id." 
- ) + # real predictor to use + if use_device is not None: + if use_device == "cpu": + predictor = self.cpu_predictor + elif use_device == "xpu": + predictor = self.xpu_predictor + elif use_device == "npu": + predictor = self.npu_predictor + elif use_device == "gpu": + predictor = self.gpu_predictor + else: + raise Exception("Unsupported device: " + use_device) + else: + # use_device is not set, therefore follow use_gpu + if use_gpu: + predictor = self.gpu_predictor + else: + predictor = self.cpu_predictor if texts != [] and isinstance(texts, list) and data == {}: predicted_data = texts @@ -320,13 +376,8 @@ class LAC(hub.Module): batch_data = predicted_data[start_idx:] start_idx = start_idx + batch_size - tensor_words = self.texts2tensor(batch_data) - - if use_gpu: - batch_out = self.gpu_predictor.run([tensor_words]) - else: - batch_out = self.cpu_predictor.run([tensor_words]) - batch_result = parse_result(batch_data, batch_out[0], self.id2label_dict, interventer=self.custom) + batch_out = self._internal_predict(predictor, batch_data) + batch_result = parse_result(batch_data, batch_out, self.id2label_dict, interventer=self.custom) results += batch_result for index in empty_str_indexes: @@ -344,8 +395,10 @@ class LAC(hub.Module): """ Run as a command """ - self.parser = argparse.ArgumentParser( - description="Run the lac module.", prog='hub run lac', usage='%(prog)s', add_help=True) + self.parser = argparse.ArgumentParser(description="Run the lac module.", + prog='hub run lac', + usage='%(prog)s', + add_help=True) self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") self.arg_config_group = self.parser.add_argument_group( @@ -365,8 +418,11 @@ class LAC(hub.Module): if args.user_dict: self.set_user_dict(args.user_dict) - results = self.lexical_analysis( - texts=input_data, use_gpu=args.use_gpu, batch_size=args.batch_size, return_tag=args.return_tag) + results = self.lexical_analysis(texts=input_data, + use_gpu=args.use_gpu, + batch_size=args.batch_size, + return_tag=args.return_tag, + use_device=args.use_device) return results @@ -388,17 +444,23 @@ class LAC(hub.Module): """ Add the command config options """ - self.arg_config_group.add_argument( - '--use_gpu', type=ast.literal_eval, default=False, help="whether use GPU or not") + self.arg_config_group.add_argument('--use_gpu', + type=ast.literal_eval, + default=False, + help="whether use GPU or not") self.arg_config_group.add_argument('--batch_size', type=int, default=1, help="batch size for prediction") - self.arg_config_group.add_argument( - '--user_dict', - type=str, - default=None, - help="customized dictionary for intervening the word segmentation result") - self.arg_config_group.add_argument( - '--return_tag', type=ast.literal_eval, default=True, help="whether return tags of results or not") + self.arg_config_group.add_argument('--user_dict', + type=str, + default=None, + help="customized dictionary for intervening the word segmentation result") + self.arg_config_group.add_argument('--return_tag', + type=ast.literal_eval, + default=True, + help="whether return tags of results or not") + self.arg_config_group.add_argument('--use_device', + choices=["cpu", "gpu", "xpu", "npu"], + help="use cpu, gpu, xpu or npu. 
overwrites use_gpu flag.") def add_module_input_arg(self): """ diff --git a/modules/text/lexical_analysis/lac/processor.py b/modules/text/lexical_analysis/lac/processor.py index 6ad9d6616b173541b8b74e7bf963788e81caf3c2..1521182ee97b2d5dd5695dce175b558749df1724 100644 --- a/modules/text/lexical_analysis/lac/processor.py +++ b/modules/text/lexical_analysis/lac/processor.py @@ -251,8 +251,8 @@ def word_to_ids(words, word2id_dict, word_replace_dict, oov_id=None): def parse_result(lines, crf_decode, id2label_dict, interventer=None): """Convert model's output tensor into string and tags """ - offset_list = crf_decode.lod[0] - crf_decode = crf_decode.as_ndarray() + offset_list = crf_decode.lod()[0] + crf_decode = crf_decode.copy_to_cpu() batch_size = len(offset_list) - 1 batch_out = [] for sent_index in range(batch_size): diff --git a/modules/text/punctuation_restoration/auto_punc/README.md b/modules/text/punctuation_restoration/auto_punc/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b107574afe79ee4ad14da33a4a0af331cc9531b0 --- /dev/null +++ b/modules/text/punctuation_restoration/auto_punc/README.md @@ -0,0 +1,150 @@ +# auto_punc + +|模型名称|auto_punc| +| :--- | :---: | +|类别|文本-标点恢复| +|网络|Ernie-1.0| +|数据集|WuDaoCorpora 2.0| +|是否支持Fine-tuning|否| +|模型大小|568MB| +|最新更新日期|2021-12-24| +|数据指标|-| + +## 一、模型基本信息 + +### 模型介绍 + +Ernie是百度提出的基于知识增强的持续学习语义理解模型,该模型将大数据预训练与多源丰富知识相结合,通过持续学习技术,不断吸收海量文本数据中词汇、结构、语义等方面的知识,实现模型效果不断进化。 + +["悟道"文本数据集](https://ks3-cn-beijing.ksyun.com/resources/WuDaoCorpora/WuDaoCorpora__A_Super_Large_scale_Chinese_Corporafor_Pre_training_Language_Models.pdf) +采用20多种规则从100TB原始网页数据中清洗得出最终数据集,注重隐私数据信息的去除,源头上避免GPT-3存在的隐私泄露风险;包含教育、科技等50+个行业数据标签,可以支持多领域预训练模型的训练。 +- 数据总量:3TB +- 数据格式:json +- 开源数量:200GB +- 数据集下载:https://resource.wudaoai.cn/ +- 日期:2021年12月23日 + +auto_punc采用了Ernie1.0预训练模型,在[WuDaoCorpora 2.0](https://resource.wudaoai.cn/home)的200G开源文本数据集上进行了标点恢复任务的训练,模型可直接用于预测,对输入的对中文文本自动添加7种标点符号:逗号(,)、句号(。)、感叹号(!)、问号(?)、顿号(、)、冒号(:)和分号(;)。 + +
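+效果示例(摘自下文“预测代码示例”的输入与输出):
+
+- 输入:今天的天气真好啊你下午有空吗我想约你一起去逛街
+- 输出:今天的天气真好啊!你下午有空吗?我想约你一起去逛街。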

+ + +更多详情请参考 +- [WuDaoCorpora: A Super Large-scale Chinese Corpora for Pre-training Language Models](https://ks3-cn-beijing.ksyun.com/resources/WuDaoCorpora/WuDaoCorpora__A_Super_Large_scale_Chinese_Corporafor_Pre_training_Language_Models.pdf) +- [ERNIE: Enhanced Representation through Knowledge Integration](https://arxiv.org/abs/1904.09223) + + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.1.0 + + - paddlehub >= 2.1.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) + +- ### 2、安装 + + - ```shell + $ hub install auto_punc + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + + +## 三、模型API预测 + +- ### 1、预测代码示例 + + ```python + import paddlehub as hub + + model = hub.Module( + name='auto_punc', + version='1.0.0') + + texts = [ + '今天的天气真好啊你下午有空吗我想约你一起去逛街', + '我最喜欢的诗句是先天下之忧而忧后天下之乐而乐', + ] + punc_texts = model.add_puncs(texts) + print(punc_texts) + # ['我最喜欢的诗句是:先天下之忧而忧,后天下之乐而乐。', '今天的天气真好啊!你下午有空吗?我想约你一起去逛街。'] + ``` + +- ### 2、API + - ```python + def add_puncs( + texts: Union[str, List[str]], + max_length=256, + device='cpu' + ) + ``` + - 对输入的中文文本自动添加标点符号。 + + - **参数** + + - `texts`:输入的中文文本,可为str或List[str]类型,预测时,中英文和数字以外的字符将会被删除。 + - `max_length`:模型预测时输入的最大长度,超过时文本会被截断,默认为256。 + - `device`:预测时使用的设备,默认为`cpu`,如需使用gpu预测,请设置为`gpu`。 + + - **返回** + + - `punc_texts`:List[str]类型,返回添加标点后的文本列表。 + + +## 四、服务部署 + +- PaddleHub Serving可以部署一个在线的文本标点添加的服务。 + +- ### 第一步:启动PaddleHub Serving + + - ```shell + $ hub serving start -m auto_punc + ``` + + - 这样就完成了一个文本标点添加服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 输入的中文文本,中英文和数字之外的字符在模型预测前会被删除 + texts = [ + '今天的天气真好啊你下午有空吗我想约你一起去逛街', + '我最喜欢的诗句是先天下之忧而忧后天下之乐而乐', + ] + + # 以key的方式指定text传入预测方法的时的参数,此例中为"texts" + data = {"texts": texts} + + # 发送post请求,content-type类型应指定json方式,url中的ip地址需改为对应机器的ip + url = "http://127.0.0.1:8866/predict/auto_punc" + + # 指定post请求的headers为application/json方式 + headers = {"Content-Type": "application/json"} + + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + ``` + +## 五、更新历史 + +* 1.0.0 + + 初始发布 + + ```shell + $ hub install auto_punc + ``` diff --git a/modules/text/punctuation_restoration/auto_punc/__init__.py b/modules/text/punctuation_restoration/auto_punc/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/modules/text/punctuation_restoration/auto_punc/module.py b/modules/text/punctuation_restoration/auto_punc/module.py new file mode 100644 index 0000000000000000000000000000000000000000..8a07812771735c4fa46bcc770f172d9eb0304078 --- /dev/null +++ b/modules/text/punctuation_restoration/auto_punc/module.py @@ -0,0 +1,127 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import re +from typing import List, Union + +import numpy as np +import paddle +from paddlehub.env import MODULE_HOME +from paddlehub.module.module import moduleinfo, serving +from paddlehub.utils.log import logger +from paddlenlp.transformers import ErnieTokenizer, ErnieForTokenClassification +from paddlenlp.data import Pad + + +@moduleinfo( + name="auto_punc", + version="1.0.0", + summary="", + author="KPatrick", + author_email="", + type="text/punctuation_restoration") +class Ernie(paddle.nn.Layer): + def __init__(self): + super(Ernie, self).__init__() + res_dir = os.path.join(MODULE_HOME, 'auto_punc') + punc_vocab_file = os.path.join(res_dir, 'assets', 'punc_vocab.txt') + ckpt_dir = os.path.join(res_dir, 'assets', 'ckpt') + + self.punc_vocab = self._load_dict(punc_vocab_file) + self.punc_list = list(self.punc_vocab.keys()) + self.model = ErnieForTokenClassification.from_pretrained(ckpt_dir) + self.model.eval() + self.tokenizer = ErnieTokenizer.from_pretrained('ernie-1.0') + + @staticmethod + def _load_dict(dict_path): + vocab = {} + i = 0 + with open(dict_path, 'r', encoding='utf-8') as fin: + for line in fin: + key = line.strip('\n') + vocab[key] = i + i += 1 + return vocab + + @staticmethod + def _clean_text(text, punc_list): + text = text.lower() + text = re.sub('[^A-Za-z0-9\u4e00-\u9fa5]', '', text) + text = re.sub(f'[{"".join([p for p in punc_list][1:])}]', '', text) + return text + + def forward(self, text: str): + wav = None + input_ids = self.frontend.get_input_ids(text, merge_sentences=True) + phone_ids = input_ids["phone_ids"] + for part_phone_ids in phone_ids: + with paddle.no_grad(): + mel = self.fastspeech2_inference(part_phone_ids) + temp_wav = self.pwg_inference(mel) + if wav is None: + wav = temp_wav + else: + wav = paddle.concat([wav, temp_wav]) + return wav + + @serving + def add_puncs(self, texts: Union[str, List[str]], max_length=256, device='cpu'): + assert isinstance(texts, str) or (isinstance(texts, list) and isinstance(texts[0], str)), \ + 'Input data should be str or List[str], but got {}'.format(type(texts)) + + if isinstance(texts, str): + texts = [texts] + + input_ids = [] + seg_ids = [] + seq_len = [] + for i in range(len(texts)): + clean_text = self._clean_text(texts[i], self.punc_list) + assert len(clean_text) > 0, f'Invalid input string: {texts[i]}' + + tokenized_input = self.tokenizer( + list(clean_text), return_length=True, is_split_into_words=True, max_seq_len=max_length) + + input_ids.append(tokenized_input['input_ids']) + seg_ids.append(tokenized_input['token_type_ids']) + seq_len.append(tokenized_input['seq_len']) + + paddle.set_device(device) + with paddle.no_grad(): + pad_func_for_input_ids = Pad(axis=0, pad_val=self.tokenizer.pad_token_id, dtype='int64') + pad_func_for_seg_ids = Pad(axis=0, pad_val=self.tokenizer.pad_token_type_id, dtype='int64') + input_ids = paddle.to_tensor(pad_func_for_input_ids(input_ids)) + seg_ids = paddle.to_tensor(pad_func_for_seg_ids(seg_ids)) + logits = self.model(input_ids, seg_ids) + preds = paddle.argmax(logits, axis=-1) + + tokens = [] + labels = [] + for i in range(len(input_ids)): + tokens.append(self.tokenizer.convert_ids_to_tokens(input_ids[i, 1:seq_len[i] - 1].tolist())) + labels.append(preds[i, 1:seq_len[i] - 1].tolist()) # Remove predictions of special tokens. 
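+        # Reassemble the text below: label 0 means no punctuation follows the
+        # token; any non-zero label indexes punc_list, and that punctuation mark
+        # is appended right after the corresponding token.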
+ + punc_texts = [] + for token, label in zip(tokens, labels): + assert len(token) == len(label) + text = '' + for t, l in zip(token, label): + text += t + if l != 0: # Non punc. + text += self.punc_list[l] + punc_texts.append(text) + + return punc_texts diff --git a/modules/text/sentiment_analysis/ernie_skep_sentiment_analysis/README.md b/modules/text/sentiment_analysis/ernie_skep_sentiment_analysis/README.md index 92361ed695c49d5108b9ea9690eb690eca3446e5..dc23a5dfbf0aa4d564f4e658fa53171b8e27f1de 100644 --- a/modules/text/sentiment_analysis/ernie_skep_sentiment_analysis/README.md +++ b/modules/text/sentiment_analysis/ernie_skep_sentiment_analysis/README.md @@ -65,7 +65,6 @@ for result in results: print(result['text']) print(result['sentiment_label']) - print(result['sentiment_key']) print(result['positive_probs']) print(result['negative_probs']) diff --git a/modules/text/sentiment_analysis/ernie_skep_sentiment_analysis/module.py b/modules/text/sentiment_analysis/ernie_skep_sentiment_analysis/module.py index e7021284cd76b2d4639b4ef8481ab32e16ea91df..e30d80fc2984e6592b662353629c3a68f8767380 100644 --- a/modules/text/sentiment_analysis/ernie_skep_sentiment_analysis/module.py +++ b/modules/text/sentiment_analysis/ernie_skep_sentiment_analysis/module.py @@ -139,14 +139,29 @@ class ErnieSkepSentimentAnalysis(TransformerModule): ) results = [] + feature_list = [] for text in texts: + # feature.shape: [1, 512, 1] + # batch on the first dimension feature = self._convert_text_to_feature(text) - inputs = [self.array2tensor(ndarray) for ndarray in feature] - output = self.predictor.run(inputs) - probilities = np.array(output[0].data.float_data()) + feature_list.append(feature) + + feature_batch = [ + np.concatenate([feature[0] for feature in feature_list], axis=0), + np.concatenate([feature[1] for feature in feature_list], axis=0), + np.concatenate([feature[2] for feature in feature_list], axis=0), + np.concatenate([feature[3] for feature in feature_list], axis=0), + np.concatenate([feature[4] for feature in feature_list], axis=0), + ] + + inputs = [self.array2tensor(ndarray) for ndarray in feature_batch] + output = self.predictor.run(inputs) + probilities_list = np.array(output[0].data.float_data()) + probilities_list = probilities_list.reshape((-1, 2)) + for i, probilities in enumerate(probilities_list): label = self.label_map[np.argmax(probilities)] result = { - 'text': text, + 'text': texts[i], 'sentiment_label': label, 'positive_probs': probilities[1], 'negative_probs': probilities[0] diff --git a/modules/text/sentiment_analysis/senta_bilstm/README_en.md b/modules/text/sentiment_analysis/senta_bilstm/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..ae7ca125aedb351dc9a01051cbe693015cc3641b --- /dev/null +++ b/modules/text/sentiment_analysis/senta_bilstm/README_en.md @@ -0,0 +1,190 @@ +# senta_bilstm + +| Module Name | senta_bilstm | +| :------------------ | :------------: | +| Category | text-sentiment_analysis | +| Network | BiLSTM | +| Dataset | Dataset built by Baidu | +| Fine-tuning supported or not | No | +| Module Size | 690M | +| Latest update date | 2021-02-26 | +| Data indicators | - | + + +## I. 
Basic Information of Module + +- ### Module Introduction + + - Sentiment Classification (Senta for short) can automatically judge the emotional polarity category of Chinese texts with subjective description and give corresponding confidence, which can help enterprises understand users' consumption habits, analyze hot topics and crisis public opinion monitoring, and provide favorable decision support for enterprises. The model is based on a bidirectional LSTM structure, with positive and negative emotion types. + + + +## II. Installation + +- ### 1、Environmental dependence + + - paddlepaddle >= 1.8.0 + + - paddlehub >= 1.8.0 | [How to install PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) + +- ### 2、Installation + + - ```shell + $ hub install senta_bilstm + ``` + - If you have problems during installation, please refer to:[windows_quickstart](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [linux_quickstart](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [mac_quickstart](../../../../docs/docs_ch/get_start/mac_quickstart.md) + + + +## III. Module API and Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run senta_bilstm --input_text "这家餐厅很好吃" + ``` + or + - ```shell + $ hub run senta_bilstm --input_file test.txt + ``` + - test.txt stores the text to be predicted, for example: + + > 这家餐厅很好吃 + + > 这部电影真的很差劲 + + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command line instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + + senta = hub.Module(name="senta_bilstm") + test_text = ["这家餐厅很好吃", "这部电影真的很差劲"] + results = senta.sentiment_classify(texts=test_text, + use_gpu=False, + batch_size=1) + + for result in results: + print(result['text']) + print(result['sentiment_label']) + print(result['sentiment_key']) + print(result['positive_probs']) + print(result['negative_probs']) + + # 这家餐厅很好吃 1 positive 0.9407 0.0593 + # 这部电影真的很差劲 0 negative 0.02 0.98 + ``` + +- ### 3、API + + - ```python + def sentiment_classify(texts=[], data={}, use_gpu=False, batch_size=1) + ``` + + - senta_bilstm predicting interfaces, predicting sentiment classification of input sentences (dichotomies, positive/negative) + + - **Parameter** + + - texts(list): data to be predicted, if texts parameter is used, there is no need to pass in data parameter. You can use any of the two parameters. + - data(dict): predicted data , key must be text,value is data to be predicted. if data parameter is used, there is no need to pass in texts parameter. You can use any of the two parameters. It is suggested to use texts parameter, and data parameter will be discarded later. + - use_gpu(bool): use GPU or not. If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before prediction. Otherwise, need not set it. + - batch_size(int): batch size + + - **Return** + + - results(list): result of sentiment classification + + + - ```python + def get_labels() + ``` + - get the category of senta_bilstm + + - **Return** + + - labels(dict): the category of senta_bilstm(Dichotomies, positive/negative) + + - ```python + def get_vocab_path() + ``` + - Get a vocabulary for pre-training + + - **Return** + + - vocab_path(str): Vocabulary path + + + +## IV. Server Deployment + +- PaddleHub Serving can deploy an online sentiment analysis detection service and you can use this interface for online Web applications. 
+ +- ## Step 1: Start PaddleHub Serving + + - Run the startup command: + - ```shell + $ hub serving start -m senta_bilstm + ``` + + - The model loading process is displayed on startup. After the startup is successful, the following information is displayed: + - ```shell + Loading senta_bilstm successful. + ``` + + - The servitization API is now deployed and the default port number is 8866. + + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before prediction. Otherwise, need not set it. + +- ## Step 2: Send a predictive request + + - After configuring the server, the following lines of code can be used to send the prediction request and obtain the prediction result + + - ```python + import requests + import json + + # data to be predicted + text = ["这家餐厅很好吃", "这部电影真的很差劲"] + + # Set the running configuration + # Corresponding to local prediction senta_bilstm.sentiment_classify(texts=text, batch_size=1, use_gpu=True) + data = {"texts": text, "batch_size": 1, "use_gpu":True} + + # set the prediction method to senta_bilstm and send a POST request, content-type should be set to json + # HOST_IP is the IP address of the server + url = "http://HOST_IP:8866/predict/senta_bilstm" + headers = {"Content-Type": "application/json"} + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # print prediction result + print(json.dumps(r.json(), indent=4, ensure_ascii=False)) + ``` + + - For more information about PaddleHub Serving, please refer to:[Serving Deployment](../../../../docs/docs_ch/tutorial/serving.md) + + + +## V. Release Note + +* 1.0.0 + + First release + +* 1.0.1 + + Vocabulary upgrade + +* 1.1.0 + + Significantly improve predictive performance + +* 1.2.0 + + Model upgrade, support transfer learning for text classification, text matching and other tasks + - ```shell + $ hub install senta_bilstm==1.2.0 + ``` diff --git a/modules/text/simultaneous_translation/stacl/transformer_nist_wait_1/README.md b/modules/text/simultaneous_translation/stacl/transformer_nist_wait_1/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d439008e00dce03b1cb19c90aefc1c404ea7e7a0 --- /dev/null +++ b/modules/text/simultaneous_translation/stacl/transformer_nist_wait_1/README.md @@ -0,0 +1,210 @@ +# transformer_nist_wait_1 +|模型名称|transformer_nist_wait_1| +| :--- | :---: | +|类别|同声传译| +|网络|transformer| +|数据集|NIST 2008-中英翻译数据集| +|是否支持Fine-tuning|否| +|模型大小|377MB| +|最新更新日期|2021-09-17| +|数据指标|-| + +## 一、模型基本信息 + +- ### 模型介绍 + + - 同声传译(Simultaneous Translation),即在句子完成之前进行翻译,同声传译的目标是实现同声传译的自动化,它可以与源语言同时翻译,延迟时间只有几秒钟。 + STACL 是论文 [STACL: Simultaneous Translation with Implicit Anticipation and Controllable Latency using Prefix-to-Prefix Framework](https://www.aclweb.org/anthology/P19-1289/) 中针对同传提出的适用于所有同传场景的翻译架构。 + - STACL 主要具有以下优势: + + - Prefix-to-Prefix架构拥有预测能力,即在未看到源词的情况下仍然可以翻译出对应的目标词,克服了SOV→SVO等词序差异 +

+    (示意图:Seq2Seq 全句解码与 STACL Wait-k(图中为 Wait-2)流式解码的对比,原图已省略)
+ 和传统的机器翻译模型主要的区别在于翻译时是否需要利用全句的源句。上图中,Seq2Seq模型需要等到全句的源句(1-5)全部输入Encoder后,Decoder才开始解码进行翻译;而STACL架构采用了Wait-k(图中Wait-2)的策略,当源句只有两个词(1和2)输入到Encoder后,Decoder即可开始解码预测目标句的第一个词。 + + - Wait-k策略可以不需要全句的源句,直接预测目标句,可以实现任意的字级延迟,同时保持较高的翻译质量。 +

+    (示意图:(a) simultaneous wait-2 与 (b) non-simultaneous baseline 解码时机的对比,原图已省略)
+ Wait-k策略首先等待源句单词,然后与源句的其余部分同时翻译,即输出总是隐藏在输入后面。这是受到同声传译人员的启发,同声传译人员通常会在几秒钟内开始翻译演讲者的演讲,在演讲者结束几秒钟后完成。例如,如果k=2,第一个目标词使用前2个源词预测,第二个目标词使用前3个源词预测,以此类推。上图中,(a)simultaneous: our wait-2 等到"布什"和"总统"输入后就开始解码预测"pres.",而(b) non-simultaneous baseline 为传统的翻译模型,需要等到整句"布什 总统 在 莫斯科 与 普京 会晤"才开始解码预测。 + + - 该PaddleHub Module基于transformer网络结构,采用wait-1策略进行中文到英文的翻译。 + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.1.0 + + - paddlehub >= 2.1.0 | [如何安装PaddleHub](../../../../../docs/docs_ch/get_start/installation.rst) + +- ### 2、安装 + + - ```shell + $ hub install transformer_nist_wait_1 + ``` + + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、预测代码示例 + + - ```python + import paddlehub as hub + + model = hub.Module(name="transformer_nist_wait_1") + + # 待预测数据(模拟同声传译实时输入) + text = [ + "他", + "他还", + "他还说", + "他还说现在", + "他还说现在正在", + "他还说现在正在为", + "他还说现在正在为这", + "他还说现在正在为这一", + "他还说现在正在为这一会议", + "他还说现在正在为这一会议作出", + "他还说现在正在为这一会议作出安排", + "他还说现在正在为这一会议作出安排。", + ] + + for t in text: + print("input: {}".format(t)) + result = model.translate(t) + print("model output: {}\n".format(result)) + + # input: 他 + # model output: he + # + # input: 他还 + # model output: he also + # + # input: 他还说 + # model output: he also said + # + # input: 他还说现在 + # model output: he also said that + # + # input: 他还说现在正在 + # model output: he also said that he + # + # input: 他还说现在正在为 + # model output: he also said that he is + # + # input: 他还说现在正在为这 + # model output: he also said that he is making + # + # input: 他还说现在正在为这一 + # model output: he also said that he is making preparations + # + # input: 他还说现在正在为这一会议 + # model output: he also said that he is making preparations for + # + # input: 他还说现在正在为这一会议作出 + # model output: he also said that he is making preparations for this + # + # input: 他还说现在正在为这一会议作出安排 + # model output: he also said that he is making preparations for this meeting + # + # input: 他还说现在正在为这一会议作出安排。 + # model output: he also said that he is making preparations for this meeting . + ``` + +- ### 2、 API + + - ```python + __init__(max_length=256, max_out_len=256) + ``` + + - 初始化module, 可配置模型的输入文本的最大长度 + + - **参数** + + - max_length(int): 输入文本的最大长度,默认值为256。 + - max_out_len(int): 输出文本的最大解码长度,超过最大解码长度时会截断句子的后半部分,默认值为256。 + + - ```python + translate(text, use_gpu=False) + ``` + + - 预测API,输入源语言的文本(模拟同传语音输入),解码后输出翻译后的目标语言文本。 + + - **参数** + + - text(str): 输入源语言的文本,数据类型为str + - use_gpu(bool): 是否使用gpu进行预测,默认为False + + - **返回** + + - result(str): 翻译后的目标语言文本。 + +## 四、服务部署 + +- PaddleHub Serving可以部署一个在线同声传译服务(需要用户配置一个语音转文本应用预先将语音输入转为中文文字),可以将此接口用于在线web应用。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + + - ```shell + $ hub serving start -m transformer_nist_wait_1 + ``` + + - 启动时会显示加载模型过程,启动成功后显示 + + - ```shell + Loading transformer_nist_wait_1 successful. 
+ ``` + + - 这样就完成了服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 待预测数据(模拟同声传译实时输入) + text = [ + "他", + "他还", + "他还说", + "他还说现在", + "他还说现在正在", + "他还说现在正在为", + "他还说现在正在为这", + "他还说现在正在为这一", + "他还说现在正在为这一会议", + "他还说现在正在为这一会议作出", + "他还说现在正在为这一会议作出安排", + "他还说现在正在为这一会议作出安排。", + ] + + # 指定预测方法为transformer_nist_wait_1并发送post请求,content-type类型应指定json方式 + # HOST_IP为服务器IP + url = "http://HOST_IP:8866/predict/transformer_nist_wait_1" + headers = {"Content-Type": "application/json"} + for t in text: + print("input: {}".format(t)) + result = requests.post(url=url, headers=headers, data=json.dumps({"text": t})) + # 打印预测结果 + print("model output: {}\n".format(result.json()['results'])) + + - 关于PaddleHub Serving更多信息参考:[服务部署](../../../../../docs/docs_ch/tutorial/serving.md) + +## 五、更新历史 + +* 1.0.0 + 初始发布 + ```shell + hub install transformer_nist_wait_1==1.0.0 + ``` diff --git a/modules/text/simultaneous_translation/stacl/transformer_nist_wait_1/__init__.py b/modules/text/simultaneous_translation/stacl/transformer_nist_wait_1/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/modules/text/simultaneous_translation/stacl/transformer_nist_wait_1/model.py b/modules/text/simultaneous_translation/stacl/transformer_nist_wait_1/model.py new file mode 100644 index 0000000000000000000000000000000000000000..32cfe670981b1b3bc3e782997679c00242f110e6 --- /dev/null +++ b/modules/text/simultaneous_translation/stacl/transformer_nist_wait_1/model.py @@ -0,0 +1,339 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
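+
+# The classes below implement the STACL prefix-to-prefix (wait-k) simultaneous
+# translation Transformer: DecoderLayer/Decoder run cross-attention over a
+# growing list of encoder outputs, and SimultaneousTransformer combines them
+# for training (forward) and streaming greedy decoding (greedy_search).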
+ +import numpy as np + +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +from paddlenlp.transformers import WordEmbedding, PositionalEmbedding + + +class DecoderLayer(nn.TransformerDecoderLayer): + def __init__(self, *args, **kwargs): + super(DecoderLayer, self).__init__(*args, **kwargs) + + def forward(self, tgt, memory, tgt_mask=None, memory_mask=None, cache=None): + residual = tgt + if self.normalize_before: + tgt = self.norm1(tgt) + if cache is None: + tgt = self.self_attn(tgt, tgt, tgt, tgt_mask, None) + else: + tgt, incremental_cache = self.self_attn(tgt, tgt, tgt, tgt_mask, + cache[0]) + tgt = residual + self.dropout1(tgt) + if not self.normalize_before: + tgt = self.norm1(tgt) + + residual = tgt + if self.normalize_before: + tgt = self.norm2(tgt) + if len(memory) == 1: + # Full sent + tgt = self.cross_attn(tgt, memory[0], memory[0], memory_mask, None) + else: + # Wait-k policy + cross_attn_outputs = [] + for i in range(tgt.shape[1]): + q = tgt[:, i:i + 1, :] + if i >= len(memory): + e = memory[-1] + else: + e = memory[i] + cross_attn_outputs.append( + self.cross_attn(q, e, e, memory_mask[:, :, i:i + 1, : + e.shape[1]], None)) + tgt = paddle.concat(cross_attn_outputs, axis=1) + tgt = residual + self.dropout2(tgt) + if not self.normalize_before: + tgt = self.norm2(tgt) + + residual = tgt + if self.normalize_before: + tgt = self.norm3(tgt) + tgt = self.linear2(self.dropout(self.activation(self.linear1(tgt)))) + tgt = residual + self.dropout3(tgt) + if not self.normalize_before: + tgt = self.norm3(tgt) + return tgt if cache is None else (tgt, (incremental_cache, )) + + +class Decoder(nn.TransformerDecoder): + """ + PaddlePaddle 2.1 casts memory_mask.dtype to memory.dtype, but in STACL, + type of memory is list, having no dtype attribute. + """ + + def forward(self, tgt, memory, tgt_mask=None, memory_mask=None, cache=None): + output = tgt + new_caches = [] + for i, mod in enumerate(self.layers): + if cache is None: + output = mod(output, + memory, + tgt_mask=tgt_mask, + memory_mask=memory_mask, + cache=None) + else: + output, new_cache = mod(output, + memory, + tgt_mask=tgt_mask, + memory_mask=memory_mask, + cache=cache[i]) + new_caches.append(new_cache) + + if self.norm is not None: + output = self.norm(output) + + return output if cache is None else (output, new_caches) + + +class SimultaneousTransformer(nn.Layer): + """ + model + """ + def __init__(self, + src_vocab_size, + trg_vocab_size, + max_length=256, + n_layer=6, + n_head=8, + d_model=512, + d_inner_hid=2048, + dropout=0.1, + weight_sharing=False, + bos_id=0, + eos_id=1, + waitk=-1): + super(SimultaneousTransformer, self).__init__() + self.trg_vocab_size = trg_vocab_size + self.emb_dim = d_model + self.bos_id = bos_id + self.eos_id = eos_id + self.dropout = dropout + self.waitk = waitk + self.n_layer = n_layer + self.n_head = n_head + self.d_model = d_model + + self.src_word_embedding = WordEmbedding( + vocab_size=src_vocab_size, emb_dim=d_model, bos_id=self.bos_id) + self.src_pos_embedding = PositionalEmbedding( + emb_dim=d_model, max_length=max_length+1) + if weight_sharing: + assert src_vocab_size == trg_vocab_size, ( + "Vocabularies in source and target should be same for weight sharing." 
+ ) + self.trg_word_embedding = self.src_word_embedding + self.trg_pos_embedding = self.src_pos_embedding + else: + self.trg_word_embedding = WordEmbedding( + vocab_size=trg_vocab_size, emb_dim=d_model, bos_id=self.bos_id) + self.trg_pos_embedding = PositionalEmbedding( + emb_dim=d_model, max_length=max_length+1) + + encoder_layer = nn.TransformerEncoderLayer( + d_model=d_model, + nhead=n_head, + dim_feedforward=d_inner_hid, + dropout=dropout, + activation='relu', + normalize_before=True, + bias_attr=[False, True]) + encoder_norm = nn.LayerNorm(d_model) + self.encoder = nn.TransformerEncoder( + encoder_layer=encoder_layer, num_layers=n_layer, norm=encoder_norm) + + decoder_layer = DecoderLayer( + d_model=d_model, + nhead=n_head, + dim_feedforward=d_inner_hid, + dropout=dropout, + activation='relu', + normalize_before=True, + bias_attr=[False, False, True]) + decoder_norm = nn.LayerNorm(d_model) + self.decoder = Decoder( + decoder_layer=decoder_layer, num_layers=n_layer, norm=decoder_norm) + + if weight_sharing: + self.linear = lambda x: paddle.matmul( + x=x, y=self.trg_word_embedding.word_embedding.weight, transpose_y=True) + else: + self.linear = nn.Linear( + in_features=d_model, + out_features=trg_vocab_size, + bias_attr=False) + + def forward(self, src_word, trg_word): + src_max_len = paddle.shape(src_word)[-1] + trg_max_len = paddle.shape(trg_word)[-1] + base_attn_bias = paddle.cast( + src_word == self.bos_id, + dtype=paddle.get_default_dtype()).unsqueeze([1, 2]) * -1e9 + src_slf_attn_bias = base_attn_bias + src_slf_attn_bias.stop_gradient = True + trg_slf_attn_bias = paddle.tensor.triu( + (paddle.ones( + (trg_max_len, trg_max_len), + dtype=paddle.get_default_dtype()) * -np.inf), + 1) + trg_slf_attn_bias.stop_gradient = True + trg_src_attn_bias = paddle.tile(base_attn_bias, [1, 1, trg_max_len, 1]) + src_pos = paddle.cast( + src_word != self.bos_id, dtype="int64") * paddle.arange( + start=0, end=src_max_len) + trg_pos = paddle.cast( + trg_word != self.bos_id, dtype="int64") * paddle.arange( + start=0, end=trg_max_len) + src_emb = self.src_word_embedding(src_word) + src_pos_emb = self.src_pos_embedding(src_pos) + src_emb = src_emb + src_pos_emb + enc_input = F.dropout( + src_emb, p=self.dropout, + training=self.training) if self.dropout else src_emb + with paddle.static.amp.fp16_guard(): + if self.waitk >= src_max_len or self.waitk == -1: + # Full sentence + enc_outputs = [ + self.encoder( + enc_input, src_mask=src_slf_attn_bias) + ] + else: + # Wait-k policy + enc_outputs = [] + for i in range(self.waitk, src_max_len + 1): + enc_output = self.encoder( + enc_input[:, :i, :], + src_mask=src_slf_attn_bias[:, :, :, :i]) + enc_outputs.append(enc_output) + + trg_emb = self.trg_word_embedding(trg_word) + trg_pos_emb = self.trg_pos_embedding(trg_pos) + trg_emb = trg_emb + trg_pos_emb + dec_input = F.dropout( + trg_emb, p=self.dropout, + training=self.training) if self.dropout else trg_emb + dec_output = self.decoder( + dec_input, + enc_outputs, + tgt_mask=trg_slf_attn_bias, + memory_mask=trg_src_attn_bias) + + predict = self.linear(dec_output) + + return predict + + def beam_search(self, src_word, beam_size=4, max_len=256, waitk=-1): + # TODO: "Speculative Beam Search for Simultaneous Translation" + raise NotImplementedError + + def greedy_search(self, + src_word, + max_len=256, + waitk=-1, + caches=None, + bos_id=None): + """ + greedy_search uses streaming reader. It doesn't need calling + encoder many times, an a sub-sentence just needs calling encoder once. 
+ So, it needs previous state(caches) and last one of generated + tokens id last time. + """ + src_max_len = paddle.shape(src_word)[-1] + base_attn_bias = paddle.cast( + src_word == self.bos_id, + dtype=paddle.get_default_dtype()).unsqueeze([1, 2]) * -1e9 + src_slf_attn_bias = base_attn_bias + src_slf_attn_bias.stop_gradient = True + trg_src_attn_bias = paddle.tile(base_attn_bias, [1, 1, 1, 1]) + src_pos = paddle.cast( + src_word != self.bos_id, dtype="int64") * paddle.arange( + start=0, end=src_max_len) + src_emb = self.src_word_embedding(src_word) + src_pos_emb = self.src_pos_embedding(src_pos) + src_emb = src_emb + src_pos_emb + enc_input = F.dropout( + src_emb, p=self.dropout, + training=self.training) if self.dropout else src_emb + enc_outputs = [self.encoder(enc_input, src_mask=src_slf_attn_bias)] + + # constant number + batch_size = enc_outputs[-1].shape[0] + max_len = ( + enc_outputs[-1].shape[1] + 20) if max_len is None else max_len + end_token_tensor = paddle.full( + shape=[batch_size, 1], fill_value=self.eos_id, dtype="int64") + + predict_ids = [] + log_probs = paddle.full( + shape=[batch_size, 1], fill_value=0, dtype="float32") + if not bos_id: + trg_word = paddle.full( + shape=[batch_size, 1], fill_value=self.bos_id, dtype="int64") + else: + trg_word = paddle.full( + shape=[batch_size, 1], fill_value=bos_id, dtype="int64") + + # init states (caches) for transformer + if not caches: + caches = self.decoder.gen_cache(enc_outputs[-1], do_zip=False) + + for i in range(max_len): + trg_pos = paddle.full( + shape=trg_word.shape, fill_value=i, dtype="int64") + trg_emb = self.trg_word_embedding(trg_word) + trg_pos_emb = self.trg_pos_embedding(trg_pos) + trg_emb = trg_emb + trg_pos_emb + dec_input = F.dropout( + trg_emb, p=self.dropout, + training=self.training) if self.dropout else trg_emb + + if waitk < 0 or i >= len(enc_outputs): + # if the decoder step is full sent or longer than all source + # step, then read the whole src + _e = enc_outputs[-1] + dec_output, caches = self.decoder( + dec_input, [_e], None, + trg_src_attn_bias[:, :, :, :_e.shape[1]], caches) + else: + _e = enc_outputs[i] + dec_output, caches = self.decoder( + dec_input, [_e], None, + trg_src_attn_bias[:, :, :, :_e.shape[1]], caches) + + dec_output = paddle.reshape( + dec_output, shape=[-1, dec_output.shape[-1]]) + + logits = self.linear(dec_output) + step_log_probs = paddle.log(F.softmax(logits, axis=-1)) + log_probs = paddle.add(x=step_log_probs, y=log_probs) + scores = log_probs + topk_scores, topk_indices = paddle.topk(x=scores, k=1) + + finished = paddle.equal(topk_indices, end_token_tensor) + trg_word = topk_indices + log_probs = topk_scores + + predict_ids.append(topk_indices) + + if paddle.all(finished).numpy(): + break + + predict_ids = paddle.stack(predict_ids, axis=0) + finished_seq = paddle.transpose(predict_ids, [1, 2, 0]) + finished_scores = topk_scores + + return finished_seq, finished_scores, caches \ No newline at end of file diff --git a/modules/text/simultaneous_translation/stacl/transformer_nist_wait_1/module.py b/modules/text/simultaneous_translation/stacl/transformer_nist_wait_1/module.py new file mode 100644 index 0000000000000000000000000000000000000000..93a0d21b4b943eb1d5f3286e36576437f4ba5057 --- /dev/null +++ b/modules/text/simultaneous_translation/stacl/transformer_nist_wait_1/module.py @@ -0,0 +1,125 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +import jieba +import paddle +from paddlenlp.transformers import position_encoding_init +from paddlenlp.transformers import WordEmbedding, PositionalEmbedding +from paddlehub.env import MODULE_HOME +from paddlehub.module.module import moduleinfo, serving + +from transformer_nist_wait_1.model import SimultaneousTransformer +from transformer_nist_wait_1.processor import STACLTokenizer, predict + + +@moduleinfo( + name="transformer_nist_wait_1", + version="1.0.0", + summary="", + author="PaddlePaddle", + author_email="", + type="nlp/simultaneous_translation", +) +class STTransformer(): + """ + Transformer model for simultaneous translation. + """ + + # Model config + model_config = { + # Number of head used in multi-head attention. + "n_head": 8, + # Number of sub-layers to be stacked in the encoder and decoder. + "n_layer": 6, + # The dimension for word embeddings, which is also the last dimension of + # the input and output of multi-head attention, position-wise feed-forward + # networks, encoder and decoder. + "d_model": 512, + } + + def __init__(self, + max_length=256, + max_out_len=256, + ): + super(STTransformer, self).__init__() + bpe_codes_fpath = os.path.join(MODULE_HOME, "transformer_nist_wait_1", "assets", "2M.zh2en.dict4bpe.zh") + src_vocab_fpath = os.path.join(MODULE_HOME, "transformer_nist_wait_1", "assets", "nist.20k.zh.vocab") + trg_vocab_fpath = os.path.join(MODULE_HOME, "transformer_nist_wait_1", "assets", "nist.10k.en.vocab") + params_fpath = os.path.join(MODULE_HOME, "transformer_nist_wait_1", "assets", "transformer.pdparams") + self.max_length = max_length + self.max_out_len = max_out_len + self.tokenizer = STACLTokenizer( + bpe_codes_fpath, + src_vocab_fpath, + trg_vocab_fpath, + ) + src_vocab_size = self.tokenizer.src_vocab_size + trg_vocab_size = self.tokenizer.trg_vocab_size + self.transformer = SimultaneousTransformer( + src_vocab_size, + trg_vocab_size, + max_length=self.max_length, + n_layer=self.model_config['n_layer'], + n_head=self.model_config['n_head'], + d_model=self.model_config['d_model'], + ) + model_dict = paddle.load(params_fpath) + # To avoid a longer length than training, reset the size of position + # encoding to max_length + model_dict["src_pos_embedding.pos_encoder.weight"] = position_encoding_init( + self.max_length + 1, self.model_config['d_model']) + model_dict["trg_pos_embedding.pos_encoder.weight"] = position_encoding_init( + self.max_length + 1, self.model_config['d_model']) + self.transformer.load_dict(model_dict) + + @serving + def translate(self, text, use_gpu=False): + paddle.set_device('gpu') if use_gpu else paddle.set_device('cpu') + + # Word segmentation + text = ' '.join(jieba.cut(text)) + # For decoding max length + decoder_max_length = 1 + # For decoding cache + cache = None + # For decoding start token id + bos_id = None + # Current source word index + i = 0 + # For decoding: is_last=True, max_len=256 + is_last = False + # Tokenized id + user_input_tokenized = [] + # Store 
the translation + result = [] + + bpe_str, tokenized_src = self.tokenizer.tokenize(text) + while i < len(tokenized_src): + user_input_tokenized.append(tokenized_src[i]) + if bpe_str[i] in ['。', '?', '!']: + is_last = True + result, cache, bos_id = predict( + user_input_tokenized, + decoder_max_length, + is_last, + cache, + bos_id, + result, + self.tokenizer, + self.transformer, + max_out_len=self.max_out_len) + i += 1 + return " ".join(result) \ No newline at end of file diff --git a/modules/text/simultaneous_translation/stacl/transformer_nist_wait_1/processor.py b/modules/text/simultaneous_translation/stacl/transformer_nist_wait_1/processor.py new file mode 100644 index 0000000000000000000000000000000000000000..2944ee9639f3f72caa8761422a57e720ec29c081 --- /dev/null +++ b/modules/text/simultaneous_translation/stacl/transformer_nist_wait_1/processor.py @@ -0,0 +1,124 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import paddle +from paddlenlp.data import Vocab +from subword_nmt import subword_nmt + + +class STACLTokenizer: + """ + Jieba+BPE, and convert tokens to ids. + """ + + def __init__(self, + bpe_codes_fpath, + src_vocab_fpath, + trg_vocab_fpath, + special_token=["", "", ""]): + bpe_parser = subword_nmt.create_apply_bpe_parser() + bpe_args = bpe_parser.parse_args(args=['-c', bpe_codes_fpath]) + bpe_args.codes.close() + bpe_args.codes = open(bpe_codes_fpath, 'r', encoding='utf-8') + self.bpe = subword_nmt.BPE(bpe_args.codes, bpe_args.merges, + bpe_args.separator, None, + bpe_args.glossaries) + + self.src_vocab = Vocab.load_vocabulary( + src_vocab_fpath, + bos_token=special_token[0], + eos_token=special_token[1], + unk_token=special_token[2]) + + self.trg_vocab = Vocab.load_vocabulary( + trg_vocab_fpath, + bos_token=special_token[0], + eos_token=special_token[1], + unk_token=special_token[2]) + + self.src_vocab_size = len(self.src_vocab) + self.trg_vocab_size = len(self.trg_vocab) + + def tokenize(self, text): + bpe_str = self.bpe.process_line(text) + ids = self.src_vocab.to_indices(bpe_str.split()) + return bpe_str.split(), ids + + +def post_process_seq(seq, + bos_idx=0, + eos_idx=1, + output_bos=False, + output_eos=False): + """ + Post-process the decoded sequence. 
+ """ + eos_pos = len(seq) - 1 + for i, idx in enumerate(seq): + if idx == eos_idx: + eos_pos = i + break + seq = [ + idx for idx in seq[:eos_pos + 1] + if (output_bos or idx != bos_idx) and (output_eos or idx != eos_idx) + ] + return seq + + +def predict(tokenized_src, + decoder_max_length, + is_last, + cache, + bos_id, + result, + tokenizer, + transformer, + n_best=1, + max_out_len=256, + eos_idx=1, + waitk=1, + ): + # Set evaluate mode + transformer.eval() + + if len(tokenized_src) < waitk: + return result, cache, bos_id + + with paddle.no_grad(): + paddle.disable_static() + input_src = tokenized_src + if is_last: + decoder_max_length = max_out_len + input_src += [eos_idx] + src_word = paddle.to_tensor(input_src).unsqueeze(axis=0) + finished_seq, finished_scores, cache = transformer.greedy_search( + src_word, + max_len=decoder_max_length, + waitk=waitk, + caches=cache, + bos_id=bos_id) + finished_seq = finished_seq.numpy() + for beam_idx, beam in enumerate(finished_seq[0]): + if beam_idx >= n_best: + break + id_list = post_process_seq(beam) + if len(id_list) == 0: + continue + bos_id = id_list[-1] + word_list = tokenizer.trg_vocab.to_tokens(id_list) + for word in word_list: + result.append(word) + res = ' '.join(word_list).replace('@@ ', '') + paddle.enable_static() + return result, cache, bos_id \ No newline at end of file diff --git a/modules/text/simultaneous_translation/stacl/transformer_nist_wait_1/requirements.txt b/modules/text/simultaneous_translation/stacl/transformer_nist_wait_1/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..46ecba73afadc86c801fbeb72c72f7c7036491f0 --- /dev/null +++ b/modules/text/simultaneous_translation/stacl/transformer_nist_wait_1/requirements.txt @@ -0,0 +1,2 @@ +jieba==0.42.1 +subword-nmt==0.3.7 diff --git a/modules/text/simultaneous_translation/stacl/transformer_nist_wait_3/README.md b/modules/text/simultaneous_translation/stacl/transformer_nist_wait_3/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d0a8003e7b519e3bbad0a27f870a6fc47d51d067 --- /dev/null +++ b/modules/text/simultaneous_translation/stacl/transformer_nist_wait_3/README.md @@ -0,0 +1,210 @@ +# transformer_nist_wait_3 +|模型名称|transformer_nist_wait_3| +| :--- | :---: | +|类别|同声传译| +|网络|transformer| +|数据集|NIST 2008-中英翻译数据集| +|是否支持Fine-tuning|否| +|模型大小|377MB| +|最新更新日期|2021-09-17| +|数据指标|-| + +## 一、模型基本信息 + +- ### 模型介绍 + + - 同声传译(Simultaneous Translation),即在句子完成之前进行翻译,同声传译的目标是实现同声传译的自动化,它可以与源语言同时翻译,延迟时间只有几秒钟。 + STACL 是论文 [STACL: Simultaneous Translation with Implicit Anticipation and Controllable Latency using Prefix-to-Prefix Framework](https://www.aclweb.org/anthology/P19-1289/) 中针对同传提出的适用于所有同传场景的翻译架构。 + - STACL 主要具有以下优势: + + - Prefix-to-Prefix架构拥有预测能力,即在未看到源词的情况下仍然可以翻译出对应的目标词,克服了SOV→SVO等词序差异 +

+    (此处原为居中插图:Seq2Seq 模型与 STACL(Wait-2)架构的翻译流程对比示意图)
+
+ 和传统的机器翻译模型主要的区别在于翻译时是否需要利用全句的源句。上图中,Seq2Seq模型需要等到全句的源句(1-5)全部输入Encoder后,Decoder才开始解码进行翻译;而STACL架构采用了Wait-k(图中Wait-2)的策略,当源句只有两个词(1和2)输入到Encoder后,Decoder即可开始解码预测目标句的第一个词。 + + - Wait-k策略可以不需要全句的源句,直接预测目标句,可以实现任意的字级延迟,同时保持较高的翻译质量。 +

+    (此处原为居中插图:(a) wait-2 同传与 (b) 传统翻译 baseline 的解码时机对比示意图)
+
+ Wait-k策略首先等待源句单词,然后与源句的其余部分同时翻译,即输出总是隐藏在输入后面。这是受到同声传译人员的启发,同声传译人员通常会在几秒钟内开始翻译演讲者的演讲,在演讲者结束几秒钟后完成。例如,如果k=2,第一个目标词使用前2个源词预测,第二个目标词使用前3个源词预测,以此类推。上图中,(a)simultaneous: our wait-2 等到"布什"和"总统"输入后就开始解码预测"pres.",而(b) non-simultaneous baseline 为传统的翻译模型,需要等到整句"布什 总统 在 莫斯科 与 普京 会晤"才开始解码预测。 + + - 该PaddleHub Module基于transformer网络结构,采用wait-3策略进行中文到英文的翻译。 + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.1.0 + + - paddlehub >= 2.1.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) + +- ### 2、安装 + + - ```shell + $ hub install transformer_nist_wait_3 + ``` + + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、预测代码示例 + + - ```python + import paddlehub as hub + + model = hub.Module(name="transformer_nist_wait_3") + + # 待预测数据(模拟同声传译实时输入) + text = [ + "他", + "他还", + "他还说", + "他还说现在", + "他还说现在正在", + "他还说现在正在为", + "他还说现在正在为这", + "他还说现在正在为这一", + "他还说现在正在为这一会议", + "他还说现在正在为这一会议作出", + "他还说现在正在为这一会议作出安排", + "他还说现在正在为这一会议作出安排。", + ] + + for t in text: + print("input: {}".format(t)) + result = model.translate(t) + print("model output: {}\n".format(result)) + + # input: 他 + # model output: + # + # input: 他还 + # model output: + # + # input: 他还说 + # model output: he + # + # input: 他还说现在 + # model output: he also + # + # input: 他还说现在正在 + # model output: he also said + # + # input: 他还说现在正在为 + # model output: he also said that + # + # input: 他还说现在正在为这 + # model output: he also said that he + # + # input: 他还说现在正在为这一 + # model output: he also said that he is + # + # input: 他还说现在正在为这一会议 + # model output: he also said that he is making + # + # input: 他还说现在正在为这一会议作出 + # model output: he also said that he is making preparations + # + # input: 他还说现在正在为这一会议作出安排 + # model output: he also said that he is making preparations for + # + # input: 他还说现在正在为这一会议作出安排。 + # model output: he also said that he is making preparations for this meeting . + ``` + +- ### 2、 API + + - ```python + __init__(max_length=256, max_out_len=256) + ``` + + - 初始化module, 可配置模型的输入文本的最大长度 + + - **参数** + + - max_length(int): 输入文本的最大长度,默认值为256。 + - max_out_len(int): 输出文本的最大解码长度,超过最大解码长度时会截断句子的后半部分,默认值为256。 + + - ```python + translate(text, use_gpu=False) + ``` + + - 预测API,输入源语言的文本(模拟同传语音输入),解码后输出翻译后的目标语言文本。 + + - **参数** + + - text(str): 输入源语言的文本,数据类型为str + - use_gpu(bool): 是否使用gpu进行预测,默认为False + + - **返回** + + - result(str): 翻译后的目标语言文本。 + +## 四、服务部署 + +- PaddleHub Serving可以部署一个在线同声传译服务(需要用户配置一个语音转文本应用预先将语音输入转为中文文字),可以将此接口用于在线web应用。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + + - ```shell + $ hub serving start -m transformer_nist_wait_3 + ``` + + - 启动时会显示加载模型过程,启动成功后显示 + + - ```shell + Loading transformer_nist_wait_3 successful. 
+ ``` + + - 这样就完成了服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 待预测数据(模拟同声传译实时输入) + text = [ + "他", + "他还", + "他还说", + "他还说现在", + "他还说现在正在", + "他还说现在正在为", + "他还说现在正在为这", + "他还说现在正在为这一", + "他还说现在正在为这一会议", + "他还说现在正在为这一会议作出", + "他还说现在正在为这一会议作出安排", + "他还说现在正在为这一会议作出安排。", + ] + + # 指定预测方法为transformer_nist_wait_3并发送post请求,content-type类型应指定json方式 + # HOST_IP为服务器IP + url = "http://HOST_IP:8866/predict/transformer_nist_wait_3" + headers = {"Content-Type": "application/json"} + for t in text: + print("input: {}".format(t)) + result = requests.post(url=url, headers=headers, data=json.dumps({"text": t})) + # 打印预测结果 + print("model output: {}\n".format(result.json()['results'])) + + - 关于PaddleHub Serving更多信息参考:[服务部署](../../../../docs/docs_ch/tutorial/serving.md) + +## 五、更新历史 + +* 1.0.0 + 初始发布 + ```shell + hub install transformer_nist_wait_3==1.0.0 + ``` diff --git a/modules/text/simultaneous_translation/stacl/transformer_nist_wait_3/__init__.py b/modules/text/simultaneous_translation/stacl/transformer_nist_wait_3/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/modules/text/simultaneous_translation/stacl/transformer_nist_wait_3/model.py b/modules/text/simultaneous_translation/stacl/transformer_nist_wait_3/model.py new file mode 100644 index 0000000000000000000000000000000000000000..32cfe670981b1b3bc3e782997679c00242f110e6 --- /dev/null +++ b/modules/text/simultaneous_translation/stacl/transformer_nist_wait_3/model.py @@ -0,0 +1,339 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
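The `model.py` below implements the wait-k policy described in the README above: the encoder is re-run on each visible source prefix, and at target step i the decoder may attend only to the first k+i source tokens. A minimal sketch of that read schedule (illustration only, not part of the diff; the function and variable names are hypothetical):

```python
def visible_source_len(step: int, k: int, src_len: int) -> int:
    """Number of source tokens a wait-k decoder may read before emitting
    target token `step` (0-based), capped at the full source length."""
    return min(k + step, src_len)

# For wait-2 and a 5-token source sentence, successive target tokens see
# source prefixes of length 2, 3, 4, 5, 5, ... as in the README's k=2 example.
assert [visible_source_len(t, k=2, src_len=5) for t in range(6)] == [2, 3, 4, 5, 5, 5]
```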
+ +import numpy as np + +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +from paddlenlp.transformers import WordEmbedding, PositionalEmbedding + + +class DecoderLayer(nn.TransformerDecoderLayer): + def __init__(self, *args, **kwargs): + super(DecoderLayer, self).__init__(*args, **kwargs) + + def forward(self, tgt, memory, tgt_mask=None, memory_mask=None, cache=None): + residual = tgt + if self.normalize_before: + tgt = self.norm1(tgt) + if cache is None: + tgt = self.self_attn(tgt, tgt, tgt, tgt_mask, None) + else: + tgt, incremental_cache = self.self_attn(tgt, tgt, tgt, tgt_mask, + cache[0]) + tgt = residual + self.dropout1(tgt) + if not self.normalize_before: + tgt = self.norm1(tgt) + + residual = tgt + if self.normalize_before: + tgt = self.norm2(tgt) + if len(memory) == 1: + # Full sent + tgt = self.cross_attn(tgt, memory[0], memory[0], memory_mask, None) + else: + # Wait-k policy + cross_attn_outputs = [] + for i in range(tgt.shape[1]): + q = tgt[:, i:i + 1, :] + if i >= len(memory): + e = memory[-1] + else: + e = memory[i] + cross_attn_outputs.append( + self.cross_attn(q, e, e, memory_mask[:, :, i:i + 1, : + e.shape[1]], None)) + tgt = paddle.concat(cross_attn_outputs, axis=1) + tgt = residual + self.dropout2(tgt) + if not self.normalize_before: + tgt = self.norm2(tgt) + + residual = tgt + if self.normalize_before: + tgt = self.norm3(tgt) + tgt = self.linear2(self.dropout(self.activation(self.linear1(tgt)))) + tgt = residual + self.dropout3(tgt) + if not self.normalize_before: + tgt = self.norm3(tgt) + return tgt if cache is None else (tgt, (incremental_cache, )) + + +class Decoder(nn.TransformerDecoder): + """ + PaddlePaddle 2.1 casts memory_mask.dtype to memory.dtype, but in STACL, + type of memory is list, having no dtype attribute. + """ + + def forward(self, tgt, memory, tgt_mask=None, memory_mask=None, cache=None): + output = tgt + new_caches = [] + for i, mod in enumerate(self.layers): + if cache is None: + output = mod(output, + memory, + tgt_mask=tgt_mask, + memory_mask=memory_mask, + cache=None) + else: + output, new_cache = mod(output, + memory, + tgt_mask=tgt_mask, + memory_mask=memory_mask, + cache=cache[i]) + new_caches.append(new_cache) + + if self.norm is not None: + output = self.norm(output) + + return output if cache is None else (output, new_caches) + + +class SimultaneousTransformer(nn.Layer): + """ + model + """ + def __init__(self, + src_vocab_size, + trg_vocab_size, + max_length=256, + n_layer=6, + n_head=8, + d_model=512, + d_inner_hid=2048, + dropout=0.1, + weight_sharing=False, + bos_id=0, + eos_id=1, + waitk=-1): + super(SimultaneousTransformer, self).__init__() + self.trg_vocab_size = trg_vocab_size + self.emb_dim = d_model + self.bos_id = bos_id + self.eos_id = eos_id + self.dropout = dropout + self.waitk = waitk + self.n_layer = n_layer + self.n_head = n_head + self.d_model = d_model + + self.src_word_embedding = WordEmbedding( + vocab_size=src_vocab_size, emb_dim=d_model, bos_id=self.bos_id) + self.src_pos_embedding = PositionalEmbedding( + emb_dim=d_model, max_length=max_length+1) + if weight_sharing: + assert src_vocab_size == trg_vocab_size, ( + "Vocabularies in source and target should be same for weight sharing." 
+ ) + self.trg_word_embedding = self.src_word_embedding + self.trg_pos_embedding = self.src_pos_embedding + else: + self.trg_word_embedding = WordEmbedding( + vocab_size=trg_vocab_size, emb_dim=d_model, bos_id=self.bos_id) + self.trg_pos_embedding = PositionalEmbedding( + emb_dim=d_model, max_length=max_length+1) + + encoder_layer = nn.TransformerEncoderLayer( + d_model=d_model, + nhead=n_head, + dim_feedforward=d_inner_hid, + dropout=dropout, + activation='relu', + normalize_before=True, + bias_attr=[False, True]) + encoder_norm = nn.LayerNorm(d_model) + self.encoder = nn.TransformerEncoder( + encoder_layer=encoder_layer, num_layers=n_layer, norm=encoder_norm) + + decoder_layer = DecoderLayer( + d_model=d_model, + nhead=n_head, + dim_feedforward=d_inner_hid, + dropout=dropout, + activation='relu', + normalize_before=True, + bias_attr=[False, False, True]) + decoder_norm = nn.LayerNorm(d_model) + self.decoder = Decoder( + decoder_layer=decoder_layer, num_layers=n_layer, norm=decoder_norm) + + if weight_sharing: + self.linear = lambda x: paddle.matmul( + x=x, y=self.trg_word_embedding.word_embedding.weight, transpose_y=True) + else: + self.linear = nn.Linear( + in_features=d_model, + out_features=trg_vocab_size, + bias_attr=False) + + def forward(self, src_word, trg_word): + src_max_len = paddle.shape(src_word)[-1] + trg_max_len = paddle.shape(trg_word)[-1] + base_attn_bias = paddle.cast( + src_word == self.bos_id, + dtype=paddle.get_default_dtype()).unsqueeze([1, 2]) * -1e9 + src_slf_attn_bias = base_attn_bias + src_slf_attn_bias.stop_gradient = True + trg_slf_attn_bias = paddle.tensor.triu( + (paddle.ones( + (trg_max_len, trg_max_len), + dtype=paddle.get_default_dtype()) * -np.inf), + 1) + trg_slf_attn_bias.stop_gradient = True + trg_src_attn_bias = paddle.tile(base_attn_bias, [1, 1, trg_max_len, 1]) + src_pos = paddle.cast( + src_word != self.bos_id, dtype="int64") * paddle.arange( + start=0, end=src_max_len) + trg_pos = paddle.cast( + trg_word != self.bos_id, dtype="int64") * paddle.arange( + start=0, end=trg_max_len) + src_emb = self.src_word_embedding(src_word) + src_pos_emb = self.src_pos_embedding(src_pos) + src_emb = src_emb + src_pos_emb + enc_input = F.dropout( + src_emb, p=self.dropout, + training=self.training) if self.dropout else src_emb + with paddle.static.amp.fp16_guard(): + if self.waitk >= src_max_len or self.waitk == -1: + # Full sentence + enc_outputs = [ + self.encoder( + enc_input, src_mask=src_slf_attn_bias) + ] + else: + # Wait-k policy + enc_outputs = [] + for i in range(self.waitk, src_max_len + 1): + enc_output = self.encoder( + enc_input[:, :i, :], + src_mask=src_slf_attn_bias[:, :, :, :i]) + enc_outputs.append(enc_output) + + trg_emb = self.trg_word_embedding(trg_word) + trg_pos_emb = self.trg_pos_embedding(trg_pos) + trg_emb = trg_emb + trg_pos_emb + dec_input = F.dropout( + trg_emb, p=self.dropout, + training=self.training) if self.dropout else trg_emb + dec_output = self.decoder( + dec_input, + enc_outputs, + tgt_mask=trg_slf_attn_bias, + memory_mask=trg_src_attn_bias) + + predict = self.linear(dec_output) + + return predict + + def beam_search(self, src_word, beam_size=4, max_len=256, waitk=-1): + # TODO: "Speculative Beam Search for Simultaneous Translation" + raise NotImplementedError + + def greedy_search(self, + src_word, + max_len=256, + waitk=-1, + caches=None, + bos_id=None): + """ + greedy_search uses streaming reader. It doesn't need calling + encoder many times, an a sub-sentence just needs calling encoder once. 
+ So, it needs previous state(caches) and last one of generated + tokens id last time. + """ + src_max_len = paddle.shape(src_word)[-1] + base_attn_bias = paddle.cast( + src_word == self.bos_id, + dtype=paddle.get_default_dtype()).unsqueeze([1, 2]) * -1e9 + src_slf_attn_bias = base_attn_bias + src_slf_attn_bias.stop_gradient = True + trg_src_attn_bias = paddle.tile(base_attn_bias, [1, 1, 1, 1]) + src_pos = paddle.cast( + src_word != self.bos_id, dtype="int64") * paddle.arange( + start=0, end=src_max_len) + src_emb = self.src_word_embedding(src_word) + src_pos_emb = self.src_pos_embedding(src_pos) + src_emb = src_emb + src_pos_emb + enc_input = F.dropout( + src_emb, p=self.dropout, + training=self.training) if self.dropout else src_emb + enc_outputs = [self.encoder(enc_input, src_mask=src_slf_attn_bias)] + + # constant number + batch_size = enc_outputs[-1].shape[0] + max_len = ( + enc_outputs[-1].shape[1] + 20) if max_len is None else max_len + end_token_tensor = paddle.full( + shape=[batch_size, 1], fill_value=self.eos_id, dtype="int64") + + predict_ids = [] + log_probs = paddle.full( + shape=[batch_size, 1], fill_value=0, dtype="float32") + if not bos_id: + trg_word = paddle.full( + shape=[batch_size, 1], fill_value=self.bos_id, dtype="int64") + else: + trg_word = paddle.full( + shape=[batch_size, 1], fill_value=bos_id, dtype="int64") + + # init states (caches) for transformer + if not caches: + caches = self.decoder.gen_cache(enc_outputs[-1], do_zip=False) + + for i in range(max_len): + trg_pos = paddle.full( + shape=trg_word.shape, fill_value=i, dtype="int64") + trg_emb = self.trg_word_embedding(trg_word) + trg_pos_emb = self.trg_pos_embedding(trg_pos) + trg_emb = trg_emb + trg_pos_emb + dec_input = F.dropout( + trg_emb, p=self.dropout, + training=self.training) if self.dropout else trg_emb + + if waitk < 0 or i >= len(enc_outputs): + # if the decoder step is full sent or longer than all source + # step, then read the whole src + _e = enc_outputs[-1] + dec_output, caches = self.decoder( + dec_input, [_e], None, + trg_src_attn_bias[:, :, :, :_e.shape[1]], caches) + else: + _e = enc_outputs[i] + dec_output, caches = self.decoder( + dec_input, [_e], None, + trg_src_attn_bias[:, :, :, :_e.shape[1]], caches) + + dec_output = paddle.reshape( + dec_output, shape=[-1, dec_output.shape[-1]]) + + logits = self.linear(dec_output) + step_log_probs = paddle.log(F.softmax(logits, axis=-1)) + log_probs = paddle.add(x=step_log_probs, y=log_probs) + scores = log_probs + topk_scores, topk_indices = paddle.topk(x=scores, k=1) + + finished = paddle.equal(topk_indices, end_token_tensor) + trg_word = topk_indices + log_probs = topk_scores + + predict_ids.append(topk_indices) + + if paddle.all(finished).numpy(): + break + + predict_ids = paddle.stack(predict_ids, axis=0) + finished_seq = paddle.transpose(predict_ids, [1, 2, 0]) + finished_scores = topk_scores + + return finished_seq, finished_scores, caches \ No newline at end of file diff --git a/modules/text/simultaneous_translation/stacl/transformer_nist_wait_3/module.py b/modules/text/simultaneous_translation/stacl/transformer_nist_wait_3/module.py new file mode 100644 index 0000000000000000000000000000000000000000..b21d17c953253ed2bdf6b2393388ca834c2df5a4 --- /dev/null +++ b/modules/text/simultaneous_translation/stacl/transformer_nist_wait_3/module.py @@ -0,0 +1,125 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +import jieba +import paddle +from paddlenlp.transformers import position_encoding_init +from paddlenlp.transformers import WordEmbedding, PositionalEmbedding +from paddlehub.env import MODULE_HOME +from paddlehub.module.module import moduleinfo, serving + +from transformer_nist_wait_3.model import SimultaneousTransformer +from transformer_nist_wait_3.processor import STACLTokenizer, predict + + +@moduleinfo( + name="transformer_nist_wait_3", + version="1.0.0", + summary="", + author="PaddlePaddle", + author_email="", + type="nlp/simultaneous_translation", +) +class STTransformer(): + """ + Transformer model for simultaneous translation. + """ + + # Model config + model_config = { + # Number of head used in multi-head attention. + "n_head": 8, + # Number of sub-layers to be stacked in the encoder and decoder. + "n_layer": 6, + # The dimension for word embeddings, which is also the last dimension of + # the input and output of multi-head attention, position-wise feed-forward + # networks, encoder and decoder. + "d_model": 512, + } + + def __init__(self, + max_length=256, + max_out_len=256, + ): + super(STTransformer, self).__init__() + bpe_codes_fpath = os.path.join(MODULE_HOME, "transformer_nist_wait_3", "assets", "2M.zh2en.dict4bpe.zh") + src_vocab_fpath = os.path.join(MODULE_HOME, "transformer_nist_wait_3", "assets", "nist.20k.zh.vocab") + trg_vocab_fpath = os.path.join(MODULE_HOME, "transformer_nist_wait_3", "assets", "nist.10k.en.vocab") + params_fpath = os.path.join(MODULE_HOME, "transformer_nist_wait_3", "assets", "transformer.pdparams") + self.max_length = max_length + self.max_out_len = max_out_len + self.tokenizer = STACLTokenizer( + bpe_codes_fpath, + src_vocab_fpath, + trg_vocab_fpath, + ) + src_vocab_size = self.tokenizer.src_vocab_size + trg_vocab_size = self.tokenizer.trg_vocab_size + self.transformer = SimultaneousTransformer( + src_vocab_size, + trg_vocab_size, + max_length=self.max_length, + n_layer=self.model_config['n_layer'], + n_head=self.model_config['n_head'], + d_model=self.model_config['d_model'], + ) + model_dict = paddle.load(params_fpath) + # To avoid a longer length than training, reset the size of position + # encoding to max_length + model_dict["src_pos_embedding.pos_encoder.weight"] = position_encoding_init( + self.max_length + 1, self.model_config['d_model']) + model_dict["trg_pos_embedding.pos_encoder.weight"] = position_encoding_init( + self.max_length + 1, self.model_config['d_model']) + self.transformer.load_dict(model_dict) + + @serving + def translate(self, text, use_gpu=False): + paddle.set_device('gpu') if use_gpu else paddle.set_device('cpu') + + # Word segmentation + text = ' '.join(jieba.cut(text)) + # For decoding max length + decoder_max_length = 1 + # For decoding cache + cache = None + # For decoding start token id + bos_id = None + # Current source word index + i = 0 + # For decoding: is_last=True, max_len=256 + is_last = False + # Tokenized id + user_input_tokenized = [] + # Store 
the translation + result = [] + + bpe_str, tokenized_src = self.tokenizer.tokenize(text) + while i < len(tokenized_src): + user_input_tokenized.append(tokenized_src[i]) + if bpe_str[i] in ['。', '?', '!']: + is_last = True + result, cache, bos_id = predict( + user_input_tokenized, + decoder_max_length, + is_last, + cache, + bos_id, + result, + self.tokenizer, + self.transformer, + max_out_len=self.max_out_len) + i += 1 + return " ".join(result) \ No newline at end of file diff --git a/modules/text/simultaneous_translation/stacl/transformer_nist_wait_3/processor.py b/modules/text/simultaneous_translation/stacl/transformer_nist_wait_3/processor.py new file mode 100644 index 0000000000000000000000000000000000000000..b723e91b0623bb88d38edd8cc49e36aaf62aaf5f --- /dev/null +++ b/modules/text/simultaneous_translation/stacl/transformer_nist_wait_3/processor.py @@ -0,0 +1,124 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import paddle +from paddlenlp.data import Vocab +from subword_nmt import subword_nmt + + +class STACLTokenizer: + """ + Jieba+BPE, and convert tokens to ids. + """ + + def __init__(self, + bpe_codes_fpath, + src_vocab_fpath, + trg_vocab_fpath, + special_token=["", "", ""]): + bpe_parser = subword_nmt.create_apply_bpe_parser() + bpe_args = bpe_parser.parse_args(args=['-c', bpe_codes_fpath]) + bpe_args.codes.close() + bpe_args.codes = open(bpe_codes_fpath, 'r', encoding='utf-8') + self.bpe = subword_nmt.BPE(bpe_args.codes, bpe_args.merges, + bpe_args.separator, None, + bpe_args.glossaries) + + self.src_vocab = Vocab.load_vocabulary( + src_vocab_fpath, + bos_token=special_token[0], + eos_token=special_token[1], + unk_token=special_token[2]) + + self.trg_vocab = Vocab.load_vocabulary( + trg_vocab_fpath, + bos_token=special_token[0], + eos_token=special_token[1], + unk_token=special_token[2]) + + self.src_vocab_size = len(self.src_vocab) + self.trg_vocab_size = len(self.trg_vocab) + + def tokenize(self, text): + bpe_str = self.bpe.process_line(text) + ids = self.src_vocab.to_indices(bpe_str.split()) + return bpe_str.split(), ids + + +def post_process_seq(seq, + bos_idx=0, + eos_idx=1, + output_bos=False, + output_eos=False): + """ + Post-process the decoded sequence. 
+ """ + eos_pos = len(seq) - 1 + for i, idx in enumerate(seq): + if idx == eos_idx: + eos_pos = i + break + seq = [ + idx for idx in seq[:eos_pos + 1] + if (output_bos or idx != bos_idx) and (output_eos or idx != eos_idx) + ] + return seq + + +def predict(tokenized_src, + decoder_max_length, + is_last, + cache, + bos_id, + result, + tokenizer, + transformer, + n_best=1, + max_out_len=256, + eos_idx=1, + waitk=3, + ): + # Set evaluate mode + transformer.eval() + + if len(tokenized_src) < waitk: + return result, cache, bos_id + + with paddle.no_grad(): + paddle.disable_static() + input_src = tokenized_src + if is_last: + decoder_max_length = max_out_len + input_src += [eos_idx] + src_word = paddle.to_tensor(input_src).unsqueeze(axis=0) + finished_seq, finished_scores, cache = transformer.greedy_search( + src_word, + max_len=decoder_max_length, + waitk=waitk, + caches=cache, + bos_id=bos_id) + finished_seq = finished_seq.numpy() + for beam_idx, beam in enumerate(finished_seq[0]): + if beam_idx >= n_best: + break + id_list = post_process_seq(beam) + if len(id_list) == 0: + continue + bos_id = id_list[-1] + word_list = tokenizer.trg_vocab.to_tokens(id_list) + for word in word_list: + result.append(word) + res = ' '.join(word_list).replace('@@ ', '') + paddle.enable_static() + return result, cache, bos_id \ No newline at end of file diff --git a/modules/text/simultaneous_translation/stacl/transformer_nist_wait_3/requirements.txt b/modules/text/simultaneous_translation/stacl/transformer_nist_wait_3/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..46ecba73afadc86c801fbeb72c72f7c7036491f0 --- /dev/null +++ b/modules/text/simultaneous_translation/stacl/transformer_nist_wait_3/requirements.txt @@ -0,0 +1,2 @@ +jieba==0.42.1 +subword-nmt==0.3.7 diff --git a/modules/text/simultaneous_translation/stacl/transformer_nist_wait_5/README.md b/modules/text/simultaneous_translation/stacl/transformer_nist_wait_5/README.md new file mode 100644 index 0000000000000000000000000000000000000000..bafd730587c610b1358fd5bb0f14a48bdd3d9bc9 --- /dev/null +++ b/modules/text/simultaneous_translation/stacl/transformer_nist_wait_5/README.md @@ -0,0 +1,210 @@ +# transformer_nist_wait_5 +|模型名称|transformer_nist_wait_5| +| :--- | :---: | +|类别|同声传译| +|网络|transformer| +|数据集|NIST 2008-中英翻译数据集| +|是否支持Fine-tuning|否| +|模型大小|377MB| +|最新更新日期|2021-09-17| +|数据指标|-| + +## 一、模型基本信息 + +- ### 模型介绍 + + - 同声传译(Simultaneous Translation),即在句子完成之前进行翻译,同声传译的目标是实现同声传译的自动化,它可以与源语言同时翻译,延迟时间只有几秒钟。 + STACL 是论文 [STACL: Simultaneous Translation with Implicit Anticipation and Controllable Latency using Prefix-to-Prefix Framework](https://www.aclweb.org/anthology/P19-1289/) 中针对同传提出的适用于所有同传场景的翻译架构。 + - STACL 主要具有以下优势: + + - Prefix-to-Prefix架构拥有预测能力,即在未看到源词的情况下仍然可以翻译出对应的目标词,克服了SOV→SVO等词序差异 +

+    (此处原为居中插图:Seq2Seq 模型与 STACL(Wait-2)架构的翻译流程对比示意图)
+
+ 和传统的机器翻译模型主要的区别在于翻译时是否需要利用全句的源句。上图中,Seq2Seq模型需要等到全句的源句(1-5)全部输入Encoder后,Decoder才开始解码进行翻译;而STACL架构采用了Wait-k(图中Wait-2)的策略,当源句只有两个词(1和2)输入到Encoder后,Decoder即可开始解码预测目标句的第一个词。 + + - Wait-k策略可以不需要全句的源句,直接预测目标句,可以实现任意的字级延迟,同时保持较高的翻译质量。 +

+    (此处原为居中插图:(a) wait-2 同传与 (b) 传统翻译 baseline 的解码时机对比示意图)
+
+ Wait-k策略首先等待源句单词,然后与源句的其余部分同时翻译,即输出总是隐藏在输入后面。这是受到同声传译人员的启发,同声传译人员通常会在几秒钟内开始翻译演讲者的演讲,在演讲者结束几秒钟后完成。例如,如果k=2,第一个目标词使用前2个源词预测,第二个目标词使用前3个源词预测,以此类推。上图中,(a)simultaneous: our wait-2 等到"布什"和"总统"输入后就开始解码预测"pres.",而(b) non-simultaneous baseline 为传统的翻译模型,需要等到整句"布什 总统 在 莫斯科 与 普京 会晤"才开始解码预测。 + + - 该PaddleHub Module基于transformer网络结构,采用wait-5策略进行中文到英文的翻译。 + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.1.0 + + - paddlehub >= 2.1.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) + +- ### 2、安装 + + - ```shell + $ hub install transformer_nist_wait_5 + ``` + + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、预测代码示例 + + - ```python + import paddlehub as hub + + model = hub.Module(name="transformer_nist_wait_5") + + # 待预测数据(模拟同声传译实时输入) + text = [ + "他", + "他还", + "他还说", + "他还说现在", + "他还说现在正在", + "他还说现在正在为", + "他还说现在正在为这", + "他还说现在正在为这一", + "他还说现在正在为这一会议", + "他还说现在正在为这一会议作出", + "他还说现在正在为这一会议作出安排", + "他还说现在正在为这一会议作出安排。", + ] + + for t in text: + print("input: {}".format(t)) + result = model.translate(t) + print("model output: {}\n".format(result)) + + # input: 他 + # model output: + # + # input: 他还 + # model output: + # + # input: 他还说 + # model output: + # + # input: 他还说现在 + # model output: + # + # input: 他还说现在正在 + # model output: he + # + # input: 他还说现在正在为 + # model output: he also + # + # input: 他还说现在正在为这 + # model output: he also said + # + # input: 他还说现在正在为这一 + # model output: he also said that + # + # input: 他还说现在正在为这一会议 + # model output: he also said that he + # + # input: 他还说现在正在为这一会议作出 + # model output: he also said that he was + # + # input: 他还说现在正在为这一会议作出安排 + # model output: he also said that he was making + # + # input: 他还说现在正在为这一会议作出安排。 + # model output: he also said that he was making arrangements for this meeting . + ``` + +- ### 2、 API + + - ```python + __init__(max_length=256, max_out_len=256) + ``` + + - 初始化module, 可配置模型的输入文本的最大长度 + + - **参数** + + - max_length(int): 输入文本的最大长度,默认值为256。 + - max_out_len(int): 输出文本的最大解码长度,超过最大解码长度时会截断句子的后半部分,默认值为256。 + + - ```python + translate(text, use_gpu=False) + ``` + + - 预测API,输入源语言的文本(模拟同传语音输入),解码后输出翻译后的目标语言文本。 + + - **参数** + + - text(str): 输入源语言的文本,数据类型为str + - use_gpu(bool): 是否使用gpu进行预测,默认为False + + - **返回** + + - result(str): 翻译后的目标语言文本。 + +## 四、服务部署 + +- PaddleHub Serving可以部署一个在线同声传译服务(需要用户配置一个语音转文本应用预先将语音输入转为中文文字),可以将此接口用于在线web应用。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + + - ```shell + $ hub serving start -m transformer_nist_wait_5 + ``` + + - 启动时会显示加载模型过程,启动成功后显示 + + - ```shell + Loading transformer_nist_wait_5 successful. 
+ ``` + + - 这样就完成了服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 待预测数据(模拟同声传译实时输入) + text = [ + "他", + "他还", + "他还说", + "他还说现在", + "他还说现在正在", + "他还说现在正在为", + "他还说现在正在为这", + "他还说现在正在为这一", + "他还说现在正在为这一会议", + "他还说现在正在为这一会议作出", + "他还说现在正在为这一会议作出安排", + "他还说现在正在为这一会议作出安排。", + ] + + # 指定预测方法为transformer_nist_wait_5并发送post请求,content-type类型应指定json方式 + # HOST_IP为服务器IP + url = "http://HOST_IP:8866/predict/transformer_nist_wait_5" + headers = {"Content-Type": "application/json"} + for t in text: + print("input: {}".format(t)) + r = requests.post(url=url, headers=headers, data=json.dumps({"text": t})) + # 打印预测结果 + print("model output: {}\n".format(result.json()['results'])) + + - 关于PaddleHub Serving更多信息参考:[服务部署](../../../../docs/docs_ch/tutorial/serving.md) + +## 五、更新历史 + +* 1.0.0 + 初始发布 + ```shell + hub install transformer_nist_wait_5==1.0.0 + ``` diff --git a/modules/text/simultaneous_translation/stacl/transformer_nist_wait_5/__init__.py b/modules/text/simultaneous_translation/stacl/transformer_nist_wait_5/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/modules/text/simultaneous_translation/stacl/transformer_nist_wait_5/model.py b/modules/text/simultaneous_translation/stacl/transformer_nist_wait_5/model.py new file mode 100644 index 0000000000000000000000000000000000000000..32cfe670981b1b3bc3e782997679c00242f110e6 --- /dev/null +++ b/modules/text/simultaneous_translation/stacl/transformer_nist_wait_5/model.py @@ -0,0 +1,339 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
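For quick reference, here is a condensed, self-contained version of the PaddleHub Serving client shown in the transformer_nist_wait_5 README above; the response object is kept in a single variable (it must be read from the same variable it is assigned to), and `HOST_IP` remains a placeholder for the server address:

```python
import json

import requests

# Address of the running `hub serving start -m transformer_nist_wait_5` service.
url = "http://HOST_IP:8866/predict/transformer_nist_wait_5"
headers = {"Content-Type": "application/json"}

# Feed a growing source sentence, simulating real-time simultaneous input.
for t in ["他", "他还说", "他还说现在正在为这一会议作出安排。"]:
    resp = requests.post(url=url, headers=headers, data=json.dumps({"text": t}))
    print("input: {}".format(t))
    print("model output: {}\n".format(resp.json()["results"]))
```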
+ +import numpy as np + +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +from paddlenlp.transformers import WordEmbedding, PositionalEmbedding + + +class DecoderLayer(nn.TransformerDecoderLayer): + def __init__(self, *args, **kwargs): + super(DecoderLayer, self).__init__(*args, **kwargs) + + def forward(self, tgt, memory, tgt_mask=None, memory_mask=None, cache=None): + residual = tgt + if self.normalize_before: + tgt = self.norm1(tgt) + if cache is None: + tgt = self.self_attn(tgt, tgt, tgt, tgt_mask, None) + else: + tgt, incremental_cache = self.self_attn(tgt, tgt, tgt, tgt_mask, + cache[0]) + tgt = residual + self.dropout1(tgt) + if not self.normalize_before: + tgt = self.norm1(tgt) + + residual = tgt + if self.normalize_before: + tgt = self.norm2(tgt) + if len(memory) == 1: + # Full sent + tgt = self.cross_attn(tgt, memory[0], memory[0], memory_mask, None) + else: + # Wait-k policy + cross_attn_outputs = [] + for i in range(tgt.shape[1]): + q = tgt[:, i:i + 1, :] + if i >= len(memory): + e = memory[-1] + else: + e = memory[i] + cross_attn_outputs.append( + self.cross_attn(q, e, e, memory_mask[:, :, i:i + 1, : + e.shape[1]], None)) + tgt = paddle.concat(cross_attn_outputs, axis=1) + tgt = residual + self.dropout2(tgt) + if not self.normalize_before: + tgt = self.norm2(tgt) + + residual = tgt + if self.normalize_before: + tgt = self.norm3(tgt) + tgt = self.linear2(self.dropout(self.activation(self.linear1(tgt)))) + tgt = residual + self.dropout3(tgt) + if not self.normalize_before: + tgt = self.norm3(tgt) + return tgt if cache is None else (tgt, (incremental_cache, )) + + +class Decoder(nn.TransformerDecoder): + """ + PaddlePaddle 2.1 casts memory_mask.dtype to memory.dtype, but in STACL, + type of memory is list, having no dtype attribute. + """ + + def forward(self, tgt, memory, tgt_mask=None, memory_mask=None, cache=None): + output = tgt + new_caches = [] + for i, mod in enumerate(self.layers): + if cache is None: + output = mod(output, + memory, + tgt_mask=tgt_mask, + memory_mask=memory_mask, + cache=None) + else: + output, new_cache = mod(output, + memory, + tgt_mask=tgt_mask, + memory_mask=memory_mask, + cache=cache[i]) + new_caches.append(new_cache) + + if self.norm is not None: + output = self.norm(output) + + return output if cache is None else (output, new_caches) + + +class SimultaneousTransformer(nn.Layer): + """ + model + """ + def __init__(self, + src_vocab_size, + trg_vocab_size, + max_length=256, + n_layer=6, + n_head=8, + d_model=512, + d_inner_hid=2048, + dropout=0.1, + weight_sharing=False, + bos_id=0, + eos_id=1, + waitk=-1): + super(SimultaneousTransformer, self).__init__() + self.trg_vocab_size = trg_vocab_size + self.emb_dim = d_model + self.bos_id = bos_id + self.eos_id = eos_id + self.dropout = dropout + self.waitk = waitk + self.n_layer = n_layer + self.n_head = n_head + self.d_model = d_model + + self.src_word_embedding = WordEmbedding( + vocab_size=src_vocab_size, emb_dim=d_model, bos_id=self.bos_id) + self.src_pos_embedding = PositionalEmbedding( + emb_dim=d_model, max_length=max_length+1) + if weight_sharing: + assert src_vocab_size == trg_vocab_size, ( + "Vocabularies in source and target should be same for weight sharing." 
+ ) + self.trg_word_embedding = self.src_word_embedding + self.trg_pos_embedding = self.src_pos_embedding + else: + self.trg_word_embedding = WordEmbedding( + vocab_size=trg_vocab_size, emb_dim=d_model, bos_id=self.bos_id) + self.trg_pos_embedding = PositionalEmbedding( + emb_dim=d_model, max_length=max_length+1) + + encoder_layer = nn.TransformerEncoderLayer( + d_model=d_model, + nhead=n_head, + dim_feedforward=d_inner_hid, + dropout=dropout, + activation='relu', + normalize_before=True, + bias_attr=[False, True]) + encoder_norm = nn.LayerNorm(d_model) + self.encoder = nn.TransformerEncoder( + encoder_layer=encoder_layer, num_layers=n_layer, norm=encoder_norm) + + decoder_layer = DecoderLayer( + d_model=d_model, + nhead=n_head, + dim_feedforward=d_inner_hid, + dropout=dropout, + activation='relu', + normalize_before=True, + bias_attr=[False, False, True]) + decoder_norm = nn.LayerNorm(d_model) + self.decoder = Decoder( + decoder_layer=decoder_layer, num_layers=n_layer, norm=decoder_norm) + + if weight_sharing: + self.linear = lambda x: paddle.matmul( + x=x, y=self.trg_word_embedding.word_embedding.weight, transpose_y=True) + else: + self.linear = nn.Linear( + in_features=d_model, + out_features=trg_vocab_size, + bias_attr=False) + + def forward(self, src_word, trg_word): + src_max_len = paddle.shape(src_word)[-1] + trg_max_len = paddle.shape(trg_word)[-1] + base_attn_bias = paddle.cast( + src_word == self.bos_id, + dtype=paddle.get_default_dtype()).unsqueeze([1, 2]) * -1e9 + src_slf_attn_bias = base_attn_bias + src_slf_attn_bias.stop_gradient = True + trg_slf_attn_bias = paddle.tensor.triu( + (paddle.ones( + (trg_max_len, trg_max_len), + dtype=paddle.get_default_dtype()) * -np.inf), + 1) + trg_slf_attn_bias.stop_gradient = True + trg_src_attn_bias = paddle.tile(base_attn_bias, [1, 1, trg_max_len, 1]) + src_pos = paddle.cast( + src_word != self.bos_id, dtype="int64") * paddle.arange( + start=0, end=src_max_len) + trg_pos = paddle.cast( + trg_word != self.bos_id, dtype="int64") * paddle.arange( + start=0, end=trg_max_len) + src_emb = self.src_word_embedding(src_word) + src_pos_emb = self.src_pos_embedding(src_pos) + src_emb = src_emb + src_pos_emb + enc_input = F.dropout( + src_emb, p=self.dropout, + training=self.training) if self.dropout else src_emb + with paddle.static.amp.fp16_guard(): + if self.waitk >= src_max_len or self.waitk == -1: + # Full sentence + enc_outputs = [ + self.encoder( + enc_input, src_mask=src_slf_attn_bias) + ] + else: + # Wait-k policy + enc_outputs = [] + for i in range(self.waitk, src_max_len + 1): + enc_output = self.encoder( + enc_input[:, :i, :], + src_mask=src_slf_attn_bias[:, :, :, :i]) + enc_outputs.append(enc_output) + + trg_emb = self.trg_word_embedding(trg_word) + trg_pos_emb = self.trg_pos_embedding(trg_pos) + trg_emb = trg_emb + trg_pos_emb + dec_input = F.dropout( + trg_emb, p=self.dropout, + training=self.training) if self.dropout else trg_emb + dec_output = self.decoder( + dec_input, + enc_outputs, + tgt_mask=trg_slf_attn_bias, + memory_mask=trg_src_attn_bias) + + predict = self.linear(dec_output) + + return predict + + def beam_search(self, src_word, beam_size=4, max_len=256, waitk=-1): + # TODO: "Speculative Beam Search for Simultaneous Translation" + raise NotImplementedError + + def greedy_search(self, + src_word, + max_len=256, + waitk=-1, + caches=None, + bos_id=None): + """ + greedy_search uses streaming reader. It doesn't need calling + encoder many times, an a sub-sentence just needs calling encoder once. 
+ So, it needs previous state(caches) and last one of generated + tokens id last time. + """ + src_max_len = paddle.shape(src_word)[-1] + base_attn_bias = paddle.cast( + src_word == self.bos_id, + dtype=paddle.get_default_dtype()).unsqueeze([1, 2]) * -1e9 + src_slf_attn_bias = base_attn_bias + src_slf_attn_bias.stop_gradient = True + trg_src_attn_bias = paddle.tile(base_attn_bias, [1, 1, 1, 1]) + src_pos = paddle.cast( + src_word != self.bos_id, dtype="int64") * paddle.arange( + start=0, end=src_max_len) + src_emb = self.src_word_embedding(src_word) + src_pos_emb = self.src_pos_embedding(src_pos) + src_emb = src_emb + src_pos_emb + enc_input = F.dropout( + src_emb, p=self.dropout, + training=self.training) if self.dropout else src_emb + enc_outputs = [self.encoder(enc_input, src_mask=src_slf_attn_bias)] + + # constant number + batch_size = enc_outputs[-1].shape[0] + max_len = ( + enc_outputs[-1].shape[1] + 20) if max_len is None else max_len + end_token_tensor = paddle.full( + shape=[batch_size, 1], fill_value=self.eos_id, dtype="int64") + + predict_ids = [] + log_probs = paddle.full( + shape=[batch_size, 1], fill_value=0, dtype="float32") + if not bos_id: + trg_word = paddle.full( + shape=[batch_size, 1], fill_value=self.bos_id, dtype="int64") + else: + trg_word = paddle.full( + shape=[batch_size, 1], fill_value=bos_id, dtype="int64") + + # init states (caches) for transformer + if not caches: + caches = self.decoder.gen_cache(enc_outputs[-1], do_zip=False) + + for i in range(max_len): + trg_pos = paddle.full( + shape=trg_word.shape, fill_value=i, dtype="int64") + trg_emb = self.trg_word_embedding(trg_word) + trg_pos_emb = self.trg_pos_embedding(trg_pos) + trg_emb = trg_emb + trg_pos_emb + dec_input = F.dropout( + trg_emb, p=self.dropout, + training=self.training) if self.dropout else trg_emb + + if waitk < 0 or i >= len(enc_outputs): + # if the decoder step is full sent or longer than all source + # step, then read the whole src + _e = enc_outputs[-1] + dec_output, caches = self.decoder( + dec_input, [_e], None, + trg_src_attn_bias[:, :, :, :_e.shape[1]], caches) + else: + _e = enc_outputs[i] + dec_output, caches = self.decoder( + dec_input, [_e], None, + trg_src_attn_bias[:, :, :, :_e.shape[1]], caches) + + dec_output = paddle.reshape( + dec_output, shape=[-1, dec_output.shape[-1]]) + + logits = self.linear(dec_output) + step_log_probs = paddle.log(F.softmax(logits, axis=-1)) + log_probs = paddle.add(x=step_log_probs, y=log_probs) + scores = log_probs + topk_scores, topk_indices = paddle.topk(x=scores, k=1) + + finished = paddle.equal(topk_indices, end_token_tensor) + trg_word = topk_indices + log_probs = topk_scores + + predict_ids.append(topk_indices) + + if paddle.all(finished).numpy(): + break + + predict_ids = paddle.stack(predict_ids, axis=0) + finished_seq = paddle.transpose(predict_ids, [1, 2, 0]) + finished_scores = topk_scores + + return finished_seq, finished_scores, caches \ No newline at end of file diff --git a/modules/text/simultaneous_translation/stacl/transformer_nist_wait_5/module.py b/modules/text/simultaneous_translation/stacl/transformer_nist_wait_5/module.py new file mode 100644 index 0000000000000000000000000000000000000000..14474b81f8c13e8744c1665e233c489676838e82 --- /dev/null +++ b/modules/text/simultaneous_translation/stacl/transformer_nist_wait_5/module.py @@ -0,0 +1,125 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +import jieba +import paddle +from paddlenlp.transformers import position_encoding_init +from paddlenlp.transformers import WordEmbedding, PositionalEmbedding +from paddlehub.env import MODULE_HOME +from paddlehub.module.module import moduleinfo, serving + +from transformer_nist_wait_5.model import SimultaneousTransformer +from transformer_nist_wait_5.processor import STACLTokenizer, predict + + +@moduleinfo( + name="transformer_nist_wait_5", + version="1.0.0", + summary="", + author="PaddlePaddle", + author_email="", + type="nlp/simultaneous_translation", +) +class STTransformer(): + """ + Transformer model for simultaneous translation. + """ + + # Model config + model_config = { + # Number of head used in multi-head attention. + "n_head": 8, + # Number of sub-layers to be stacked in the encoder and decoder. + "n_layer": 6, + # The dimension for word embeddings, which is also the last dimension of + # the input and output of multi-head attention, position-wise feed-forward + # networks, encoder and decoder. + "d_model": 512, + } + + def __init__(self, + max_length=256, + max_out_len=256, + ): + super(STTransformer, self).__init__() + bpe_codes_fpath = os.path.join(MODULE_HOME, "transformer_nist_wait_5", "assets", "2M.zh2en.dict4bpe.zh") + src_vocab_fpath = os.path.join(MODULE_HOME, "transformer_nist_wait_5", "assets", "nist.20k.zh.vocab") + trg_vocab_fpath = os.path.join(MODULE_HOME, "transformer_nist_wait_5", "assets", "nist.10k.en.vocab") + params_fpath = os.path.join(MODULE_HOME, "transformer_nist_wait_5", "assets", "transformer.pdparams") + self.max_length = max_length + self.max_out_len = max_out_len + self.tokenizer = STACLTokenizer( + bpe_codes_fpath, + src_vocab_fpath, + trg_vocab_fpath, + ) + src_vocab_size = self.tokenizer.src_vocab_size + trg_vocab_size = self.tokenizer.trg_vocab_size + self.transformer = SimultaneousTransformer( + src_vocab_size, + trg_vocab_size, + max_length=self.max_length, + n_layer=self.model_config['n_layer'], + n_head=self.model_config['n_head'], + d_model=self.model_config['d_model'], + ) + model_dict = paddle.load(params_fpath) + # To avoid a longer length than training, reset the size of position + # encoding to max_length + model_dict["src_pos_embedding.pos_encoder.weight"] = position_encoding_init( + self.max_length + 1, self.model_config['d_model']) + model_dict["trg_pos_embedding.pos_encoder.weight"] = position_encoding_init( + self.max_length + 1, self.model_config['d_model']) + self.transformer.load_dict(model_dict) + + @serving + def translate(self, text, use_gpu=False): + paddle.set_device('gpu') if use_gpu else paddle.set_device('cpu') + + # Word segmentation + text = ' '.join(jieba.cut(text)) + # For decoding max length + decoder_max_length = 1 + # For decoding cache + cache = None + # For decoding start token id + bos_id = None + # Current source word index + i = 0 + # For decoding: is_last=True, max_len=256 + is_last = False + # Tokenized id + user_input_tokenized = [] + # Store 
the translation + result = [] + + bpe_str, tokenized_src = self.tokenizer.tokenize(text) + while i < len(tokenized_src): + user_input_tokenized.append(tokenized_src[i]) + if bpe_str[i] in ['。', '?', '!']: + is_last = True + result, cache, bos_id = predict( + user_input_tokenized, + decoder_max_length, + is_last, + cache, + bos_id, + result, + self.tokenizer, + self.transformer, + max_out_len=self.max_out_len) + i += 1 + return " ".join(result) \ No newline at end of file diff --git a/modules/text/simultaneous_translation/stacl/transformer_nist_wait_5/processor.py b/modules/text/simultaneous_translation/stacl/transformer_nist_wait_5/processor.py new file mode 100644 index 0000000000000000000000000000000000000000..03f90ec63c818878c52b6f791ca2a228562ec856 --- /dev/null +++ b/modules/text/simultaneous_translation/stacl/transformer_nist_wait_5/processor.py @@ -0,0 +1,124 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import paddle +from paddlenlp.data import Vocab +from subword_nmt import subword_nmt + + +class STACLTokenizer: + """ + Jieba+BPE, and convert tokens to ids. + """ + + def __init__(self, + bpe_codes_fpath, + src_vocab_fpath, + trg_vocab_fpath, + special_token=["", "", ""]): + bpe_parser = subword_nmt.create_apply_bpe_parser() + bpe_args = bpe_parser.parse_args(args=['-c', bpe_codes_fpath]) + bpe_args.codes.close() + bpe_args.codes = open(bpe_codes_fpath, 'r', encoding='utf-8') + self.bpe = subword_nmt.BPE(bpe_args.codes, bpe_args.merges, + bpe_args.separator, None, + bpe_args.glossaries) + + self.src_vocab = Vocab.load_vocabulary( + src_vocab_fpath, + bos_token=special_token[0], + eos_token=special_token[1], + unk_token=special_token[2]) + + self.trg_vocab = Vocab.load_vocabulary( + trg_vocab_fpath, + bos_token=special_token[0], + eos_token=special_token[1], + unk_token=special_token[2]) + + self.src_vocab_size = len(self.src_vocab) + self.trg_vocab_size = len(self.trg_vocab) + + def tokenize(self, text): + bpe_str = self.bpe.process_line(text) + ids = self.src_vocab.to_indices(bpe_str.split()) + return bpe_str.split(), ids + + +def post_process_seq(seq, + bos_idx=0, + eos_idx=1, + output_bos=False, + output_eos=False): + """ + Post-process the decoded sequence. 
+ """ + eos_pos = len(seq) - 1 + for i, idx in enumerate(seq): + if idx == eos_idx: + eos_pos = i + break + seq = [ + idx for idx in seq[:eos_pos + 1] + if (output_bos or idx != bos_idx) and (output_eos or idx != eos_idx) + ] + return seq + + +def predict(tokenized_src, + decoder_max_length, + is_last, + cache, + bos_id, + result, + tokenizer, + transformer, + n_best=1, + max_out_len=256, + eos_idx=1, + waitk=5, + ): + # Set evaluate mode + transformer.eval() + + if len(tokenized_src) < waitk: + return result, cache, bos_id + + with paddle.no_grad(): + paddle.disable_static() + input_src = tokenized_src + if is_last: + decoder_max_length = max_out_len + input_src += [eos_idx] + src_word = paddle.to_tensor(input_src).unsqueeze(axis=0) + finished_seq, finished_scores, cache = transformer.greedy_search( + src_word, + max_len=decoder_max_length, + waitk=waitk, + caches=cache, + bos_id=bos_id) + finished_seq = finished_seq.numpy() + for beam_idx, beam in enumerate(finished_seq[0]): + if beam_idx >= n_best: + break + id_list = post_process_seq(beam) + if len(id_list) == 0: + continue + bos_id = id_list[-1] + word_list = tokenizer.trg_vocab.to_tokens(id_list) + for word in word_list: + result.append(word) + res = ' '.join(word_list).replace('@@ ', '') + paddle.enable_static() + return result, cache, bos_id \ No newline at end of file diff --git a/modules/text/simultaneous_translation/stacl/transformer_nist_wait_5/requirements.txt b/modules/text/simultaneous_translation/stacl/transformer_nist_wait_5/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..46ecba73afadc86c801fbeb72c72f7c7036491f0 --- /dev/null +++ b/modules/text/simultaneous_translation/stacl/transformer_nist_wait_5/requirements.txt @@ -0,0 +1,2 @@ +jieba==0.42.1 +subword-nmt==0.3.7 diff --git a/modules/text/simultaneous_translation/stacl/transformer_nist_wait_7/README.md b/modules/text/simultaneous_translation/stacl/transformer_nist_wait_7/README.md new file mode 100644 index 0000000000000000000000000000000000000000..46d5e285ce69b59c7d4e9ffe213c8eb3f95caea1 --- /dev/null +++ b/modules/text/simultaneous_translation/stacl/transformer_nist_wait_7/README.md @@ -0,0 +1,210 @@ +# transformer_nist_wait_7 +|模型名称|transformer_nist_wait_7| +| :--- | :---: | +|类别|同声传译| +|网络|transformer| +|数据集|NIST 2008-中英翻译数据集| +|是否支持Fine-tuning|否| +|模型大小|377MB| +|最新更新日期|2021-09-17| +|数据指标|-| + +## 一、模型基本信息 + +- ### 模型介绍 + + - 同声传译(Simultaneous Translation),即在句子完成之前进行翻译,同声传译的目标是实现同声传译的自动化,它可以与源语言同时翻译,延迟时间只有几秒钟。 + STACL 是论文 [STACL: Simultaneous Translation with Implicit Anticipation and Controllable Latency using Prefix-to-Prefix Framework](https://www.aclweb.org/anthology/P19-1289/) 中针对同传提出的适用于所有同传场景的翻译架构。 + - STACL 主要具有以下优势: + + - Prefix-to-Prefix架构拥有预测能力,即在未看到源词的情况下仍然可以翻译出对应的目标词,克服了SOV→SVO等词序差异 +

+    <p align="center">(图:Seq2Seq 整句翻译与 STACL Wait-k(图中 Wait-2)策略的解码流程对比)</p>

+ 和传统的机器翻译模型主要的区别在于翻译时是否需要利用全句的源句。上图中,Seq2Seq模型需要等到全句的源句(1-5)全部输入Encoder后,Decoder才开始解码进行翻译;而STACL架构采用了Wait-k(图中Wait-2)的策略,当源句只有两个词(1和2)输入到Encoder后,Decoder即可开始解码预测目标句的第一个词。 + + - Wait-k策略可以不需要全句的源句,直接预测目标句,可以实现任意的字级延迟,同时保持较高的翻译质量。 +
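+    - 作为对下图示例的补充,这里给出一段极简的示意代码来说明 Wait-k 的"读-写"调度:前 k-1 个源词只读不写,此后每读入一个源词就写出一个目标词,源句结束后再解码完剩余目标词。该代码并非本模块的实现,其中的函数名与参数(如 translate_prefix、eos_token)均为示意性假设。
+
+      ```python
+      # 仅为示意(非本模块实现):Wait-k 策略的读-写调度
+      def wait_k_schedule(source_words, k, translate_prefix, eos_token="EOS", max_len=256):
+          """source_words: 按到达顺序排列的源句词列表;
+          translate_prefix(src_prefix, tgt_prefix): 假设的函数,基于已读源前缀和已生成目标词返回下一个目标词。"""
+          src_prefix, target = [], []
+          for word in source_words:
+              src_prefix.append(word)                                   # READ:读入一个源词
+              if len(src_prefix) >= k:                                  # 读满 k 个源词后开始输出
+                  target.append(translate_prefix(src_prefix, target))   # WRITE:写出一个目标词
+          while len(target) < max_len and (not target or target[-1] != eos_token):
+              target.append(translate_prefix(src_prefix, target))       # 源句读完后继续解码直到句末
+          return target
+      ```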

+    <p align="center">(图:Wait-2 同传解码(simultaneous)与整句翻译基线(non-simultaneous baseline)的对比)</p>

+ Wait-k策略首先等待源句单词,然后与源句的其余部分同时翻译,即输出总是隐藏在输入后面。这是受到同声传译人员的启发,同声传译人员通常会在几秒钟内开始翻译演讲者的演讲,在演讲者结束几秒钟后完成。例如,如果k=2,第一个目标词使用前2个源词预测,第二个目标词使用前3个源词预测,以此类推。上图中,(a)simultaneous: our wait-2 等到"布什"和"总统"输入后就开始解码预测"pres.",而(b) non-simultaneous baseline 为传统的翻译模型,需要等到整句"布什 总统 在 莫斯科 与 普京 会晤"才开始解码预测。 + + - 该PaddleHub Module基于transformer网络结构,采用wait-7策略进行中文到英文的翻译。 + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.1.0 + + - paddlehub >= 2.1.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) + +- ### 2、安装 + + - ```shell + $ hub install transformer_nist_wait_7 + ``` + + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、预测代码示例 + + - ```python + import paddlehub as hub + + model = hub.Module(name="transformer_nist_wait_7") + + # 待预测数据(模拟同声传译实时输入) + text = [ + "他", + "他还", + "他还说", + "他还说现在", + "他还说现在正在", + "他还说现在正在为", + "他还说现在正在为这", + "他还说现在正在为这一", + "他还说现在正在为这一会议", + "他还说现在正在为这一会议作出", + "他还说现在正在为这一会议作出安排", + "他还说现在正在为这一会议作出安排。", + ] + + for t in text: + print("input: {}".format(t)) + result = model.translate(t) + print("model output: {}\n".format(result)) + + # input: 他 + # model output: + # + # input: 他还 + # model output: + # + # input: 他还说 + # model output: + # + # input: 他还说现在 + # model output: + # + # input: 他还说现在正在 + # model output: + # + # input: 他还说现在正在为 + # model output: + # + # input: 他还说现在正在为这 + # model output: he + # + # input: 他还说现在正在为这一 + # model output: he also + # + # input: 他还说现在正在为这一会议 + # model output: he also said + # + # input: 他还说现在正在为这一会议作出 + # model output: he also said that + # + # input: 他还说现在正在为这一会议作出安排 + # model output: he also said that arrangements + # + # input: 他还说现在正在为这一会议作出安排。 + # model output: he also said that arrangements are now being made for this meeting . + ``` + +- ### 2、 API + + - ```python + __init__(max_length=256, max_out_len=256) + ``` + + - 初始化module, 可配置模型的输入文本的最大长度 + + - **参数** + + - max_length(int): 输入文本的最大长度,默认值为256。 + - max_out_len(int): 输出文本的最大解码长度,超过最大解码长度时会截断句子的后半部分,默认值为256。 + + - ```python + translate(text, use_gpu=False) + ``` + + - 预测API,输入源语言的文本(模拟同传语音输入),解码后输出翻译后的目标语言文本。 + + - **参数** + + - text(str): 输入源语言的文本,数据类型为str + - use_gpu(bool): 是否使用gpu进行预测,默认为False + + - **返回** + + - result(str): 翻译后的目标语言文本。 + +## 四、服务部署 + +- PaddleHub Serving可以部署一个在线同声传译服务(需要用户配置一个语音转文本应用预先将语音输入转为中文文字),可以将此接口用于在线web应用。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + + - ```shell + $ hub serving start -m transformer_nist_wait_7 + ``` + + - 启动时会显示加载模型过程,启动成功后显示 + + - ```shell + Loading transformer_nist_wait_7 successful. 
+ ``` + + - 这样就完成了服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 待预测数据(模拟同声传译实时输入) + text = [ + "他", + "他还", + "他还说", + "他还说现在", + "他还说现在正在", + "他还说现在正在为", + "他还说现在正在为这", + "他还说现在正在为这一", + "他还说现在正在为这一会议", + "他还说现在正在为这一会议作出", + "他还说现在正在为这一会议作出安排", + "他还说现在正在为这一会议作出安排。", + ] + + # 指定预测方法为transformer_nist_wait_7并发送post请求,content-type类型应指定json方式 + # HOST_IP为服务器IP + url = "http://HOST_IP:8866/predict/transformer_nist_wait_7" + headers = {"Content-Type": "application/json"} + for t in text: + print("input: {}".format(t)) + r = requests.post(url=url, headers=headers, data=json.dumps({"text": t})) + # 打印预测结果 + print("model output: {}\n".format(result.json()['results'])) + + - 关于PaddleHub Serving更多信息参考:[服务部署](../../../../docs/docs_ch/tutorial/serving.md) + +## 五、更新历史 + +* 1.0.0 + 初始发布 + ```shell + hub install transformer_nist_wait_7==1.0.0 + ``` diff --git a/modules/text/simultaneous_translation/stacl/transformer_nist_wait_7/__init__.py b/modules/text/simultaneous_translation/stacl/transformer_nist_wait_7/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/modules/text/simultaneous_translation/stacl/transformer_nist_wait_7/model.py b/modules/text/simultaneous_translation/stacl/transformer_nist_wait_7/model.py new file mode 100644 index 0000000000000000000000000000000000000000..32cfe670981b1b3bc3e782997679c00242f110e6 --- /dev/null +++ b/modules/text/simultaneous_translation/stacl/transformer_nist_wait_7/model.py @@ -0,0 +1,339 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import numpy as np + +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +from paddlenlp.transformers import WordEmbedding, PositionalEmbedding + + +class DecoderLayer(nn.TransformerDecoderLayer): + def __init__(self, *args, **kwargs): + super(DecoderLayer, self).__init__(*args, **kwargs) + + def forward(self, tgt, memory, tgt_mask=None, memory_mask=None, cache=None): + residual = tgt + if self.normalize_before: + tgt = self.norm1(tgt) + if cache is None: + tgt = self.self_attn(tgt, tgt, tgt, tgt_mask, None) + else: + tgt, incremental_cache = self.self_attn(tgt, tgt, tgt, tgt_mask, + cache[0]) + tgt = residual + self.dropout1(tgt) + if not self.normalize_before: + tgt = self.norm1(tgt) + + residual = tgt + if self.normalize_before: + tgt = self.norm2(tgt) + if len(memory) == 1: + # Full sent + tgt = self.cross_attn(tgt, memory[0], memory[0], memory_mask, None) + else: + # Wait-k policy + cross_attn_outputs = [] + for i in range(tgt.shape[1]): + q = tgt[:, i:i + 1, :] + if i >= len(memory): + e = memory[-1] + else: + e = memory[i] + cross_attn_outputs.append( + self.cross_attn(q, e, e, memory_mask[:, :, i:i + 1, : + e.shape[1]], None)) + tgt = paddle.concat(cross_attn_outputs, axis=1) + tgt = residual + self.dropout2(tgt) + if not self.normalize_before: + tgt = self.norm2(tgt) + + residual = tgt + if self.normalize_before: + tgt = self.norm3(tgt) + tgt = self.linear2(self.dropout(self.activation(self.linear1(tgt)))) + tgt = residual + self.dropout3(tgt) + if not self.normalize_before: + tgt = self.norm3(tgt) + return tgt if cache is None else (tgt, (incremental_cache, )) + + +class Decoder(nn.TransformerDecoder): + """ + PaddlePaddle 2.1 casts memory_mask.dtype to memory.dtype, but in STACL, + type of memory is list, having no dtype attribute. + """ + + def forward(self, tgt, memory, tgt_mask=None, memory_mask=None, cache=None): + output = tgt + new_caches = [] + for i, mod in enumerate(self.layers): + if cache is None: + output = mod(output, + memory, + tgt_mask=tgt_mask, + memory_mask=memory_mask, + cache=None) + else: + output, new_cache = mod(output, + memory, + tgt_mask=tgt_mask, + memory_mask=memory_mask, + cache=cache[i]) + new_caches.append(new_cache) + + if self.norm is not None: + output = self.norm(output) + + return output if cache is None else (output, new_caches) + + +class SimultaneousTransformer(nn.Layer): + """ + model + """ + def __init__(self, + src_vocab_size, + trg_vocab_size, + max_length=256, + n_layer=6, + n_head=8, + d_model=512, + d_inner_hid=2048, + dropout=0.1, + weight_sharing=False, + bos_id=0, + eos_id=1, + waitk=-1): + super(SimultaneousTransformer, self).__init__() + self.trg_vocab_size = trg_vocab_size + self.emb_dim = d_model + self.bos_id = bos_id + self.eos_id = eos_id + self.dropout = dropout + self.waitk = waitk + self.n_layer = n_layer + self.n_head = n_head + self.d_model = d_model + + self.src_word_embedding = WordEmbedding( + vocab_size=src_vocab_size, emb_dim=d_model, bos_id=self.bos_id) + self.src_pos_embedding = PositionalEmbedding( + emb_dim=d_model, max_length=max_length+1) + if weight_sharing: + assert src_vocab_size == trg_vocab_size, ( + "Vocabularies in source and target should be same for weight sharing." 
+ ) + self.trg_word_embedding = self.src_word_embedding + self.trg_pos_embedding = self.src_pos_embedding + else: + self.trg_word_embedding = WordEmbedding( + vocab_size=trg_vocab_size, emb_dim=d_model, bos_id=self.bos_id) + self.trg_pos_embedding = PositionalEmbedding( + emb_dim=d_model, max_length=max_length+1) + + encoder_layer = nn.TransformerEncoderLayer( + d_model=d_model, + nhead=n_head, + dim_feedforward=d_inner_hid, + dropout=dropout, + activation='relu', + normalize_before=True, + bias_attr=[False, True]) + encoder_norm = nn.LayerNorm(d_model) + self.encoder = nn.TransformerEncoder( + encoder_layer=encoder_layer, num_layers=n_layer, norm=encoder_norm) + + decoder_layer = DecoderLayer( + d_model=d_model, + nhead=n_head, + dim_feedforward=d_inner_hid, + dropout=dropout, + activation='relu', + normalize_before=True, + bias_attr=[False, False, True]) + decoder_norm = nn.LayerNorm(d_model) + self.decoder = Decoder( + decoder_layer=decoder_layer, num_layers=n_layer, norm=decoder_norm) + + if weight_sharing: + self.linear = lambda x: paddle.matmul( + x=x, y=self.trg_word_embedding.word_embedding.weight, transpose_y=True) + else: + self.linear = nn.Linear( + in_features=d_model, + out_features=trg_vocab_size, + bias_attr=False) + + def forward(self, src_word, trg_word): + src_max_len = paddle.shape(src_word)[-1] + trg_max_len = paddle.shape(trg_word)[-1] + base_attn_bias = paddle.cast( + src_word == self.bos_id, + dtype=paddle.get_default_dtype()).unsqueeze([1, 2]) * -1e9 + src_slf_attn_bias = base_attn_bias + src_slf_attn_bias.stop_gradient = True + trg_slf_attn_bias = paddle.tensor.triu( + (paddle.ones( + (trg_max_len, trg_max_len), + dtype=paddle.get_default_dtype()) * -np.inf), + 1) + trg_slf_attn_bias.stop_gradient = True + trg_src_attn_bias = paddle.tile(base_attn_bias, [1, 1, trg_max_len, 1]) + src_pos = paddle.cast( + src_word != self.bos_id, dtype="int64") * paddle.arange( + start=0, end=src_max_len) + trg_pos = paddle.cast( + trg_word != self.bos_id, dtype="int64") * paddle.arange( + start=0, end=trg_max_len) + src_emb = self.src_word_embedding(src_word) + src_pos_emb = self.src_pos_embedding(src_pos) + src_emb = src_emb + src_pos_emb + enc_input = F.dropout( + src_emb, p=self.dropout, + training=self.training) if self.dropout else src_emb + with paddle.static.amp.fp16_guard(): + if self.waitk >= src_max_len or self.waitk == -1: + # Full sentence + enc_outputs = [ + self.encoder( + enc_input, src_mask=src_slf_attn_bias) + ] + else: + # Wait-k policy + enc_outputs = [] + for i in range(self.waitk, src_max_len + 1): + enc_output = self.encoder( + enc_input[:, :i, :], + src_mask=src_slf_attn_bias[:, :, :, :i]) + enc_outputs.append(enc_output) + + trg_emb = self.trg_word_embedding(trg_word) + trg_pos_emb = self.trg_pos_embedding(trg_pos) + trg_emb = trg_emb + trg_pos_emb + dec_input = F.dropout( + trg_emb, p=self.dropout, + training=self.training) if self.dropout else trg_emb + dec_output = self.decoder( + dec_input, + enc_outputs, + tgt_mask=trg_slf_attn_bias, + memory_mask=trg_src_attn_bias) + + predict = self.linear(dec_output) + + return predict + + def beam_search(self, src_word, beam_size=4, max_len=256, waitk=-1): + # TODO: "Speculative Beam Search for Simultaneous Translation" + raise NotImplementedError + + def greedy_search(self, + src_word, + max_len=256, + waitk=-1, + caches=None, + bos_id=None): + """ + greedy_search uses streaming reader. It doesn't need calling + encoder many times, an a sub-sentence just needs calling encoder once. 
+ So, it needs previous state(caches) and last one of generated + tokens id last time. + """ + src_max_len = paddle.shape(src_word)[-1] + base_attn_bias = paddle.cast( + src_word == self.bos_id, + dtype=paddle.get_default_dtype()).unsqueeze([1, 2]) * -1e9 + src_slf_attn_bias = base_attn_bias + src_slf_attn_bias.stop_gradient = True + trg_src_attn_bias = paddle.tile(base_attn_bias, [1, 1, 1, 1]) + src_pos = paddle.cast( + src_word != self.bos_id, dtype="int64") * paddle.arange( + start=0, end=src_max_len) + src_emb = self.src_word_embedding(src_word) + src_pos_emb = self.src_pos_embedding(src_pos) + src_emb = src_emb + src_pos_emb + enc_input = F.dropout( + src_emb, p=self.dropout, + training=self.training) if self.dropout else src_emb + enc_outputs = [self.encoder(enc_input, src_mask=src_slf_attn_bias)] + + # constant number + batch_size = enc_outputs[-1].shape[0] + max_len = ( + enc_outputs[-1].shape[1] + 20) if max_len is None else max_len + end_token_tensor = paddle.full( + shape=[batch_size, 1], fill_value=self.eos_id, dtype="int64") + + predict_ids = [] + log_probs = paddle.full( + shape=[batch_size, 1], fill_value=0, dtype="float32") + if not bos_id: + trg_word = paddle.full( + shape=[batch_size, 1], fill_value=self.bos_id, dtype="int64") + else: + trg_word = paddle.full( + shape=[batch_size, 1], fill_value=bos_id, dtype="int64") + + # init states (caches) for transformer + if not caches: + caches = self.decoder.gen_cache(enc_outputs[-1], do_zip=False) + + for i in range(max_len): + trg_pos = paddle.full( + shape=trg_word.shape, fill_value=i, dtype="int64") + trg_emb = self.trg_word_embedding(trg_word) + trg_pos_emb = self.trg_pos_embedding(trg_pos) + trg_emb = trg_emb + trg_pos_emb + dec_input = F.dropout( + trg_emb, p=self.dropout, + training=self.training) if self.dropout else trg_emb + + if waitk < 0 or i >= len(enc_outputs): + # if the decoder step is full sent or longer than all source + # step, then read the whole src + _e = enc_outputs[-1] + dec_output, caches = self.decoder( + dec_input, [_e], None, + trg_src_attn_bias[:, :, :, :_e.shape[1]], caches) + else: + _e = enc_outputs[i] + dec_output, caches = self.decoder( + dec_input, [_e], None, + trg_src_attn_bias[:, :, :, :_e.shape[1]], caches) + + dec_output = paddle.reshape( + dec_output, shape=[-1, dec_output.shape[-1]]) + + logits = self.linear(dec_output) + step_log_probs = paddle.log(F.softmax(logits, axis=-1)) + log_probs = paddle.add(x=step_log_probs, y=log_probs) + scores = log_probs + topk_scores, topk_indices = paddle.topk(x=scores, k=1) + + finished = paddle.equal(topk_indices, end_token_tensor) + trg_word = topk_indices + log_probs = topk_scores + + predict_ids.append(topk_indices) + + if paddle.all(finished).numpy(): + break + + predict_ids = paddle.stack(predict_ids, axis=0) + finished_seq = paddle.transpose(predict_ids, [1, 2, 0]) + finished_scores = topk_scores + + return finished_seq, finished_scores, caches \ No newline at end of file diff --git a/modules/text/simultaneous_translation/stacl/transformer_nist_wait_7/module.py b/modules/text/simultaneous_translation/stacl/transformer_nist_wait_7/module.py new file mode 100644 index 0000000000000000000000000000000000000000..bc75ea3fccc1be2f366cf1fda015538833e63fa1 --- /dev/null +++ b/modules/text/simultaneous_translation/stacl/transformer_nist_wait_7/module.py @@ -0,0 +1,125 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +import jieba +import paddle +from paddlenlp.transformers import position_encoding_init +from paddlenlp.transformers import WordEmbedding, PositionalEmbedding +from paddlehub.env import MODULE_HOME +from paddlehub.module.module import moduleinfo, serving + +from transformer_nist_wait_7.model import SimultaneousTransformer +from transformer_nist_wait_7.processor import STACLTokenizer, predict + + +@moduleinfo( + name="transformer_nist_wait_7", + version="1.0.0", + summary="", + author="PaddlePaddle", + author_email="", + type="nlp/simultaneous_translation", +) +class STTransformer(): + """ + Transformer model for simultaneous translation. + """ + + # Model config + model_config = { + # Number of head used in multi-head attention. + "n_head": 8, + # Number of sub-layers to be stacked in the encoder and decoder. + "n_layer": 6, + # The dimension for word embeddings, which is also the last dimension of + # the input and output of multi-head attention, position-wise feed-forward + # networks, encoder and decoder. + "d_model": 512, + } + + def __init__(self, + max_length=256, + max_out_len=256, + ): + super(STTransformer, self).__init__() + bpe_codes_fpath = os.path.join(MODULE_HOME, "transformer_nist_wait_7", "assets", "2M.zh2en.dict4bpe.zh") + src_vocab_fpath = os.path.join(MODULE_HOME, "transformer_nist_wait_7", "assets", "nist.20k.zh.vocab") + trg_vocab_fpath = os.path.join(MODULE_HOME, "transformer_nist_wait_7", "assets", "nist.10k.en.vocab") + params_fpath = os.path.join(MODULE_HOME, "transformer_nist_wait_7", "assets", "transformer.pdparams") + self.max_length = max_length + self.max_out_len = max_out_len + self.tokenizer = STACLTokenizer( + bpe_codes_fpath, + src_vocab_fpath, + trg_vocab_fpath, + ) + src_vocab_size = self.tokenizer.src_vocab_size + trg_vocab_size = self.tokenizer.trg_vocab_size + self.transformer = SimultaneousTransformer( + src_vocab_size, + trg_vocab_size, + max_length=self.max_length, + n_layer=self.model_config['n_layer'], + n_head=self.model_config['n_head'], + d_model=self.model_config['d_model'], + ) + model_dict = paddle.load(params_fpath) + # To avoid a longer length than training, reset the size of position + # encoding to max_length + model_dict["src_pos_embedding.pos_encoder.weight"] = position_encoding_init( + self.max_length + 1, self.model_config['d_model']) + model_dict["trg_pos_embedding.pos_encoder.weight"] = position_encoding_init( + self.max_length + 1, self.model_config['d_model']) + self.transformer.load_dict(model_dict) + + @serving + def translate(self, text, use_gpu=False): + paddle.set_device('gpu') if use_gpu else paddle.set_device('cpu') + + # Word segmentation + text = ' '.join(jieba.cut(text)) + # For decoding max length + decoder_max_length = 1 + # For decoding cache + cache = None + # For decoding start token id + bos_id = None + # Current source word index + i = 0 + # For decoding: is_last=True, max_len=256 + is_last = False + # Tokenized id + user_input_tokenized = [] + # Store 
the translation + result = [] + + bpe_str, tokenized_src = self.tokenizer.tokenize(text) + while i < len(tokenized_src): + user_input_tokenized.append(tokenized_src[i]) + if bpe_str[i] in ['。', '?', '!']: + is_last = True + result, cache, bos_id = predict( + user_input_tokenized, + decoder_max_length, + is_last, + cache, + bos_id, + result, + self.tokenizer, + self.transformer, + max_out_len=self.max_out_len) + i += 1 + return " ".join(result) \ No newline at end of file diff --git a/modules/text/simultaneous_translation/stacl/transformer_nist_wait_7/processor.py b/modules/text/simultaneous_translation/stacl/transformer_nist_wait_7/processor.py new file mode 100644 index 0000000000000000000000000000000000000000..e90765510ef1f069c63be05fe338587b3071f7e4 --- /dev/null +++ b/modules/text/simultaneous_translation/stacl/transformer_nist_wait_7/processor.py @@ -0,0 +1,124 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import paddle +from paddlenlp.data import Vocab +from subword_nmt import subword_nmt + + +class STACLTokenizer: + """ + Jieba+BPE, and convert tokens to ids. + """ + + def __init__(self, + bpe_codes_fpath, + src_vocab_fpath, + trg_vocab_fpath, + special_token=["", "", ""]): + bpe_parser = subword_nmt.create_apply_bpe_parser() + bpe_args = bpe_parser.parse_args(args=['-c', bpe_codes_fpath]) + bpe_args.codes.close() + bpe_args.codes = open(bpe_codes_fpath, 'r', encoding='utf-8') + self.bpe = subword_nmt.BPE(bpe_args.codes, bpe_args.merges, + bpe_args.separator, None, + bpe_args.glossaries) + + self.src_vocab = Vocab.load_vocabulary( + src_vocab_fpath, + bos_token=special_token[0], + eos_token=special_token[1], + unk_token=special_token[2]) + + self.trg_vocab = Vocab.load_vocabulary( + trg_vocab_fpath, + bos_token=special_token[0], + eos_token=special_token[1], + unk_token=special_token[2]) + + self.src_vocab_size = len(self.src_vocab) + self.trg_vocab_size = len(self.trg_vocab) + + def tokenize(self, text): + bpe_str = self.bpe.process_line(text) + ids = self.src_vocab.to_indices(bpe_str.split()) + return bpe_str.split(), ids + + +def post_process_seq(seq, + bos_idx=0, + eos_idx=1, + output_bos=False, + output_eos=False): + """ + Post-process the decoded sequence. 
+ """ + eos_pos = len(seq) - 1 + for i, idx in enumerate(seq): + if idx == eos_idx: + eos_pos = i + break + seq = [ + idx for idx in seq[:eos_pos + 1] + if (output_bos or idx != bos_idx) and (output_eos or idx != eos_idx) + ] + return seq + + +def predict(tokenized_src, + decoder_max_length, + is_last, + cache, + bos_id, + result, + tokenizer, + transformer, + n_best=1, + max_out_len=256, + eos_idx=1, + waitk=7, + ): + # Set evaluate mode + transformer.eval() + + if len(tokenized_src) < waitk: + return result, cache, bos_id + + with paddle.no_grad(): + paddle.disable_static() + input_src = tokenized_src + if is_last: + decoder_max_length = max_out_len + input_src += [eos_idx] + src_word = paddle.to_tensor(input_src).unsqueeze(axis=0) + finished_seq, finished_scores, cache = transformer.greedy_search( + src_word, + max_len=decoder_max_length, + waitk=waitk, + caches=cache, + bos_id=bos_id) + finished_seq = finished_seq.numpy() + for beam_idx, beam in enumerate(finished_seq[0]): + if beam_idx >= n_best: + break + id_list = post_process_seq(beam) + if len(id_list) == 0: + continue + bos_id = id_list[-1] + word_list = tokenizer.trg_vocab.to_tokens(id_list) + for word in word_list: + result.append(word) + res = ' '.join(word_list).replace('@@ ', '') + paddle.enable_static() + return result, cache, bos_id \ No newline at end of file diff --git a/modules/text/simultaneous_translation/stacl/transformer_nist_wait_7/requirements.txt b/modules/text/simultaneous_translation/stacl/transformer_nist_wait_7/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..46ecba73afadc86c801fbeb72c72f7c7036491f0 --- /dev/null +++ b/modules/text/simultaneous_translation/stacl/transformer_nist_wait_7/requirements.txt @@ -0,0 +1,2 @@ +jieba==0.42.1 +subword-nmt==0.3.7 diff --git a/modules/text/simultaneous_translation/stacl/transformer_nist_wait_all/README.md b/modules/text/simultaneous_translation/stacl/transformer_nist_wait_all/README.md new file mode 100644 index 0000000000000000000000000000000000000000..da536fd9366794b59fe44a76f484728e79863ae3 --- /dev/null +++ b/modules/text/simultaneous_translation/stacl/transformer_nist_wait_all/README.md @@ -0,0 +1,210 @@ +# transformer_nist_wait_all +|模型名称|transformer_nist_wait_all| +| :--- | :---: | +|类别|同声传译| +|网络|transformer| +|数据集|NIST 2008-中英翻译数据集| +|是否支持Fine-tuning|否| +|模型大小|377MB| +|最新更新日期|2021-09-17| +|数据指标|-| + +## 一、模型基本信息 + +- ### 模型介绍 + + - 同声传译(Simultaneous Translation),即在句子完成之前进行翻译,同声传译的目标是实现同声传译的自动化,它可以与源语言同时翻译,延迟时间只有几秒钟。 + STACL 是论文 [STACL: Simultaneous Translation with Implicit Anticipation and Controllable Latency using Prefix-to-Prefix Framework](https://www.aclweb.org/anthology/P19-1289/) 中针对同传提出的适用于所有同传场景的翻译架构。 + - STACL 主要具有以下优势: + + - Prefix-to-Prefix架构拥有预测能力,即在未看到源词的情况下仍然可以翻译出对应的目标词,克服了SOV→SVO等词序差异 +

+    <p align="center">(图:Seq2Seq 整句翻译与 STACL Wait-k(图中 Wait-2)策略的解码流程对比)</p>

+ 和传统的机器翻译模型主要的区别在于翻译时是否需要利用全句的源句。上图中,Seq2Seq模型需要等到全句的源句(1-5)全部输入Encoder后,Decoder才开始解码进行翻译;而STACL架构采用了Wait-k(图中Wait-2)的策略,当源句只有两个词(1和2)输入到Encoder后,Decoder即可开始解码预测目标句的第一个词。 + + - Wait-k策略可以不需要全句的源句,直接预测目标句,可以实现任意的字级延迟,同时保持较高的翻译质量。 +
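+    - 作为对下图示例的补充,这里给出一段极简的示意代码:当 k 小于源句长度时即为 Wait-k 的"边读边写";当 k 不小于源句长度(对应本模块采用的整句策略,即 waitk=-1)时,则退化为整句读完后才开始解码。该代码并非本模块的实现,其中的函数名与参数(如 translate_step、eos_token)均为示意性假设。
+
+      ```python
+      # 仅为示意(非本模块实现):k 足够大(或 waitk=-1)时,Wait-k 退化为整句翻译
+      def schedule(source_words, k, translate_step, eos_token="EOS", max_len=256):
+          """translate_step(src_prefix, tgt_prefix): 假设的函数,返回下一个目标词。"""
+          src_prefix, target = [], []
+          for word in source_words:
+              src_prefix.append(word)                                # 逐词读入源句
+              if len(src_prefix) >= k:                               # 整句策略下该分支最多在句末触发
+                  target.append(translate_step(src_prefix, target))
+          while len(target) < max_len and (not target or target[-1] != eos_token):
+              target.append(translate_step(src_prefix, target))      # 整句读完后一次性解码
+          return target
+      ```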

+    <p align="center">(图:Wait-2 同传解码(simultaneous)与整句翻译基线(non-simultaneous baseline)的对比)</p>

+ Wait-k策略首先等待源句单词,然后与源句的其余部分同时翻译,即输出总是隐藏在输入后面。这是受到同声传译人员的启发,同声传译人员通常会在几秒钟内开始翻译演讲者的演讲,在演讲者结束几秒钟后完成。例如,如果k=2,第一个目标词使用前2个源词预测,第二个目标词使用前3个源词预测,以此类推。上图中,(a)simultaneous: our wait-2 等到"布什"和"总统"输入后就开始解码预测"pres.",而(b) non-simultaneous baseline 为传统的翻译模型,需要等到整句"布什 总统 在 莫斯科 与 普京 会晤"才开始解码预测。 + + - 该PaddleHub Module基于transformer网络结构,采用的策略是等到全句结束再进行中文到英文的翻译,即waitk=-1。 + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.1.0 + + - paddlehub >= 2.1.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) + +- ### 2、安装 + + - ```shell + $ hub install transformer_nist_wait_all + ``` + + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、预测代码示例 + + - ```python + import paddlehub as hub + + model = hub.Module(name="transformer_nist_wait_all") + + # 待预测数据(模拟同声传译实时输入) + text = [ + "他", + "他还", + "他还说", + "他还说现在", + "他还说现在正在", + "他还说现在正在为", + "他还说现在正在为这", + "他还说现在正在为这一", + "他还说现在正在为这一会议", + "他还说现在正在为这一会议作出", + "他还说现在正在为这一会议作出安排", + "他还说现在正在为这一会议作出安排。", + ] + + for t in text: + print("input: {}".format(t)) + result = model.translate(t) + print("model output: {}\n".format(result)) + + # input: 他 + # model output: + # + # input: 他还 + # model output: + # + # input: 他还说 + # model output: + # + # input: 他还说现在 + # model output: + # + # input: 他还说现在正在 + # model output: + # + # input: 他还说现在正在为 + # model output: + # + # input: 他还说现在正在为这 + # model output: + # + # input: 他还说现在正在为这一 + # model output: + # + # input: 他还说现在正在为这一会议 + # model output: + # + # input: 他还说现在正在为这一会议作出 + # model output: + # + # input: 他还说现在正在为这一会议作出安排 + # model output: + # + # input: 他还说现在正在为这一会议作出安排。 + # model output: he also said that arrangements are now being made for this meeting . + ``` + +- ### 2、 API + + - ```python + __init__(max_length=256, max_out_len=256) + ``` + + - 初始化module, 可配置模型的输入文本的最大长度 + + - **参数** + + - max_length(int): 输入文本的最大长度,默认值为256。 + - max_out_len(int): 输出文本的最大解码长度,超过最大解码长度时会截断句子的后半部分,默认值为256。 + + - ```python + translate(text, use_gpu=False) + ``` + + - 预测API,输入源语言的文本(模拟同传语音输入),解码后输出翻译后的目标语言文本。 + + - **参数** + + - text(str): 输入源语言的文本,数据类型为str + - use_gpu(bool): 是否使用gpu进行预测,默认为False + + - **返回** + + - result(str): 翻译后的目标语言文本。 + +## 四、服务部署 + +- PaddleHub Serving可以部署一个在线同声传译服务(需要用户配置一个语音转文本应用预先将语音输入转为中文文字),可以将此接口用于在线web应用。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + + - ```shell + $ hub serving start -m transformer_nist_wait_all + ``` + + - 启动时会显示加载模型过程,启动成功后显示 + + - ```shell + Loading transformer_nist_wait_all successful. 
+ ``` + + - 这样就完成了服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 待预测数据(模拟同声传译实时输入) + text = [ + "他", + "他还", + "他还说", + "他还说现在", + "他还说现在正在", + "他还说现在正在为", + "他还说现在正在为这", + "他还说现在正在为这一", + "他还说现在正在为这一会议", + "他还说现在正在为这一会议作出", + "他还说现在正在为这一会议作出安排", + "他还说现在正在为这一会议作出安排。", + ] + + # 指定预测方法为transformer_nist_wait_all并发送post请求,content-type类型应指定json方式 + # HOST_IP为服务器IP + url = "http://HOST_IP:8866/predict/transformer_nist_wait_all" + headers = {"Content-Type": "application/json"} + for t in text: + print("input: {}".format(t)) + r = requests.post(url=url, headers=headers, data=json.dumps({"text": t})) + # 打印预测结果 + print("model output: {}\n".format(result.json()['results'])) + + - 关于PaddleHub Serving更多信息参考:[服务部署](../../../../docs/docs_ch/tutorial/serving.md) + +## 五、更新历史 + +* 1.0.0 + 初始发布 + ```shell + hub install transformer_nist_wait_all==1.0.0 + ``` diff --git a/modules/text/simultaneous_translation/stacl/transformer_nist_wait_all/__init__.py b/modules/text/simultaneous_translation/stacl/transformer_nist_wait_all/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/modules/text/simultaneous_translation/stacl/transformer_nist_wait_all/model.py b/modules/text/simultaneous_translation/stacl/transformer_nist_wait_all/model.py new file mode 100644 index 0000000000000000000000000000000000000000..32cfe670981b1b3bc3e782997679c00242f110e6 --- /dev/null +++ b/modules/text/simultaneous_translation/stacl/transformer_nist_wait_all/model.py @@ -0,0 +1,339 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import numpy as np + +import paddle +import paddle.nn as nn +import paddle.nn.functional as F +from paddlenlp.transformers import WordEmbedding, PositionalEmbedding + + +class DecoderLayer(nn.TransformerDecoderLayer): + def __init__(self, *args, **kwargs): + super(DecoderLayer, self).__init__(*args, **kwargs) + + def forward(self, tgt, memory, tgt_mask=None, memory_mask=None, cache=None): + residual = tgt + if self.normalize_before: + tgt = self.norm1(tgt) + if cache is None: + tgt = self.self_attn(tgt, tgt, tgt, tgt_mask, None) + else: + tgt, incremental_cache = self.self_attn(tgt, tgt, tgt, tgt_mask, + cache[0]) + tgt = residual + self.dropout1(tgt) + if not self.normalize_before: + tgt = self.norm1(tgt) + + residual = tgt + if self.normalize_before: + tgt = self.norm2(tgt) + if len(memory) == 1: + # Full sent + tgt = self.cross_attn(tgt, memory[0], memory[0], memory_mask, None) + else: + # Wait-k policy + cross_attn_outputs = [] + for i in range(tgt.shape[1]): + q = tgt[:, i:i + 1, :] + if i >= len(memory): + e = memory[-1] + else: + e = memory[i] + cross_attn_outputs.append( + self.cross_attn(q, e, e, memory_mask[:, :, i:i + 1, : + e.shape[1]], None)) + tgt = paddle.concat(cross_attn_outputs, axis=1) + tgt = residual + self.dropout2(tgt) + if not self.normalize_before: + tgt = self.norm2(tgt) + + residual = tgt + if self.normalize_before: + tgt = self.norm3(tgt) + tgt = self.linear2(self.dropout(self.activation(self.linear1(tgt)))) + tgt = residual + self.dropout3(tgt) + if not self.normalize_before: + tgt = self.norm3(tgt) + return tgt if cache is None else (tgt, (incremental_cache, )) + + +class Decoder(nn.TransformerDecoder): + """ + PaddlePaddle 2.1 casts memory_mask.dtype to memory.dtype, but in STACL, + type of memory is list, having no dtype attribute. + """ + + def forward(self, tgt, memory, tgt_mask=None, memory_mask=None, cache=None): + output = tgt + new_caches = [] + for i, mod in enumerate(self.layers): + if cache is None: + output = mod(output, + memory, + tgt_mask=tgt_mask, + memory_mask=memory_mask, + cache=None) + else: + output, new_cache = mod(output, + memory, + tgt_mask=tgt_mask, + memory_mask=memory_mask, + cache=cache[i]) + new_caches.append(new_cache) + + if self.norm is not None: + output = self.norm(output) + + return output if cache is None else (output, new_caches) + + +class SimultaneousTransformer(nn.Layer): + """ + model + """ + def __init__(self, + src_vocab_size, + trg_vocab_size, + max_length=256, + n_layer=6, + n_head=8, + d_model=512, + d_inner_hid=2048, + dropout=0.1, + weight_sharing=False, + bos_id=0, + eos_id=1, + waitk=-1): + super(SimultaneousTransformer, self).__init__() + self.trg_vocab_size = trg_vocab_size + self.emb_dim = d_model + self.bos_id = bos_id + self.eos_id = eos_id + self.dropout = dropout + self.waitk = waitk + self.n_layer = n_layer + self.n_head = n_head + self.d_model = d_model + + self.src_word_embedding = WordEmbedding( + vocab_size=src_vocab_size, emb_dim=d_model, bos_id=self.bos_id) + self.src_pos_embedding = PositionalEmbedding( + emb_dim=d_model, max_length=max_length+1) + if weight_sharing: + assert src_vocab_size == trg_vocab_size, ( + "Vocabularies in source and target should be same for weight sharing." 
+ ) + self.trg_word_embedding = self.src_word_embedding + self.trg_pos_embedding = self.src_pos_embedding + else: + self.trg_word_embedding = WordEmbedding( + vocab_size=trg_vocab_size, emb_dim=d_model, bos_id=self.bos_id) + self.trg_pos_embedding = PositionalEmbedding( + emb_dim=d_model, max_length=max_length+1) + + encoder_layer = nn.TransformerEncoderLayer( + d_model=d_model, + nhead=n_head, + dim_feedforward=d_inner_hid, + dropout=dropout, + activation='relu', + normalize_before=True, + bias_attr=[False, True]) + encoder_norm = nn.LayerNorm(d_model) + self.encoder = nn.TransformerEncoder( + encoder_layer=encoder_layer, num_layers=n_layer, norm=encoder_norm) + + decoder_layer = DecoderLayer( + d_model=d_model, + nhead=n_head, + dim_feedforward=d_inner_hid, + dropout=dropout, + activation='relu', + normalize_before=True, + bias_attr=[False, False, True]) + decoder_norm = nn.LayerNorm(d_model) + self.decoder = Decoder( + decoder_layer=decoder_layer, num_layers=n_layer, norm=decoder_norm) + + if weight_sharing: + self.linear = lambda x: paddle.matmul( + x=x, y=self.trg_word_embedding.word_embedding.weight, transpose_y=True) + else: + self.linear = nn.Linear( + in_features=d_model, + out_features=trg_vocab_size, + bias_attr=False) + + def forward(self, src_word, trg_word): + src_max_len = paddle.shape(src_word)[-1] + trg_max_len = paddle.shape(trg_word)[-1] + base_attn_bias = paddle.cast( + src_word == self.bos_id, + dtype=paddle.get_default_dtype()).unsqueeze([1, 2]) * -1e9 + src_slf_attn_bias = base_attn_bias + src_slf_attn_bias.stop_gradient = True + trg_slf_attn_bias = paddle.tensor.triu( + (paddle.ones( + (trg_max_len, trg_max_len), + dtype=paddle.get_default_dtype()) * -np.inf), + 1) + trg_slf_attn_bias.stop_gradient = True + trg_src_attn_bias = paddle.tile(base_attn_bias, [1, 1, trg_max_len, 1]) + src_pos = paddle.cast( + src_word != self.bos_id, dtype="int64") * paddle.arange( + start=0, end=src_max_len) + trg_pos = paddle.cast( + trg_word != self.bos_id, dtype="int64") * paddle.arange( + start=0, end=trg_max_len) + src_emb = self.src_word_embedding(src_word) + src_pos_emb = self.src_pos_embedding(src_pos) + src_emb = src_emb + src_pos_emb + enc_input = F.dropout( + src_emb, p=self.dropout, + training=self.training) if self.dropout else src_emb + with paddle.static.amp.fp16_guard(): + if self.waitk >= src_max_len or self.waitk == -1: + # Full sentence + enc_outputs = [ + self.encoder( + enc_input, src_mask=src_slf_attn_bias) + ] + else: + # Wait-k policy + enc_outputs = [] + for i in range(self.waitk, src_max_len + 1): + enc_output = self.encoder( + enc_input[:, :i, :], + src_mask=src_slf_attn_bias[:, :, :, :i]) + enc_outputs.append(enc_output) + + trg_emb = self.trg_word_embedding(trg_word) + trg_pos_emb = self.trg_pos_embedding(trg_pos) + trg_emb = trg_emb + trg_pos_emb + dec_input = F.dropout( + trg_emb, p=self.dropout, + training=self.training) if self.dropout else trg_emb + dec_output = self.decoder( + dec_input, + enc_outputs, + tgt_mask=trg_slf_attn_bias, + memory_mask=trg_src_attn_bias) + + predict = self.linear(dec_output) + + return predict + + def beam_search(self, src_word, beam_size=4, max_len=256, waitk=-1): + # TODO: "Speculative Beam Search for Simultaneous Translation" + raise NotImplementedError + + def greedy_search(self, + src_word, + max_len=256, + waitk=-1, + caches=None, + bos_id=None): + """ + greedy_search uses streaming reader. It doesn't need calling + encoder many times, an a sub-sentence just needs calling encoder once. 
+ So, it needs previous state(caches) and last one of generated + tokens id last time. + """ + src_max_len = paddle.shape(src_word)[-1] + base_attn_bias = paddle.cast( + src_word == self.bos_id, + dtype=paddle.get_default_dtype()).unsqueeze([1, 2]) * -1e9 + src_slf_attn_bias = base_attn_bias + src_slf_attn_bias.stop_gradient = True + trg_src_attn_bias = paddle.tile(base_attn_bias, [1, 1, 1, 1]) + src_pos = paddle.cast( + src_word != self.bos_id, dtype="int64") * paddle.arange( + start=0, end=src_max_len) + src_emb = self.src_word_embedding(src_word) + src_pos_emb = self.src_pos_embedding(src_pos) + src_emb = src_emb + src_pos_emb + enc_input = F.dropout( + src_emb, p=self.dropout, + training=self.training) if self.dropout else src_emb + enc_outputs = [self.encoder(enc_input, src_mask=src_slf_attn_bias)] + + # constant number + batch_size = enc_outputs[-1].shape[0] + max_len = ( + enc_outputs[-1].shape[1] + 20) if max_len is None else max_len + end_token_tensor = paddle.full( + shape=[batch_size, 1], fill_value=self.eos_id, dtype="int64") + + predict_ids = [] + log_probs = paddle.full( + shape=[batch_size, 1], fill_value=0, dtype="float32") + if not bos_id: + trg_word = paddle.full( + shape=[batch_size, 1], fill_value=self.bos_id, dtype="int64") + else: + trg_word = paddle.full( + shape=[batch_size, 1], fill_value=bos_id, dtype="int64") + + # init states (caches) for transformer + if not caches: + caches = self.decoder.gen_cache(enc_outputs[-1], do_zip=False) + + for i in range(max_len): + trg_pos = paddle.full( + shape=trg_word.shape, fill_value=i, dtype="int64") + trg_emb = self.trg_word_embedding(trg_word) + trg_pos_emb = self.trg_pos_embedding(trg_pos) + trg_emb = trg_emb + trg_pos_emb + dec_input = F.dropout( + trg_emb, p=self.dropout, + training=self.training) if self.dropout else trg_emb + + if waitk < 0 or i >= len(enc_outputs): + # if the decoder step is full sent or longer than all source + # step, then read the whole src + _e = enc_outputs[-1] + dec_output, caches = self.decoder( + dec_input, [_e], None, + trg_src_attn_bias[:, :, :, :_e.shape[1]], caches) + else: + _e = enc_outputs[i] + dec_output, caches = self.decoder( + dec_input, [_e], None, + trg_src_attn_bias[:, :, :, :_e.shape[1]], caches) + + dec_output = paddle.reshape( + dec_output, shape=[-1, dec_output.shape[-1]]) + + logits = self.linear(dec_output) + step_log_probs = paddle.log(F.softmax(logits, axis=-1)) + log_probs = paddle.add(x=step_log_probs, y=log_probs) + scores = log_probs + topk_scores, topk_indices = paddle.topk(x=scores, k=1) + + finished = paddle.equal(topk_indices, end_token_tensor) + trg_word = topk_indices + log_probs = topk_scores + + predict_ids.append(topk_indices) + + if paddle.all(finished).numpy(): + break + + predict_ids = paddle.stack(predict_ids, axis=0) + finished_seq = paddle.transpose(predict_ids, [1, 2, 0]) + finished_scores = topk_scores + + return finished_seq, finished_scores, caches \ No newline at end of file diff --git a/modules/text/simultaneous_translation/stacl/transformer_nist_wait_all/module.py b/modules/text/simultaneous_translation/stacl/transformer_nist_wait_all/module.py new file mode 100644 index 0000000000000000000000000000000000000000..95c40e1182078f336e1c5f40d2e73f9025be3549 --- /dev/null +++ b/modules/text/simultaneous_translation/stacl/transformer_nist_wait_all/module.py @@ -0,0 +1,125 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +import jieba +import paddle +from paddlenlp.transformers import position_encoding_init +from paddlenlp.transformers import WordEmbedding, PositionalEmbedding +from paddlehub.env import MODULE_HOME +from paddlehub.module.module import moduleinfo, serving + +from transformer_nist_wait_all.model import SimultaneousTransformer +from transformer_nist_wait_all.processor import STACLTokenizer, predict + + +@moduleinfo( + name="transformer_nist_wait_all", + version="1.0.0", + summary="", + author="PaddlePaddle", + author_email="", + type="nlp/simultaneous_translation", +) +class STTransformer(): + """ + Transformer model for simultaneous translation. + """ + + # Model config + model_config = { + # Number of head used in multi-head attention. + "n_head": 8, + # Number of sub-layers to be stacked in the encoder and decoder. + "n_layer": 6, + # The dimension for word embeddings, which is also the last dimension of + # the input and output of multi-head attention, position-wise feed-forward + # networks, encoder and decoder. + "d_model": 512, + } + + def __init__(self, + max_length=256, + max_out_len=256, + ): + super(STTransformer, self).__init__() + bpe_codes_fpath = os.path.join(MODULE_HOME, "transformer_nist_wait_all", "assets", "2M.zh2en.dict4bpe.zh") + src_vocab_fpath = os.path.join(MODULE_HOME, "transformer_nist_wait_all", "assets", "nist.20k.zh.vocab") + trg_vocab_fpath = os.path.join(MODULE_HOME, "transformer_nist_wait_all", "assets", "nist.10k.en.vocab") + params_fpath = os.path.join(MODULE_HOME, "transformer_nist_wait_all", "assets", "transformer.pdparams") + self.max_length = max_length + self.max_out_len = max_out_len + self.tokenizer = STACLTokenizer( + bpe_codes_fpath, + src_vocab_fpath, + trg_vocab_fpath, + ) + src_vocab_size = self.tokenizer.src_vocab_size + trg_vocab_size = self.tokenizer.trg_vocab_size + self.transformer = SimultaneousTransformer( + src_vocab_size, + trg_vocab_size, + max_length=self.max_length, + n_layer=self.model_config['n_layer'], + n_head=self.model_config['n_head'], + d_model=self.model_config['d_model'], + ) + model_dict = paddle.load(params_fpath) + # To avoid a longer length than training, reset the size of position + # encoding to max_length + model_dict["src_pos_embedding.pos_encoder.weight"] = position_encoding_init( + self.max_length + 1, self.model_config['d_model']) + model_dict["trg_pos_embedding.pos_encoder.weight"] = position_encoding_init( + self.max_length + 1, self.model_config['d_model']) + self.transformer.load_dict(model_dict) + + @serving + def translate(self, text, use_gpu=False): + paddle.set_device('gpu') if use_gpu else paddle.set_device('cpu') + + # Word segmentation + text = ' '.join(jieba.cut(text)) + # For decoding max length + decoder_max_length = 1 + # For decoding cache + cache = None + # For decoding start token id + bos_id = None + # Current source word index + i = 0 + # For decoding: is_last=True, max_len=256 + is_last = False + # Tokenized id + user_input_tokenized = 
[] + # Store the translation + result = [] + + bpe_str, tokenized_src = self.tokenizer.tokenize(text) + while i < len(tokenized_src): + user_input_tokenized.append(tokenized_src[i]) + if bpe_str[i] in ['。', '?', '!']: + is_last = True + result, cache, bos_id = predict( + user_input_tokenized, + decoder_max_length, + is_last, + cache, + bos_id, + result, + self.tokenizer, + self.transformer, + max_out_len=self.max_out_len) + i += 1 + return " ".join(result) \ No newline at end of file diff --git a/modules/text/simultaneous_translation/stacl/transformer_nist_wait_all/processor.py b/modules/text/simultaneous_translation/stacl/transformer_nist_wait_all/processor.py new file mode 100644 index 0000000000000000000000000000000000000000..8283087df4a5d228a1d5e7888a29da00cf5acc8c --- /dev/null +++ b/modules/text/simultaneous_translation/stacl/transformer_nist_wait_all/processor.py @@ -0,0 +1,124 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import paddle +from paddlenlp.data import Vocab +from subword_nmt import subword_nmt + + +class STACLTokenizer: + """ + Jieba+BPE, and convert tokens to ids. + """ + + def __init__(self, + bpe_codes_fpath, + src_vocab_fpath, + trg_vocab_fpath, + special_token=["", "", ""]): + bpe_parser = subword_nmt.create_apply_bpe_parser() + bpe_args = bpe_parser.parse_args(args=['-c', bpe_codes_fpath]) + bpe_args.codes.close() + bpe_args.codes = open(bpe_codes_fpath, 'r', encoding='utf-8') + self.bpe = subword_nmt.BPE(bpe_args.codes, bpe_args.merges, + bpe_args.separator, None, + bpe_args.glossaries) + + self.src_vocab = Vocab.load_vocabulary( + src_vocab_fpath, + bos_token=special_token[0], + eos_token=special_token[1], + unk_token=special_token[2]) + + self.trg_vocab = Vocab.load_vocabulary( + trg_vocab_fpath, + bos_token=special_token[0], + eos_token=special_token[1], + unk_token=special_token[2]) + + self.src_vocab_size = len(self.src_vocab) + self.trg_vocab_size = len(self.trg_vocab) + + def tokenize(self, text): + bpe_str = self.bpe.process_line(text) + ids = self.src_vocab.to_indices(bpe_str.split()) + return bpe_str.split(), ids + + +def post_process_seq(seq, + bos_idx=0, + eos_idx=1, + output_bos=False, + output_eos=False): + """ + Post-process the decoded sequence. 
+ """ + eos_pos = len(seq) - 1 + for i, idx in enumerate(seq): + if idx == eos_idx: + eos_pos = i + break + seq = [ + idx for idx in seq[:eos_pos + 1] + if (output_bos or idx != bos_idx) and (output_eos or idx != eos_idx) + ] + return seq + + +def predict(tokenized_src, + decoder_max_length, + is_last, + cache, + bos_id, + result, + tokenizer, + transformer, + n_best=1, + max_out_len=256, + eos_idx=1, + waitk=-1, + ): + # Set evaluate mode + transformer.eval() + + if not is_last: + return result, cache, bos_id + + with paddle.no_grad(): + paddle.disable_static() + input_src = tokenized_src + if is_last: + decoder_max_length = max_out_len + input_src += [eos_idx] + src_word = paddle.to_tensor(input_src).unsqueeze(axis=0) + finished_seq, finished_scores, cache = transformer.greedy_search( + src_word, + max_len=decoder_max_length, + waitk=waitk, + caches=cache, + bos_id=bos_id) + finished_seq = finished_seq.numpy() + for beam_idx, beam in enumerate(finished_seq[0]): + if beam_idx >= n_best: + break + id_list = post_process_seq(beam) + if len(id_list) == 0: + continue + bos_id = id_list[-1] + word_list = tokenizer.trg_vocab.to_tokens(id_list) + for word in word_list: + result.append(word) + res = ' '.join(word_list).replace('@@ ', '') + paddle.enable_static() + return result, cache, bos_id \ No newline at end of file diff --git a/modules/text/simultaneous_translation/stacl/transformer_nist_wait_all/requirements.txt b/modules/text/simultaneous_translation/stacl/transformer_nist_wait_all/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..46ecba73afadc86c801fbeb72c72f7c7036491f0 --- /dev/null +++ b/modules/text/simultaneous_translation/stacl/transformer_nist_wait_all/requirements.txt @@ -0,0 +1,2 @@ +jieba==0.42.1 +subword-nmt==0.3.7 diff --git a/modules/text/syntactic_analysis/DDParser/README.md b/modules/text/syntactic_analysis/DDParser/README.md index c4401c5bbcb9158d9a5cb62e6c6991bf9da72896..d93dcfa716af798e29b218d942c38b7f2a8fac7a 100644 --- a/modules/text/syntactic_analysis/DDParser/README.md +++ b/modules/text/syntactic_analysis/DDParser/README.md @@ -3,11 +3,11 @@ |模型名称|DDParser| | :--- | :---: | |类别|文本-句法分析| -|网络|LSTM| +|网络|Deep Biaffine Attention| |数据集|搜索query、网页文本、语音输入等数据| |是否支持Fine-tuning|否| -|模型大小|33MB| -|最新更新日期|2021-02-26| +|模型大小|61MB| +|最新更新日期|2021-10-26| |数据指标|-| @@ -24,15 +24,11 @@ - ### 1、环境依赖 - - paddlepaddle >= 1.8.2 + - paddlepaddle >= 2.1.0 - - paddlehub >= 1.7.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) + - paddlenlp >= 2.1.0 - - 额外依赖ddparser - - - ```shell - $ pip install ddparser - ``` + - paddlehub >= 2.1.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) - ### 2、安装 @@ -42,9 +38,6 @@ - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) - - - ## 三、模型API预测 - ### 1、命令行预测 @@ -60,33 +53,59 @@ import cv2 import paddlehub as hub + # Load ddparser module = hub.Module(name="ddparser") - test_text = ["百度是一家高科技公司"] - results = module.parse(texts=test_text) + # String input + results = module.parse("百度是一家高科技公司") + print(results) + # [{'word': ['百度', '是', '一家', '高科技', '公司'], 'head': [2, 0, 5, 5, 2], 'deprel': ['SBV', 'HED', 'ATT', 'ATT', 'VOB']}] + + # List input + results = module.parse(["百度是一家高科技公司", "他送了一本书"]) print(results) + # [{'word': ['百度', '是', '一家', '高科技', '公司'], 'head': [2, 0, 5, 5, 2], 'deprel': ['SBV', 'HED', 
'ATT', 'ATT', 'VOB']}, {'word': ['他', '送', '了', '一本', '书'], 'head': [2, 0, 2, 5, 2], 'deprel': ['SBV', 'HED', 'MT', 'ATT', 'VOB']}] - test_tokens = [['百度', '是', '一家', '高科技', '公司']] - results = module.parse(texts=test_text, return_visual = True) + # Use POS Tag and probability + module = hub.Module(name="ddparser", prob=True, use_pos=True) + results = module.parse("百度是一家高科技公司") print(results) + # [{'word': ['百度', '是', '一家', '高科技', '公司'], 'head': [2, 0, 5, 5, 2], 'deprel': ['SBV', 'HED', 'ATT', 'ATT', 'VOB'], 'postag': ['ORG', 'v', 'm', 'n', 'n'], 'prob': [1.0, 1.0, 1.0, 1.0, 1.0]}] - result = results[0] - data = module.visualize(result['word'],result['head'],result['deprel']) - # or data = result['visual'] - cv2.imwrite('test.jpg',data) + # Visualization mode + module = hub.Module(name="ddparser", return_visual=True) + data = module.visualize("百度是一家高科技公司") + cv2.imwrite('test.jpg', data) ``` - ### 3、API - ```python - def parse(texts=[], return\_visual=False) + def __init__( + tree=True, + prob=False, + use_pos=False, + batch_size=1, + return_visual=False) + ``` + - 模块初始化。 + + - **参数** + + - tree(bool): 输出结果是否需要满足树状结构,默认为True。 + - prob(bool): 是否输出概率值,默认为False。 + - use_pos(bool): 是否输出词性标签,默认为False。 + - batch_size(int): 批大小,默认为1。 + - return_visual(bool): 是否返回可视化结果(需配合visualize api使用),默认为False。 + + - ```python + def parse(texts) ``` - 依存分析接口,输入文本,输出依存关系。 - **参数** - - texts(list\[list\[str\] or list\[str\]]): 待预测数据。各元素可以是未分词的字符串,也可以是已分词的token列表。 - - return\_visual(bool): 是否返回依存分析可视化结果。如果为True,返回结果中将包含'visual'字段。 + - texts(str or list\[str\]]): 待预测数据。 - **返回** @@ -98,31 +117,29 @@ 'deprel': list[str], 当前成分与支配者的依存关系。 'prob': list[float], 从属者和支配者依存的概率。 'postag': list[str], 词性标签,只有当texts的元素是未分词的字符串时包含这个键。 - 'visual': 图像数组,可以使用cv2.imshow显示图像或cv2.imwrite保存图像。 + 'visual': numpy.ndarray, 图像数组,可以使用cv2.imshow显示图像或cv2.imwrite保存图像。 } - ```python - def visualize(word, head, deprel) + def visualize(text) ``` - - 可视化接口,输入依存分析接口得到的信息,输出依存图形数组。 + - 可视化接口,输入文本信息,输出依存图形数组。 - **参数** - - word(list\[list\[str\]\): 分词信息。 - - head(list\[int\]): 当前成分其支配者的id。 - - deprel(list\[str\]): 当前成分与支配者的依存关系。 + - text(str): 输入文本,支持string格式的单条文本输入。 - **返回** - - data(numpy.array): 图像数组。可以使用cv2.imshow显示图像或cv2.imwrite保存图像。 + - data(numpy.ndarray): 图像数组。可以使用cv2.imshow显示图像或cv2.imwrite保存图像。 ## 四、服务部署 -- PaddleHub Serving可以部署一个在线情感分析服务,可以将此接口用于在线web应用。 +- PaddleHub Serving可以部署一个在线句法分析服务,可以将此接口用于在线web应用。 - ## 第一步:启动PaddleHub Serving @@ -148,38 +165,45 @@ import requests import json - import numpy as np - import cv2 - - # 待预测数据 + # 待预测数据(input string) text = ["百度是一家高科技公司"] # 设置运行配置 - return_visual = True - data = {"texts": text, "return_visual": return_visual} + data = {"texts": text} # 指定预测方法为DuDepParser并发送post请求,content-type类型应指定json方式 - url = "http://0.0.0.0:8866/predict/ddparser" + url = "http://127.0.0.1:8866/predict/ddparser" headers = {"Content-Type": "application/json"} r = requests.post(url=url, headers=headers, data=json.dumps(data)) - results = r.json()['results'] - for i in range(len(results)): - print(results[i]['word']) - # 不同于本地调用parse接口,serving返回的图像是list类型的,需要先用numpy加载再显示或保存。 - cv2.imwrite('%s.jpg'%i, np.array(results[i]['visual'])) + print(r.json()) + # {'msg': '', 'results': [{'deprel': ['SBV', 'HED', 'ATT', 'VOB'], 'head': ['2', '0', '4', '2'], 'word': ['百度', '是', '一家', '公司']}], 'status': '000'} + + # 待预测数据(input list) + text = ["百度是一家公司", "他送了一本书"] + + # 设置运行配置 + data = {"texts": text} + + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + # {'msg': '', 'results': [{'deprel': ['SBV', 
'HED', 'ATT', 'VOB'], 'head': ['2', '0', '4', '2'], 'word': ['百度', '是', '一家', '公司']}, {'deprel': ['SBV', 'HED', 'MT', 'ATT', 'VOB'], 'head': ['2', '0', '2', '5', '2'], 'word': ['他', '送', '了', '一本', '书']}], 'status': '000'} + ``` - 关于PaddleHub Serving更多信息参考:[服务部署](../../../../docs/docs_ch/tutorial/serving.md) - ## 五、更新历史 * 1.0.0 初始发布 +* 1.1.0 + + 适配paddlepaddle 2.1版本 + - ```shell - $ hub install ddparser==1.0.0 + $ hub install ddparser==1.1.0 ``` diff --git a/modules/text/syntactic_analysis/DDParser/module.py b/modules/text/syntactic_analysis/DDParser/module.py index a63e4a6e4a6befc9bddf42bdd64bd0f87db17666..6def65a819663bf565e54b6459ea747f1d662f40 100644 --- a/modules/text/syntactic_analysis/DDParser/module.py +++ b/modules/text/syntactic_analysis/DDParser/module.py @@ -2,52 +2,48 @@ import os import argparse -import numpy as np -import matplotlib as mpl -import matplotlib.pyplot as plt -import matplotlib.font_manager as font_manager -from paddle import fluid import paddlehub as hub from paddlehub.module.module import serving, moduleinfo, runnable - -try: - from ddparser import DDParser as DDParserModel -except: - raise ImportError( - "The module requires additional dependencies: ddparser. Please run 'pip install ddparser' to install it.") +from paddlenlp import Taskflow @moduleinfo( name="ddparser", - version="1.0.0", + version="1.1.0", summary="Baidu's open-source DDParser model.", author="baidu-nlp", author_email="", type="nlp/syntactic_analysis") class ddparser(hub.NLPPredictionModule): - def _initialize(self): - """ - initialize with the necessary elements - """ - self.ddp = DDParserModel(prob=True, use_pos=True) - self.font = font_manager.FontProperties(fname=os.path.join(self.directory, "SourceHanSans-Regular.ttf")) + def __init__(self, + tree=True, + prob=False, + use_pos=False, + batch_size=1, + return_visual=False, + ): + self.ddp = Taskflow( + "dependency_parsing", + tree=tree, + prob=prob, + use_pos=use_pos, + batch_size=batch_size, + return_visual=return_visual) @serving - def serving_parse(self, texts=[], return_visual=False): - results = self.parse(texts, return_visual) - if return_visual: - for i, result in enumerate(results): - result['visual'] = result['visual'].tolist() - + def serving_parse(self, texts): + results = self.parse(texts) + for i in range(len(results)): + org_list = results[i]["head"] + results[i]["head"] = [str(x) for x in org_list] return results - def parse(self, texts=[], return_visual=False): + def parse(self, texts): """ parse the dependency. Args: - texts(list[list[str] or list[list[str]]]): the input texts to be parse. It should be a list with elements: untokenized string or tokens list. - return_visual(bool): if set True, the result will contain the dependency visualization. + texts(str or list[str]): the input texts to be parse. Returns: results(list[dict]): a list, with elements corresponding to each of the elements in texts. The element is a dictionary of shape: @@ -57,23 +53,10 @@ class ddparser(hub.NLPPredictionModule): 'deprel': list[str], the dependency relation. 'prob': list[float], the prediction probility of the dependency relation. 'postag': list[str], the POS tag. If the element of the texts is list, the key 'postag' will not return. - 'visual' : list[numpy.array]: the dependency visualization. Use cv2.imshow to show or cv2.imwrite to save it. If return_visual=False, it will not return. + 'visual' : numpy.ndarray: the dependency visualization. Use cv2.imshow to show or cv2.imwrite to save it. If return_visual=False, it will not return. 
} """ - - if not texts: - return - if all([isinstance(i, str) and i for i in texts]): - do_parse = self.ddp.parse - elif all([isinstance(i, list) and i for i in texts]): - do_parse = self.ddp.parse_seg - else: - raise ValueError("All of the elements should be string or list") - results = do_parse(texts) - if return_visual: - for result in results: - result['visual'] = self.visualize(result['word'], result['head'], result['deprel']) - return results + return self.ddp(texts) @runnable def run_cmd(self, argvs): @@ -98,94 +81,21 @@ class ddparser(hub.NLPPredictionModule): return results - def visualize(self, word, head, deprel): + def visualize(self, text): """ Visualize the dependency. Args: - word: list[str], the tokenized words. - head: list[int], the head ids. - deprel: list[str], the dependency relation. + text(str): input text. Returns: - data: a numpy array, use cv2.imshow to show it or cv2.imwrite to save it. + data(numpy.ndarray): a numpy array, use cv2.imshow to show it or cv2.imwrite to save it. """ - nodes = ['ROOT'] + word - x = list(range(len(nodes))) - y = [0] * (len(nodes)) - fig, ax = plt.subplots() - # control the picture size - max_span = max([abs(i + 1 - j) for i, j in enumerate(head)]) - fig.set_size_inches((len(nodes), max_span / 2)) - # set the points - plt.scatter(x, y, c='w') - - for i in range(len(nodes)): - txt = nodes[i] - xytext = (i, 0) - if i == 0: - # set 'ROOT' - ax.annotate( - txt, - xy=xytext, - xycoords='data', - xytext=xytext, - textcoords='data', - ) - else: - xy = (head[i - 1], 0) - rad = 0.5 if head[i - 1] < i else -0.5 - # set the word - ax.annotate( - txt, - xy=xy, - xycoords='data', - xytext=(xytext[0] - 0.1, xytext[1]), - textcoords='data', - fontproperties=self.font) - # draw the curve - ax.annotate( - "", - xy=xy, - xycoords='data', - xytext=xytext, - textcoords='data', - arrowprops=dict( - arrowstyle="<-", - shrinkA=12, - shrinkB=12, - color='blue', - connectionstyle="arc3,rad=%s" % rad, - ), - ) - # set the deprel label. 
Calculate its position by the radius - text_x = min(i, head[i - 1]) + abs((i - head[i - 1])) / 2 - 0.2 - text_y = abs((i - head[i - 1])) / 4 - ax.annotate(deprel[i - 1], xy=xy, xycoords='data', xytext=[text_x, text_y], textcoords='data') - - # control the axis - plt.axis('equal') - plt.axis('off') - - # save to numpy array - fig.canvas.draw() - data = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8) - data = data.reshape(fig.canvas.get_width_height()[::-1] + (3, ))[:, :, ::-1] - return data - - -if __name__ == "__main__": - module = ddparser() - # Data to be predicted - test_text = ["百度是一家高科技公司"] - results = module.parse(texts=test_text) - print(results) - test_tokens = [['百度', '是', '一家', '高科技', '公司']] - results = module.parse(texts=test_text, return_visual=True) - print(results) - result = results[0] - data = module.visualize(result['word'], result['head'], result['deprel']) - import cv2 - import numpy as np - cv2.imwrite('test1.jpg', data) - cv2.imwrite('test2.jpg', result['visual']) + + if isinstance(text, str): + result = self.ddp(text)[0]['visual'] + return result + else: + raise TypeError( + "Invalid inputs, input text should be str, but type of {} found!".format(type(text)) + ) diff --git a/modules/text/syntactic_analysis/DDParser/requirements.txt b/modules/text/syntactic_analysis/DDParser/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..00b21c4566f6262b02e8cecaf5d4f26b00d2f19b --- /dev/null +++ b/modules/text/syntactic_analysis/DDParser/requirements.txt @@ -0,0 +1,2 @@ +paddlenlp>=2.1.1 +LAC>=2.1.2 diff --git a/modules/text/text_correction/ernie-csc/README.md b/modules/text/text_correction/ernie-csc/README.md new file mode 100644 index 0000000000000000000000000000000000000000..a62e46376b658e2e10e745cdd9d657ee4b9259e3 --- /dev/null +++ b/modules/text/text_correction/ernie-csc/README.md @@ -0,0 +1,165 @@ +# ERNIE-CSC + +|模型名称|ERNIE-CSC| +| :--- | :---: | +|类别|文本-文本纠错| +|网络|ERNIE-CSC| +|数据集|SIGHAN| +|是否支持Fine-tuning|否| +|模型大小|436MB| +|最新更新日期|2021-12-10| +|数据指标|-| + + + +## 一、模型基本信息 + +- ### 模型介绍 + + - 中文文本纠错任务是一项NLP基础任务,其输入是一个可能含有语法错误的中文句子,输出是一个正确的中文句子。语法错误类型很多,有多字、少字、错别字等,目前最常见的错误类型是错别字。大部分研究工作围绕错别字这一类型进行研究。本文实现了百度在ACL 2021上提出结合拼音特征的Softmask策略的中文错别字纠错的下游任务网络,并提供预训练模型,模型结构如下: + +

+(图:ERNIE-CSC 模型结构示意图,此处从略)
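+
+  - 输出结果的使用:结合下文「三、模型API预测」中 predict 返回的 source、target、errors 字段,可以把纠错信息应用回原句做进一步处理。下面给出一个简单示意(其中 apply_corrections 仅为说明用的辅助函数,并非本模块提供的接口):
+
+  - ```python
+    # 示意:把 errors 中记录的「位置 + 纠错字」应用回原句(假设结果格式与下文 API 说明一致)
+    def apply_corrections(result):
+        chars = list(result['source'])
+        for err in result['errors']:
+            pos = err['position']                        # 错误字符在句中的下标
+            for wrong, right in err['correction'].items():
+                if 0 <= pos < len(chars) and chars[pos] == wrong:
+                    chars[pos] = right                   # 仅在原字符匹配时替换
+        return ''.join(chars)
+
+    # 样例数据与下文「预测代码示例」的返回结果对应
+    result = {'source': '遇到逆竟时,我们必须勇于面对。',
+              'errors': [{'position': 3, 'correction': {'竟': '境'}}]}
+    print(apply_corrections(result))                     # 遇到逆境时,我们必须勇于面对。
+    ```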

+ + - 更多详情请[参考论文](https://aclanthology.org/2021.findings-acl.198.pdf) + + - 注:论文中暂未开源融合字音特征的预训练模型参数(即MLM-phonetics),所以本文提供的纠错模型是在ERNIE-1.0的参数上进行Finetune,纠错模型结构与论文保持一致。 + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.1.0 + + - paddlenlp >= 2.2.0 + + - paddlehub >= 2.1.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) + +- ### 2、安装 + + - ```shell + $ hub install ernie-csc + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run ernie-csc --input_text="遇到逆竟时,我们必须勇于面对,而且要愈挫愈勇,这样我们才能朝著成功之路前进。" + ``` + - 通过命令行方式实现文本纠错ernie-csc模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + + # Load ernie-csc + module = hub.Module(name="ernie-csc") + + # String input + results = module.predict("遇到逆竟时,我们必须勇于面对,而且要愈挫愈勇,这样我们才能朝著成功之路前进。") + print(results) + # [{'source': '遇到逆竟时,我们必须勇于面对,而且要愈挫愈勇,这样我们才能朝著成功之路前进。', 'target': '遇到逆境时,我们必须勇于面对,而且要愈挫愈勇,这样我们才能朝著成功之路前进。', 'errors': [{'position': 3, 'correction': {'竟': '境'}}]}] + + # List input + results = module.predict(['遇到逆竟时,我们必须勇于面对,而且要愈挫愈勇,这样我们才能朝著成功之路前进。', '人生就是如此,经过磨练才能让自己更加拙壮,才能使自己更加乐观。']) + print(results) + # [{'source': '遇到逆竟时,我们必须勇于面对,而且要愈挫愈勇,这样我们才能朝著成功之路前进。', 'target': '遇到逆境时,我们必须勇于面对,而且要愈挫愈勇,这样我们才能朝著成功之路前进。', 'errors': [{'position': 3, 'correction': {'竟': '境'}}]}, {'source': '人生就是如此,经过磨练才能让自己更加拙壮,才能使自己更加乐观。', 'target': '人生就是如此,经过磨练才能让自己更加茁壮,才能使自己更加乐观。', 'errors': [{'position': 18, 'correction': {'拙': '茁'}}]}] + ``` + +- ### 3、API + + - ```python + def __init__(batch_size=32) + ``` + + - **参数** + + - batch_size(int): 每个预测批次的样本数目,默认为32。 + + - ```python + def predict(texts) + ``` + - 预测接口,输入文本,输出文本纠错结果。 + + - **参数** + + - texts(str or list\[str\]): 待预测数据。 + + - **返回** + + - results(list\[dict\]): 输出结果。每个元素都是dict类型,包含以下信息: + + { + 'source': str, 输入文本。 + 'target': str, 模型预测结果。 + 'errors': list[dict], 错误字符的详细信息,包含如下信息: + { + 'position': int, 错误字符的位置。 + 'correction': dict, 错误字符及其对应的校正结果。 + } + } + + +## 四、服务部署 + +- PaddleHub Serving可以部署一个在线文本纠错服务,可以将此接口用于在线web应用。 + +- ## 第一步:启动PaddleHub Serving + + - 运行启动命令: + ```shell + $ hub serving start -m ernie-csc + ``` + + - 这样就完成了服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +- ## 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + ```python + import requests + import json + + # 待预测数据(input string) + text = ["遇到逆竟时,我们必须勇于面对,而且要愈挫愈勇,这样我们才能朝著成功之路前进。"] + + # 设置运行配置 + data = {"texts": text} + + # 指定预测方法为ernie-csc并发送post请求,content-type类型应指定json方式 + url = "http://127.0.0.1:8866/predict/ernie-csc" + headers = {"Content-Type": "application/json"} + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + + # 待预测数据(input list) + text = ['遇到逆竟时,我们必须勇于面对,而且要愈挫愈勇,这样我们才能朝著成功之路前进。', '人生就是如此,经过磨练才能让自己更加拙壮,才能使自己更加乐观。'] + + # 设置运行配置 + data = {"texts": text} + + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + ``` + + - 关于PaddleHub Serving更多信息参考:[服务部署](../../../../docs/docs_ch/tutorial/serving.md) + + +## 五、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install ernie-csc==1.0.0 + ``` diff --git a/modules/text/text_correction/ernie-csc/__init__.py b/modules/text/text_correction/ernie-csc/__init__.py new file mode 100644 index 
0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/modules/text/text_correction/ernie-csc/module.py b/modules/text/text_correction/ernie-csc/module.py new file mode 100644 index 0000000000000000000000000000000000000000..5d6454f4510833fcc97cdcad3983b71e28111f3f --- /dev/null +++ b/modules/text/text_correction/ernie-csc/module.py @@ -0,0 +1,67 @@ +# -*- coding:utf-8 -*- +import os +import argparse + +import paddle +import paddlehub as hub +from paddlehub.module.module import serving, moduleinfo, runnable +from paddlenlp import Taskflow + + +@moduleinfo( + name="ernie-csc", + version="1.0.0", + summary="", + author="Baidu", + author_email="", + type="nlp/text_correction", + meta=hub.NLPPredictionModule) +class Ernie_CSC(paddle.nn.Layer): + def __init__(self, + batch_size=32): + self.corrector = Taskflow("text_correction", batch_size=batch_size) + + @serving + def predict(self, texts): + """ + The prediction interface for ernie-csc. + + Args: + texts(str or list[str]): the input texts to be predict. + + Returns: + results(list[dict]): inference results. The element is a dictionary consists of: + { + 'source': str, the input texts. + 'target': str, the predicted correct texts. + 'errors': list[dict], detail information of errors, the element is a dictionary consists of: + { + 'position': int, index of wrong charactor. + 'correction': int, the origin charactor and the predicted correct charactor. + } + } + """ + return self.corrector(texts) + + @runnable + def run_cmd(self, argvs): + """ + Run as a command + """ + self.parser = argparse.ArgumentParser( + description='Run the %s module.' % self.name, + prog='hub run %s' % self.name, + usage='%(prog)s', + add_help=True) + + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. 
Required") + + self.add_module_input_arg() + + args = self.parser.parse_args(argvs) + + input_data = self.check_input_data(args) + + results = self.predict(texts=input_data) + + return results diff --git a/modules/text/text_correction/ernie-csc/requirements.txt b/modules/text/text_correction/ernie-csc/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..7a8a87b33bb554a24de6344006c9e6c5f4f1c066 --- /dev/null +++ b/modules/text/text_correction/ernie-csc/requirements.txt @@ -0,0 +1 @@ +paddlenlp>=2.2.0 diff --git a/modules/thirdparty/text/text_generation/GPT2_Base_CN/README.md b/modules/text/text_generation/GPT2_Base_CN/README.md similarity index 100% rename from modules/thirdparty/text/text_generation/GPT2_Base_CN/README.md rename to modules/text/text_generation/GPT2_Base_CN/README.md diff --git a/modules/thirdparty/text/text_generation/GPT2_Base_CN/module.py b/modules/text/text_generation/GPT2_Base_CN/module.py similarity index 100% rename from modules/thirdparty/text/text_generation/GPT2_Base_CN/module.py rename to modules/text/text_generation/GPT2_Base_CN/module.py diff --git a/modules/thirdparty/text/text_generation/GPT2_CPM_LM/README.md b/modules/text/text_generation/GPT2_CPM_LM/README.md similarity index 100% rename from modules/thirdparty/text/text_generation/GPT2_CPM_LM/README.md rename to modules/text/text_generation/GPT2_CPM_LM/README.md diff --git a/modules/thirdparty/text/text_generation/GPT2_CPM_LM/module.py b/modules/text/text_generation/GPT2_CPM_LM/module.py similarity index 100% rename from modules/thirdparty/text/text_generation/GPT2_CPM_LM/module.py rename to modules/text/text_generation/GPT2_CPM_LM/module.py diff --git a/modules/text/text_generation/ernie_gen/README_en.md b/modules/text/text_generation/ernie_gen/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..6e8264a44ebac72db09c1c94581f477f0fa9e7ac --- /dev/null +++ b/modules/text/text_generation/ernie_gen/README_en.md @@ -0,0 +1,230 @@ +# ernie_gen + +| 模型名称 | ernie_gen | +| :------------------ | :-----------: | +| 类别 | 文本-文本生成 | +| 网络 | ERNIE-GEN | +| 数据集 | - | +| 是否支持Fine-tuning | 是 | +| 模型大小 | 85K | +| 最新更新日期 | 2021-07-20 | +| 数据指标 | - | + + +## 一、模型基本信息 + +- ### 模型介绍 + - ERNIE-GEN 是面向生成任务的预训练-微调框架,首次在预训练阶段加入span-by-span 生成任务,让模型每次能够生成一个语义完整的片段。在预训练和微调中通过填充式生成机制和噪声感知机制来缓解曝光偏差问题。此外, ERNIE-GEN 采样多片段-多粒度目标文本采样策略, 增强源文本和目标文本的关联性,加强了编码器和解码器的交互。 + - ernie_gen module是一个具备微调功能的module,可以快速完成特定场景module的制作。 + +

+(图:ERNIE-GEN 预训练-微调框架示意图,此处从略)
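+
+  - 微调数据为「序号\t输入文本\t目标文本」的制表符分隔文本(格式详见下文 Fine-tune API 中 train_path / dev_path 的参数说明)。下面给出一个构造训练文件的简单示意(样例语句取自下文参数说明,仅作演示):
+
+  - ```python
+    # 示意:按「id\t源文本\t目标文本」格式写出微调数据,\t 必须是真实的制表符而非空格
+    samples = [("1", "床前明月光", "疑是地上霜"),
+               ("2", "举头望明月", "低头思故乡")]
+    with open("train.txt", "w", encoding="utf-8") as f:
+        for idx, src, tgt in samples:
+            f.write("{}\t{}\t{}\n".format(idx, src, tgt))
+    ```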

+ +- 更多详情请查看:[ERNIE-GEN:An Enhanced Multi-Flow Pre-training and Fine-tuning Framework for Natural Language Generation](https://arxiv.org/abs/2001.11314) + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 | [如何安装paddlehub](../../../../docs/docs_ch/get_start/installation.rst) + + - paddlenlp >= 2.0.0 + +- ### 2、安装 + + - ```shell + $ hub install ernie_gen + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ernie_gen can be used **only if it is first targeted at the specific dataset fine-tune** + - There are many types of text generation tasks, ernie_gen only provides the basic parameters for text generation, which can only be used after fine-tuning the dataset for a specific task + - Paddlehub provides a simple fine-tune dataset:[train.txt](./test_data/train.txt), [dev.txt](./test_data/dev.txt) + - Paddlehub also offers multiple fine-tune pre-training models that work well:[Couplet generated](../ernie_gen_couplet/),[Lover words generated](../ernie_gen_lover_words/),[Poetry generated](../ernie_gen_poetry/)等 + +### 1、Fine-tune and encapsulation + +- #### Fine-tune Code Example + + - ```python + import paddlehub as hub + + module = hub.Module(name="ernie_gen") + + result = module.finetune( + train_path='train.txt', + dev_path='dev.txt', + max_steps=300, + batch_size=2 + ) + + module.export(params_path=result['last_save_path'], module_name="ernie_gen_test", author="test") + ``` + +- #### API Instruction + + - ```python + def finetune(train_path, + dev_path=None, + save_dir="ernie_gen_result", + init_ckpt_path=None, + use_gpu=True, + max_steps=500, + batch_size=8, + max_encode_len=15, + max_decode_len=15, + learning_rate=5e-5, + warmup_proportion=0.1, + weight_decay=0.1, + noise_prob=0, + label_smooth=0, + beam_width=5, + length_penalty=1.0, + log_interval=100, + save_interval=200): + ``` + + - Fine tuning model parameters API + - **Parameter** + - train_path(str): Training set path. The format of the training set should be: "serial number\tinput text\tlabel", such as "1\t床前明月光\t疑是地上霜", note that \t cannot be replaced by Spaces + - dev_path(str): validation set path. The format of the validation set should be: "serial number\tinput text\tlabel, such as "1\t举头望明月\t低头思故乡", note that \t cannot be replaced by Spaces + - save_dir(str): Model saving and validation sets predict output paths. + - init_ckpt_path(str): The model initializes the loading path to realize incremental training. + - use_gpu(bool): use gpu or not + - max_steps(int): Maximum training steps. + - batch_size(int): Batch size during training. + - max_encode_len(int): Maximum encoding length. + - max_decode_len(int): Maximum decoding length. + - learning_rate(float): Learning rate size. + - warmup_proportion(float): Warmup rate. + - weight_decay(float): Weight decay size. + - noise_prob(float): Noise probability, refer to the Ernie Gen's paper. + - label_smooth(float): Label smoothing weight. + - beam_width(int): Beam size of validation set at the time of prediction. + - length_penalty(float): Length penalty weight for validation set prediction. + - log_interval(int): Number of steps at a training log printing interval. + - save_interval(int): training model save interval deployment. The validation set will make predictions after the model is saved. + - **Return** + - result(dict): Run result. 
Contains 2 keys: + - last_save_path(str): Save path of model at the end of training. + - last_ppl(float): Model confusion at the end of training. + + - ```python + def export( + params_path, + module_name, + author, + version="1.0.0", + summary="", + author_email="", + export_path="."): + ``` + + - Module exports an API through which training parameters can be packaged into a Hub Module with one click. + - **Parameter** + - params_path(str): Module parameter path. + - module_name(str): module name, such as "ernie_gen_couplet"。 + - author(str): Author name + - max_encode_len(int): Maximum encoding length. + - max_decode_len(int): Maximum decoding length. + - version(str): The version number. + - summary(str): English introduction to Module. + - author_email(str): Email address of the author. + - export_path(str): Module export path. + +### 2、模型预测 + +- **定义`$module_name`为export指定的module_name** + +- 模型转换完毕之后,通过`hub install $module_name`安装该模型,即可通过以下2种方式调用自制module: + +- #### 法1:命令行预测 + + - ```python + $ hub run $module_name --input_text="输入文本" --use_gpu True --beam_width 5 + ``` + + - 通过命令行方式实现hub模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- #### 法2:API预测 + + - ```python + import paddlehub as hub + + module = hub.Module(name="$module_name") + + test_texts = ["输入文本1", "输入文本2"] + # generate包含3个参数,texts为输入文本列表,use_gpu指定是否使用gpu,beam_width指定beam search宽度。 + results = module.generate(texts=test_texts, use_gpu=True, beam_width=5) + for result in results: + print(result) + ``` + +- 您也可以将`$module_name`文件夹打包为tar.gz压缩包并联系PaddleHub工作人员上传至PaddleHub模型仓库,这样更多的用户可以通过一键安装的方式使用您的模型。PaddleHub非常欢迎您的贡献,共同推动开源社区成长。 + +## 四、服务部署 + +- PaddleHub Serving 可以部署一个文本生成的在线服务。 + +- ### 第一步:启动PaddleHub Serving + + - 运行启动命令: + - ```shell + $ hub serving start -m $module_name -p 8866 + ``` + + - 这样就完成了一个目标检测的服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +- ### 第二步:发送预测请求 + + - 客户端通过以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 发送HTTP请求 + + data = {'texts':["输入文本1", "输入文本2"], + 'use_gpu':True, 'beam_width':5} + headers = {"Content-type": "application/json"} + url = "http://127.0.0.1:8866/predict/$module_name" + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # 保存结果 + results = r.json()["results"] + for result in results: + print(result) + ``` + +- **NOTE:** 上述`$module_name`为export指定的module_name + +## 五、更新历史 + +* 1.0.0 + + 初始发布 + +* 1.0.1 + + 修复模型导出bug + +* 1.0.2 + + 修复windows运行中的bug + +* 1.1.0 + + 接入PaddleNLP + + - ```shell + $ hub install ernie_gen==1.1.0 + ``` diff --git a/modules/text/text_generation/ernie_gen_leave/README.md b/modules/text/text_generation/ernie_gen_leave/README.md deleted file mode 100644 index ddde23ca6d86de747f1608c424e496671d0600cb..0000000000000000000000000000000000000000 --- a/modules/text/text_generation/ernie_gen_leave/README.md +++ /dev/null @@ -1,52 +0,0 @@ -## 概述 - - -ernie_gen_leave是基于ERNIE-GEN进行微调的模型,该模型的主要功能为生成请假条。输出一个关键词,给出你的请假理由。 - -## 命令行预测 - -```shell -$ hub run ernie_gen_leave --input_text="理由" --use_gpu True --beam_width 5 -``` - -## API - -```python -def generate(texts, use_gpu=False, beam_width=5): -``` - -预测API,输入关键字给出请假理由。 - -**参数** - -* texts (list\[str\]): 请假关键字; -* use\_gpu (bool): 是否使用 GPU;**若使用GPU,请先设置CUDA\_VISIBLE\_DEVICES环境变量**; -* beam\_width: beam search宽度,决定输出多少理由的数量。 - -**返回** - -* results (list\[list\]\[str\]): 输出请假理由。 - -**代码示例** - -```python -import paddlehub as hub - -module = hub.Module(name="ernie_gen_leave") - -test_texts = 
["理由"] -results = module.generate(texts=test_texts, use_gpu=False, beam_width=2) -for result in results: - print(result) -``` - - -## 查看代码 - -https://github.com/PaddlePaddle/PaddleHub/tree/release/v2.0.0-rc/modules/text/text_generation/ernie_gen_leave - -### 依赖 - -paddlepaddle >= 2.0.0rc1 - -paddlehub >= 2.0.0rc0 diff --git a/modules/text/text_generation/ernie_gen_leave/module.py b/modules/text/text_generation/ernie_gen_leave/module.py deleted file mode 100644 index 04d5d733b4f9b7322953c595c0c10ac3b74eb3c7..0000000000000000000000000000000000000000 --- a/modules/text/text_generation/ernie_gen_leave/module.py +++ /dev/null @@ -1,162 +0,0 @@ -# coding:utf-8 -# -# Licensed under the Apache License, Version 2.0 (the "License" -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import ast -import json - -import paddle.fluid as fluid -import paddlehub as hub -from paddlehub.module.module import runnable -from paddlehub.compat.module.nlp_module import DataFormatError -from paddlehub.common.logger import logger -from paddlehub.module.module import moduleinfo, serving - -import argparse -import os -import numpy as np - -import paddle.fluid.dygraph as D - -from .model.tokenizing_ernie import ErnieTokenizer -from .model.decode import beam_search_infilling -from .model.modeling_ernie_gen import ErnieModelForGeneration - - -@moduleinfo( - name="ernie_gen_leave", - version="1.0.0", - summary="", - author="彭兆帅,郑博培", - author_email="1084667371@qq.com,2733821739@qq.com", - type="nlp/text_generation", -) -class ErnieGen(hub.NLPPredictionModule): - def _initialize(self): - """ - initialize with the necessary elements - """ - assets_path = os.path.join(self.directory, "assets") - gen_checkpoint_path = os.path.join(assets_path, "ernie_gen") - ernie_cfg_path = os.path.join(assets_path, 'ernie_config.json') - with open(ernie_cfg_path, encoding='utf8') as ernie_cfg_file: - ernie_cfg = dict(json.loads(ernie_cfg_file.read())) - ernie_vocab_path = os.path.join(assets_path, 'vocab.txt') - with open(ernie_vocab_path, encoding='utf8') as ernie_vocab_file: - ernie_vocab = {j.strip().split('\t')[0]: i for i, j in enumerate(ernie_vocab_file.readlines())} - - with fluid.dygraph.guard(fluid.CPUPlace()): - with fluid.unique_name.guard(): - self.model = ErnieModelForGeneration(ernie_cfg) - finetuned_states, _ = D.load_dygraph(gen_checkpoint_path) - self.model.set_dict(finetuned_states) - - self.tokenizer = ErnieTokenizer(ernie_vocab) - self.rev_dict = {v: k for k, v in self.tokenizer.vocab.items()} - self.rev_dict[self.tokenizer.pad_id] = '' # replace [PAD] - self.rev_dict[self.tokenizer.unk_id] = '' # replace [PAD] - self.rev_lookup = np.vectorize(lambda i: self.rev_dict[i]) - - @serving - def generate(self, texts, use_gpu=False, beam_width=5): - """ - Get the predict result from the input texts. - - Args: - texts(list): the input texts. - use_gpu(bool): whether use gpu to predict or not - beam_width(int): the beam search width. - - Returns: - results(list): the predict result. 
- """ - if texts and isinstance(texts, list) and all(texts) and all([isinstance(text, str) for text in texts]): - predicted_data = texts - else: - raise ValueError("The input texts should be a list with nonempty string elements.") - - if use_gpu and "CUDA_VISIBLE_DEVICES" not in os.environ: - use_gpu = False - logger.warning( - "use_gpu has been set False as you didn't set the environment variable CUDA_VISIBLE_DEVICES while using use_gpu=True" - ) - if use_gpu: - place = fluid.CUDAPlace(0) - else: - place = fluid.CPUPlace() - - with fluid.dygraph.guard(place): - self.model.eval() - results = [] - for text in predicted_data: - sample_results = [] - ids, sids = self.tokenizer.encode(text) - src_ids = D.to_variable(np.expand_dims(ids, 0)) - src_sids = D.to_variable(np.expand_dims(sids, 0)) - output_ids = beam_search_infilling( - self.model, - src_ids, - src_sids, - eos_id=self.tokenizer.sep_id, - sos_id=self.tokenizer.cls_id, - attn_id=self.tokenizer.vocab['[MASK]'], - max_decode_len=50, - max_encode_len=50, - beam_width=beam_width, - tgt_type_id=1) - output_str = self.rev_lookup(output_ids[0].numpy()) - - for ostr in output_str.tolist(): - if '[SEP]' in ostr: - ostr = ostr[:ostr.index('[SEP]')] - sample_results.append("".join(ostr)) - results.append(sample_results) - return results - - def add_module_config_arg(self): - """ - Add the command config options - """ - self.arg_config_group.add_argument( - '--use_gpu', type=ast.literal_eval, default=False, help="whether use GPU for prediction") - - self.arg_config_group.add_argument('--beam_width', type=int, default=5, help="the beam search width") - - @runnable - def run_cmd(self, argvs): - """ - Run as a command - """ - self.parser = argparse.ArgumentParser( - description='Run the %s module.' % self.name, - prog='hub run %s' % self.name, - usage='%(prog)s', - add_help=True) - - self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. 
Required") - self.arg_config_group = self.parser.add_argument_group( - title="Config options", description="Run configuration for controlling module behavior, optional.") - - self.add_module_config_arg() - self.add_module_input_arg() - - args = self.parser.parse_args(argvs) - - try: - input_data = self.check_input_data(args) - except DataFormatError and RuntimeError: - self.parser.print_help() - return None - - results = self.generate(texts=input_data, use_gpu=args.use_gpu, beam_width=args.beam_width) - - return results diff --git a/modules/text/text_generation/ernie_gen_leave/test.py b/modules/text/text_generation/ernie_gen_leave/test.py deleted file mode 100644 index a7abf1b88bc07aaaf7b3f5d0800d55595569dbc0..0000000000000000000000000000000000000000 --- a/modules/text/text_generation/ernie_gen_leave/test.py +++ /dev/null @@ -1,8 +0,0 @@ -import paddlehub as hub - -module = hub.Module(name="ernie_gen_leave") - -test_texts = ["理由"] -results = module.generate(texts=test_texts, use_gpu=False, beam_width=2) -for result in results: - print(result) diff --git a/modules/text/text_generation/reading_pictures_writing_poems/__init__.py b/modules/text/text_generation/reading_pictures_writing_poems/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/modules/thirdparty/text/text_generation/reading_pictures_writing_poems/module.py b/modules/text/text_generation/reading_pictures_writing_poems/module.py similarity index 100% rename from modules/thirdparty/text/text_generation/reading_pictures_writing_poems/module.py rename to modules/text/text_generation/reading_pictures_writing_poems/module.py diff --git a/modules/text/text_generation/reading_pictures_writing_poems/readme.md b/modules/text/text_generation/reading_pictures_writing_poems/readme.md index 5468e04ba180e4b73b2f5899b75b29db097a2a59..7a6351c346551b09b94f729beb6aaa4934a47af3 100644 --- a/modules/text/text_generation/reading_pictures_writing_poems/readme.md +++ b/modules/text/text_generation/reading_pictures_writing_poems/readme.md @@ -63,13 +63,13 @@ - ### 2、预测代码示例 - ```python - import paddlehub as hub - - readingPicturesWritingPoems = hub.Module(name="reading_pictures_writing_poems") - results = readingPicturesWritingPoems.WritingPoem(image = "scenery.jpg", use_gpu=False) - - for result in results: - print(result) + import paddlehub as hub + + readingPicturesWritingPoems = hub.Module(name="reading_pictures_writing_poems") + results = readingPicturesWritingPoems.WritingPoem(image = "scenery.jpg", use_gpu=False) + + for result in results: + print(result) ``` - ### 3、API diff --git a/modules/text/text_generation/reading_pictures_writing_poems_for_midautumn/MidAutumnDetection/__init__.py b/modules/text/text_generation/reading_pictures_writing_poems_for_midautumn/MidAutumnDetection/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/modules/thirdparty/text/text_generation/reading_pictures_writing_poems_for_midautumn/MidAutumnDetection/module.py b/modules/text/text_generation/reading_pictures_writing_poems_for_midautumn/MidAutumnDetection/module.py similarity index 100% rename from modules/thirdparty/text/text_generation/reading_pictures_writing_poems_for_midautumn/MidAutumnDetection/module.py rename to modules/text/text_generation/reading_pictures_writing_poems_for_midautumn/MidAutumnDetection/module.py diff --git 
a/modules/text/text_generation/reading_pictures_writing_poems_for_midautumn/MidAutumnPoetry/__init__.py b/modules/text/text_generation/reading_pictures_writing_poems_for_midautumn/MidAutumnPoetry/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/modules/text/text_generation/ernie_gen_leave/model/decode.py b/modules/text/text_generation/reading_pictures_writing_poems_for_midautumn/MidAutumnPoetry/model/decode.py similarity index 100% rename from modules/text/text_generation/ernie_gen_leave/model/decode.py rename to modules/text/text_generation/reading_pictures_writing_poems_for_midautumn/MidAutumnPoetry/model/decode.py diff --git a/modules/text/text_generation/ernie_gen_leave/model/file_utils.py b/modules/text/text_generation/reading_pictures_writing_poems_for_midautumn/MidAutumnPoetry/model/file_utils.py similarity index 100% rename from modules/text/text_generation/ernie_gen_leave/model/file_utils.py rename to modules/text/text_generation/reading_pictures_writing_poems_for_midautumn/MidAutumnPoetry/model/file_utils.py diff --git a/modules/text/text_generation/ernie_gen_leave/model/modeling_ernie.py b/modules/text/text_generation/reading_pictures_writing_poems_for_midautumn/MidAutumnPoetry/model/modeling_ernie.py similarity index 100% rename from modules/text/text_generation/ernie_gen_leave/model/modeling_ernie.py rename to modules/text/text_generation/reading_pictures_writing_poems_for_midautumn/MidAutumnPoetry/model/modeling_ernie.py diff --git a/modules/text/text_generation/ernie_gen_leave/model/modeling_ernie_gen.py b/modules/text/text_generation/reading_pictures_writing_poems_for_midautumn/MidAutumnPoetry/model/modeling_ernie_gen.py similarity index 100% rename from modules/text/text_generation/ernie_gen_leave/model/modeling_ernie_gen.py rename to modules/text/text_generation/reading_pictures_writing_poems_for_midautumn/MidAutumnPoetry/model/modeling_ernie_gen.py diff --git a/modules/text/text_generation/ernie_gen_leave/model/tokenizing_ernie.py b/modules/text/text_generation/reading_pictures_writing_poems_for_midautumn/MidAutumnPoetry/model/tokenizing_ernie.py similarity index 100% rename from modules/text/text_generation/ernie_gen_leave/model/tokenizing_ernie.py rename to modules/text/text_generation/reading_pictures_writing_poems_for_midautumn/MidAutumnPoetry/model/tokenizing_ernie.py diff --git a/modules/thirdparty/text/text_generation/reading_pictures_writing_poems_for_midautumn/MidAutumnPoetry/module.py b/modules/text/text_generation/reading_pictures_writing_poems_for_midautumn/MidAutumnPoetry/module.py similarity index 100% rename from modules/thirdparty/text/text_generation/reading_pictures_writing_poems_for_midautumn/MidAutumnPoetry/module.py rename to modules/text/text_generation/reading_pictures_writing_poems_for_midautumn/MidAutumnPoetry/module.py diff --git a/modules/text/text_generation/reading_pictures_writing_poems_for_midautumn/__init__.py b/modules/text/text_generation/reading_pictures_writing_poems_for_midautumn/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/modules/thirdparty/text/text_generation/reading_pictures_writing_poems_for_midautumn/module.py b/modules/text/text_generation/reading_pictures_writing_poems_for_midautumn/module.py similarity index 100% rename from modules/thirdparty/text/text_generation/reading_pictures_writing_poems_for_midautumn/module.py rename to 
modules/text/text_generation/reading_pictures_writing_poems_for_midautumn/module.py diff --git a/modules/text/text_review/porn_detection_cnn/README.md b/modules/text/text_review/porn_detection_cnn/README.md index e72a71a633cf9aea44fbc7f1c2ef84a2fe31711e..588ce206b11445a6754b8891918d92f6b9cd396f 100644 --- a/modules/text/text_review/porn_detection_cnn/README.md +++ b/modules/text/text_review/porn_detection_cnn/README.md @@ -1,93 +1,184 @@ -# PornDetectionCNN API说明 +# porn_detection_cnn -## detection(texts=[], data={}, use_gpu=False, batch_size=1) +| 模型名称 | porn_detection_cnn | +| :------------------ | :------------: | +| 类别 | 文本-文本审核 | +| 网络 | CNN | +| 数据集 | 百度自建数据集 | +| 是否支持Fine-tuning | 否 | +| 模型大小 | 20M | +| 最新更新日期 | 2021-02-26 | +| 数据指标 | - | -porn_detection_cnn预测接口,鉴定输入句子是否包含色情文案 +## 一、模型基本信息 -**参数** +- ### 模型介绍 + - 色情检测模型可自动判别文本是否涉黄并给出相应的置信度,对文本中的色情描述、低俗交友、污秽文案进行识别。 + - porn_detection_cnn采用CNN网络结构并按字粒度进行切词,具有较高的预测速度。该模型最大句子长度为256字,仅支持预测。 -* texts(list): 待预测数据,如果使用texts参数,则不用传入data参数,二选一即可 -* data(dict): 预测数据,key必须为text,value是带预测数据。如果使用data参数,则不用传入texts参数,二选一即可。建议使用texts参数,data参数后续会废弃。 -* use_gpu(bool): 是否使用GPU预测,如果使用GPU预测,则在预测之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置 -* batch_size(int): 批处理大小 -**返回** +## 二、安装 -* results(list): 鉴定结果 +- ### 1、环境依赖 -## context(trainable=False) + - paddlepaddle >= 1.6.2 + + - paddlehub >= 1.6.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) -获取porn_detection_cnn的预训练program以及program的输入输出变量 +- ### 2、安装 -**参数** + - ```shell + $ hub install porn_detection_cnn + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) -* trainable(bool): trainable=True表示program中的参数在Fine-tune时需要微调,否则保持不变 -**返回** +## 三、模型API预测 -* inputs(dict): program的输入变量 -* outputs(dict): program的输出变量 -* main_program(Program): 带有预训练参数的program +- ### 1、命令行预测 -## get_labels() + - ```shell + $ hub run porn_detection_cnn --input_text "黄片下载" + ``` + + - 或者 -获取porn_detection_cnn的类别 + - ```shell + $ hub run porn_detection_cnn --input_file test.txt + ``` + + - 其中test.txt存放待审查文本,每行仅放置一段待审核文本 + + - 通过命令行方式实现hub模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) -**返回** +- ### 2、预测代码示例 -* labels(dict): porn_detection_cnn的类别(二分类,是/不是) + - ```python + import paddlehub as hub + + porn_detection_cnn = hub.Module(name="porn_detection_cnn") + + test_text = ["黄片下载", "打击黄牛党"] + + results = porn_detection_cnn.detection(texts=test_text, use_gpu=True, batch_size=1) + + for index, text in enumerate(test_text): + results[index]["text"] = text + for index, result in enumerate(results): + print(results[index]) + + # 输出结果如下: + # {'text': '黄片下载', 'porn_detection_label': 1, 'porn_detection_key': 'porn', 'porn_probs': 0.9324, 'not_porn_probs': 0.0676} + # {'text': '打击黄牛党', 'porn_detection_label': 0, 'porn_detection_key': 'not_porn', 'porn_probs': 0.0004, 'not_porn_probs': 0.9996} + ``` -## get_vocab_path() + +- ### 3、API -获取预训练时使用的词汇表 + - ```python + def detection(texts=[], data={}, use_gpu=False, batch_size=1) + ``` + + - porn_detection_cnn预测接口,鉴定输入句子是否包含色情文案 -**返回** + - **参数** -* vocab_path(str): 词汇表路径 + - texts(list): 待预测数据,如果使用texts参数,则不用传入data参数,二选一即可 -# PornDetectionCNN 服务部署 + - data(dict): 预测数据,key必须为text,value是带预测数据。如果使用data参数,则不用传入texts参数,二选一即可。建议使用texts参数,data参数后续会废弃。 -PaddleHub Serving可以部署一个在线色情文案检测服务,可以将此接口用于在线web应用。 + - use_gpu(bool): 
是否使用GPU预测,如果使用GPU预测,则在预测之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置 -## 第一步:启动PaddleHub Serving + - batch_size(int): 批处理大小 -运行启动命令: -```shell -$ hub serving start -m porn_detection_cnn -``` + - **返回** -启动时会显示加载模型过程,启动成功后显示 -```shell -Loading porn_detection_cnn successful. -``` + - results(list): 鉴定结果 -这样就完成了服务化API的部署,默认端口号为8866。 -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + - ```python + def get_labels() + ``` + - 获取porn_detection_cnn的类别 -## 第二步:发送预测请求 + - **返回** -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + - labels(dict): porn_detection_cnn的类别(二分类,是/不是) -```python -import requests -import json + - ```python + def get_vocab_path() + ``` -# 待预测数据 -text = ["黄片下载", "打击黄牛党"] + - 获取预训练时使用的词汇表 -# 设置运行配置 -# 对应本地预测porn_detection_cnn.detection(texts=text, batch_size=1, use_gpu=True) -data = {"texts": text, "batch_size": 1, "use_gpu":True} + - **返回** -# 指定预测方法为porn_detection_cnn并发送post请求,content-type类型应指定json方式 -# HOST_IP为服务器IP -url = "http://HOST_IP:8866/predict/porn_detection_cnn" -headers = {"Content-Type": "application/json"} -r = requests.post(url=url, headers=headers, data=json.dumps(data)) + - vocab_path(str): 词汇表路径 -# 打印预测结果 -print(json.dumps(r.json(), indent=4, ensure_ascii=False)) -``` -关于PaddleHub Serving更多信息参考[服务部署](https://github.com/PaddlePaddle/PaddleHub/blob/release/v1.6/docs/tutorial/serving.md) + +## 四、服务部署 + +- PaddleHub Serving可以部署一个在线色情文案检测服务,可以将此接口用于在线web应用。 + +- ## 第一步:启动PaddleHub Serving + + - 运行启动命令: + - ```shell + $ hub serving start -m porn_detection_cnn + ``` + + - 启动时会显示加载模型过程,启动成功后显示 + - ```shell + Loading porn_detection_cnn successful. + ``` + + - 这样就完成了服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ## 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 待预测数据 + text = ["黄片下载", "打击黄牛党"] + + # 设置运行配置 + # 对应本地预测porn_detection_cnn.detection(texts=text, batch_size=1, use_gpu=True) + data = {"texts": text, "batch_size": 1, "use_gpu":True} + + # 指定预测方法为porn_detection_cnn并发送post请求,content-type类型应指定json方式 + # HOST_IP为服务器IP + url = "http://HOST_IP:8866/predict/porn_detection_cnn" + headers = {"Content-Type": "application/json"} + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # 打印预测结果 + print(json.dumps(r.json(), indent=4, ensure_ascii=False)) + ``` + + - 关于PaddleHub Serving更多信息参考[服务部署](../../../../docs/docs_ch/tutorial/serving.md) + + + +## 五、更新历史 + +* 1.0.0 + + 初始发布 + +* 1.1.0 + + 大幅提升预测性能,同时简化接口使用 + + - ```shell + $ hub install porn_detection_cnn==1.1.0 + ``` + + diff --git a/modules/text/text_review/porn_detection_gru/README.md b/modules/text/text_review/porn_detection_gru/README.md index add8f9f971a6ea692d2091a571678b7dd1e0b042..46ba978316b494116319d82f004b4b4259327b5b 100644 --- a/modules/text/text_review/porn_detection_gru/README.md +++ b/modules/text/text_review/porn_detection_gru/README.md @@ -1,93 +1,185 @@ -# PornDetectionGRU API说明 +# porn_detection_gru -## detection(texts=[], data={}, use_gpu=False, batch_size=1) +| 模型名称 | porn_detection_gru | +| :------------------ | :------------: | +| 类别 | 文本-文本审核 | +| 网络 | GRU | +| 数据集 | 百度自建数据集 | +| 是否支持Fine-tuning | 否 | +| 模型大小 | 20M | +| 最新更新日期 | 2021-02-26 | +| 数据指标 | - | -porn_detection_gru预测接口,鉴定输入句子是否包含色情文案 +## 一、模型基本信息 -**参数** +- ### 模型介绍 + - 色情检测模型可自动判别文本是否涉黄并给出相应的置信度,对文本中的色情描述、低俗交友、污秽文案进行识别。 + - porn_detection_gru采用GRU网络结构并按字粒度进行切词,具有较高的预测速度。该模型最大句子长度为256字,仅支持预测。 -* texts(list): 待预测数据,如果使用texts参数,则不用传入data参数,二选一即可 -* data(dict): 
预测数据,key必须为text,value是带预测数据。如果使用data参数,则不用传入texts参数,二选一即可。建议使用texts参数,data参数后续会废弃。 -* use_gpu(bool): 是否使用GPU预测,如果使用GPU预测,则在预测之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置 -* batch_size(int): 批处理大小 -**返回** +## 二、安装 -* results(list): 鉴定结果 +- ### 1、环境依赖 -## context(trainable=False) + - paddlepaddle >= 1.6.2 + + - paddlehub >= 1.6.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) -获取porn_detection_gru的预训练program以及program的输入输出变量 +- ### 2、安装 -**参数** + - ```shell + $ hub install porn_detection_gru + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) -* trainable(bool): trainable=True表示program中的参数在Fine-tune时需要微调,否则保持不变 -**返回** -* inputs(dict): program的输入变量 -* outputs(dict): program的输出变量 -* main_program(Program): 带有预训练参数的program +## 三、模型API预测 -## get_labels() +- ### 1、命令行预测 -获取porn_detection_gru的类别 + - ```shell + $ hub run porn_detection_gru --input_text "黄片下载" + ``` + + - 或者 -**返回** + - ```shell + $ hub run porn_detection_gru --input_file test.txt + ``` + + - 其中test.txt存放待审查文本,每行仅放置一段待审核文本 + + - 通过命令行方式实现hub模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) -* labels(dict): porn_detection_gru的类别 +- ### 2、预测代码示例 -## get_vocab_path() + - ```python + import paddlehub as hub + + porn_detection_gru = hub.Module(name="porn_detection_gru") + + test_text = ["黄片下载", "打击黄牛党"] + + results = porn_detection_gru.detection(texts=test_text, use_gpu=True, batch_size=1) # 如不使用GPU,请修改为use_gpu=False + + for index, text in enumerate(test_text): + results[index]["text"] = text + for index, result in enumerate(results): + print(results[index]) + + # 输出结果如下: + # {'text': '黄片下载', 'porn_detection_label': 1, 'porn_detection_key': 'porn', 'porn_probs': 0.9324, 'not_porn_probs': 0.0676} + # {'text': '打击黄牛党', 'porn_detection_label': 0, 'porn_detection_key': 'not_porn', 'porn_probs': 0.0004, 'not_porn_probs': 0.9996} + ``` -获取预训练时使用的词汇表 + +- ### 3、API -**返回** + - ```python + def detection(texts=[], data={}, use_gpu=False, batch_size=1) + ``` + + - porn_detection_gru预测接口,鉴定输入句子是否包含色情文案 -* vocab_path(str): 词汇表路径 + - **参数** -# PornDetectionGRU 服务部署 + - texts(list): 待预测数据,如果使用texts参数,则不用传入data参数,二选一即可 -PaddleHub Serving可以部署一个在线色情文案检测服务,可以将此接口用于在线web应用。 + - data(dict): 预测数据,key必须为text,value是带预测数据。如果使用data参数,则不用传入texts参数,二选一即可。建议使用texts参数,data参数后续会废弃。 -## 第一步:启动PaddleHub Serving + - use_gpu(bool): 是否使用GPU预测,如果使用GPU预测,则在预测之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置 -运行启动命令: -```shell -$ hub serving start -m porn_detection_gru -``` + - batch_size(int): 批处理大小 -启动时会显示加载模型过程,启动成功后显示 -```shell -Loading porn_detection_gru successful. 
-``` + - **返回** -这样就完成了服务化API的部署,默认端口号为8866。 + - results(list): 鉴定结果 -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 -## 第二步:发送预测请求 + - ```python + def get_labels() + ``` + - 获取porn_detection_gru的类别 -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + - **返回** -```python -import requests -import json + - labels(dict): porn_detection_gru的类别(二分类,是/不是) -# 待预测数据 -text = ["黄片下载", "打击黄牛党"] + - ```python + def get_vocab_path() + ``` -# 设置运行配置 -# 对应本地预测porn_detection_gru.detection(texts=text, batch_size=1, use_gpu=True) -data = {"texts": text, "batch_size": 1, "use_gpu":True} + - 获取预训练时使用的词汇表 -# 指定预测方法为porn_detection_gru并发送post请求,content-type类型应指定json方式 -# HOST_IP为服务器IP -url = "http://HOST_IP:8866/predict/porn_detection_gru" -headers = {"Content-Type": "application/json"} -r = requests.post(url=url, headers=headers, data=json.dumps(data)) + - **返回** -# 打印预测结果 -print(json.dumps(r.json(), indent=4, ensure_ascii=False)) -``` + - vocab_path(str): 词汇表路径 -关于PaddleHub Serving更多信息参考[服务部署](https://github.com/PaddlePaddle/PaddleHub/blob/release/v1.6/docs/tutorial/serving.md) + + +## 四、服务部署 + +- PaddleHub Serving可以部署一个在线色情文案检测服务,可以将此接口用于在线web应用。 + +- ## 第一步:启动PaddleHub Serving + + - 运行启动命令: + - ```shell + $ hub serving start -m porn_detection_gru + ``` + + - 启动时会显示加载模型过程,启动成功后显示 + - ```shell + Loading porn_detection_gur successful. + ``` + + - 这样就完成了服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA_VISIBLE_DEVICES环境变量,否则不用设置。 + +- ## 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + - ```python + import requests + import json + + # 待预测数据 + text = ["黄片下载", "打击黄牛党"] + + # 设置运行配置 + # 对应本地预测porn_detection_gru.detection(texts=text, batch_size=1, use_gpu=True) + data = {"texts": text, "batch_size": 1, "use_gpu":True} + + # 指定预测方法为porn_detection_gru并发送post请求,content-type类型应指定json方式 + # HOST_IP为服务器IP + url = "http://HOST_IP:8866/predict/porn_detection_gru" + headers = {"Content-Type": "application/json"} + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # 打印预测结果 + print(json.dumps(r.json(), indent=4, ensure_ascii=False)) + ``` + + - 关于PaddleHub Serving更多信息参考[服务部署](../../../../docs/docs_ch/tutorial/serving.md) + + + + +## 五、更新历史 + +* 1.0.0 + + 初始发布 + +* 1.1.0 + + 大幅提升预测性能,同时简化接口使用 + + - ```shell + $ hub install porn_detection_gru==1.1.0 + ``` + diff --git a/modules/text/text_review/porn_detection_gru/README_en.md b/modules/text/text_review/porn_detection_gru/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..3a8446fad72920a5318888ddba9aea19cd6493bf --- /dev/null +++ b/modules/text/text_review/porn_detection_gru/README_en.md @@ -0,0 +1,183 @@ +# porn_detection_gru + +| Module Name | porn_detection_gru | +| :------------------ | :------------: | +| Category | text-text_review | +| Network | GRU | +| Dataset | Dataset built by Baidu | +| Fine-tuning supported or not | No | +| Module Size | 20M | +| Latest update date | 2021-02-26 | +| Data indicators | - | + +## I. Basic Information of Module + +- ### Module Introduction + - Pornography detection model can automatically distinguish whether the text is pornographic or not and give the corresponding confidence, and identify the pornographic description, vulgar communication and filthy text in the text. + - porn_detection_gru adopts GRU network structure and cuts words according to word granularity, which has high prediction speed. The maximum sentence length of this model is 256 words, and only prediction is supported. + + +## II. 
Installation + +- ### 1、Environmental dependence + + - paddlepaddle >= 1.6.2 + + - paddlehub >= 1.6.0 | [How to install PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) + +- ### 2、Installation + + - ```shell + $ hub install porn_detection_gru + ``` + - If you have problems during installation, please refer to:[windows_quickstart](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [linux_quickstart](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [mac_quickstart](../../../../docs/docs_ch/get_start/mac_quickstart.md) + + + +## III. Module API and Prediction + +- ### 1、Command line Prediction + + - ```shell + $ hub run porn_detection_gru --input_text "黄片下载" + ``` + + - or + + - ```shell + $ hub run porn_detection_gru --input_file test.txt + ``` + + - test.txt stores the text to be reviewed. Each line contains only one text + + - If you want to call the Hub module through the command line, please refer to: [PaddleHub Command line instruction](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、Prediction Code Example + + - ```python + import paddlehub as hub + + porn_detection_gru = hub.Module(name="porn_detection_gru") + + test_text = ["黄片下载", "打击黄牛党"] + + results = porn_detection_gru.detection(texts=test_text, use_gpu=True, batch_size=1) # If you do not use GPU, please set use_gpu=False + + for index, text in enumerate(test_text): + results[index]["text"] = text + for index, result in enumerate(results): + print(results[index]) + + # The output: + # {'text': '黄片下载', 'porn_detection_label': 1, 'porn_detection_key': 'porn', 'porn_probs': 0.9324, 'not_porn_probs': 0.0676} + # {'text': '打击黄牛党', 'porn_detection_label': 0, 'porn_detection_key': 'not_porn', 'porn_probs': 0.0004, 'not_porn_probs': 0.9996} + ``` + + +- ### 3、API + + - ```python + def detection(texts=[], data={}, use_gpu=False, batch_size=1) + ``` + + - prediction api of porn_detection_gru,to identify whether input sentences contain pornography + + - **Parameter** + + - texts(list): data to be predicted, if texts parameter is used, there is no need to pass in data parameter. You can use any of the two parameters. + + - data(dict): predicted data , key must be text,value is data to be predicted. if data parameter is used, there is no need to pass in texts parameter. You can use any of the two parameters. It is suggested to use texts parameter, and data parameter will be discarded later. + + - use_gpu(bool): use GPU or not. If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before prediction. Otherwise, need not set it. + + - **Return** + + - results(list): prediction result + + + - ```python + def get_labels() + ``` + - get the category of porn_detection_gru + + - **Return** + + - labels(dict): the category of porn_detection_gru (Dichotomies, yes/no) + + - ```python + def get_vocab_path() + ``` + + - get a vocabulary for pre-training + + - **Return** + + - vocab_path(str): Vocabulary path + + + +## IV. Server Deployment + +- PaddleHub Serving can deploy an online pornography detection service and you can use this interface for online Web applications. + +- ## Step 1: Start PaddleHub Serving + + - Run the startup command: + - ```shell + $ hub serving start -m porn_detection_gru + ``` + + - The model loading process is displayed on startup. After the startup is successful, the following information is displayed: + - ```shell + Loading porn_detection_gur successful. + ``` + + - The servitization API is now deployed and the default port number is 8866. 
+ + - **NOTE:** If GPU is used for prediction, set CUDA_VISIBLE_DEVICES environment variable before prediction. Otherwise, need not set it. + + +- ## Step 2: Send a predictive request + + - After configuring the server, the following lines of code can be used to send the prediction request and obtain the prediction result + - ```python + import requests + import json + + # data to be predicted + text = ["黄片下载", "打击黄牛党"] + + # Set the running configuration + # Corresponding local forecast porn_detection_gru.detection(texts=text, batch_size=1, use_gpu=True) + data = {"texts": text, "batch_size": 1, "use_gpu":True} + + # set the prediction method to porn_detection_gru and send a POST request, content-type should be set to json + # HOST_IP is the IP address of the server + url = "http://HOST_IP:8866/predict/porn_detection_gru" + headers = {"Content-Type": "application/json"} + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + + # print prediction result + print(json.dumps(r.json(), indent=4, ensure_ascii=False)) + ``` + + - For more information about PaddleHub Serving, please refer to:[Serving Deployment](../../../../docs/docs_ch/tutorial/serving.md) + + + + +## V. Release Note + +* 1.0.0 + + First release + +* 1.1.0 + + Improves prediction performance and simplifies interface usage + + - ```shell + $ hub install porn_detection_gru==1.1.0 + ``` + diff --git a/modules/text/text_to_knowledge/nptag/README.md b/modules/text/text_to_knowledge/nptag/README.md new file mode 100644 index 0000000000000000000000000000000000000000..12711225c2b5534a6fdaedae1bb0cacbe9e314cd --- /dev/null +++ b/modules/text/text_to_knowledge/nptag/README.md @@ -0,0 +1,168 @@ +# NPTag + +|模型名称|NPTag| +| :--- | :---: | +|类别|文本-文本知识关联| +|网络|ERNIE-CTM| +|数据集|百度自建数据集| +|是否支持Fine-tuning|否| +|模型大小|378MB| +|最新更新日期|2021-12-10| +|数据指标|-| + + + +## 一、模型基本信息 + +- ### 模型介绍 + + - NPTag(名词短语标注工具)是首个能够覆盖所有中文名词性词汇及短语的细粒度知识标注工具,旨在解决NLP中,名词性短语收录不足,导致的OOV(out-of-vocabulary,超出收录词表)问题。可直接应用构造知识特征,辅助NLP任务 + + - NPTag特点 + + - 包含2000+细粒度类别,覆盖所有中文名词性短语的词类体系,更丰富的知识标注结果 + - NPTag试用的词类体系未覆盖所有中文名词性短语的词类体系,对所有类目做了更细类目的识别(如注射剂、鱼类、博物馆等),共包含2000+细粒度类别,且可以直接关联百科知识树。 + - 可自由定制的分类框架 + - NPTag开源版标注使用的词类体系是我们在实践中对**百科词条**分类应用较好的一个版本,用户可以自由定制自己的词类体系和训练样本,构建自己的NPTag,以获得更好的适配效果。例如,可按照自定义的类别构造训练样本,使用小学习率、短训练周期微调NPTag模型,即可获得自己定制的NPTag工具。 + + - 模型结构 + - NPTag使用ERNIE-CTM+prompt训练而成,使用启发式搜索解码,保证分类结果都在标签体系之内。 + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.1.0 + + - paddlenlp >= 2.2.0 + + - paddlehub >= 2.1.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) + +- ### 2、安装 + + - ```shell + $ hub install nptag + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run nptag --input_text="糖醋排骨" + ``` + - 通过命令行方式实现NPTag模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + + # Load NPTag + module = hub.Module(name="nptag") + + # String input + results = module.predict("糖醋排骨") + print(results) + # [{'text': '糖醋排骨', 'label': '菜品', 'category': '饮食类_菜品'}] + + # List input + results = module.predict(["糖醋排骨", "红曲霉菌"]) + print(results) + # [{'text': '糖醋排骨', 'label': '菜品', 'category': '饮食类_菜品'}, {'text': '红曲霉菌', 'label': '微生物', 'category': '生物类_微生物'}] + ``` + +- ### 3、API + + - ```python + def __init__( + batch_size=32, + 
max_seq_length=128, + linking=True, + ) + ``` + + - **参数** + + - batch_size(int): 每个预测批次的样本数目,默认为32。 + - max_seq_length(int): 最大句子长度,默认为128。 + - linking(bool): 实现与WordTag类别标签的linking,默认为True。 + + - ```python + def predict(texts) + ``` + - 预测接口,输入文本,输出名词短语标注结果。 + + - **参数** + + - texts(str or list\[str\]): 待预测数据。 + + - **返回** + + - results(list\[dict\]): 输出结果。每个元素都是dict类型,包含以下信息: + + { + 'text': str, 原始文本。 + 'label': str,预测结果。 + 'category':str,对应的WordTag类别标签。 + } + +## 四、服务部署 + +- PaddleHub Serving可以部署一个在线中文名词短语标注服务,可以将此接口用于在线web应用。 + +- ## 第一步:启动PaddleHub Serving + + - 运行启动命令: + ```shell + $ hub serving start -m nptag + ``` + + - 这样就完成了服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +- ## 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + ```python + import requests + import json + + # 待预测数据(input string) + text = ["糖醋排骨"] + + # 设置运行配置 + data = {"texts": text} + + # 指定预测方法为WordTag并发送post请求,content-type类型应指定json方式 + url = "http://127.0.0.1:8866/predict/nptag" + headers = {"Content-Type": "application/json"} + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + + # 待预测数据(input list) + text = ["糖醋排骨", "红曲霉菌"] + + # 设置运行配置 + data = {"texts": text} + + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + ``` + + - 关于PaddleHub Serving更多信息参考:[服务部署](../../../../docs/docs_ch/tutorial/serving.md) + + +## 五、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install nptag==1.0.0 + ``` diff --git a/modules/text/text_to_knowledge/nptag/__init__.py b/modules/text/text_to_knowledge/nptag/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/modules/text/text_to_knowledge/nptag/module.py b/modules/text/text_to_knowledge/nptag/module.py new file mode 100644 index 0000000000000000000000000000000000000000..e7949e7fe99877edba5e11aaaf3f0b7445b4aaf2 --- /dev/null +++ b/modules/text/text_to_knowledge/nptag/module.py @@ -0,0 +1,72 @@ +# -*- coding:utf-8 -*- +import os +import argparse + +import paddle +import paddlehub as hub +from paddlehub.module.module import serving, moduleinfo, runnable +from paddlenlp import Taskflow + + +@moduleinfo( + name="nptag", + version="1.0.0", + summary="", + author="Baidu", + author_email="", + type="nlp/text_to_knowledge", + meta=hub.NLPPredictionModule) +class NPTag(paddle.nn.Layer): + def __init__(self, + batch_size=32, + max_seq_length=128, + linking=True, + ): + self.nptag = Taskflow("knowledge_mining", model="nptag", batch_size=batch_size, max_seq_length=max_seq_length, linking=linking) + + @serving + def predict(self, texts): + """ + The prediction interface for nptag. + + Args: + texts(str or list[str]): the input texts to be predict. + + Returns: + results(list[dict]): inference results. The element is a dictionary consists of: + { + 'text': str, the input texts. + 'head': list[dict], tagging results, the element is a dictionary consists of: + { + 'item': str, segmented word. + 'offset': int, the offset compared with the first character. + 'nptag_label':str, Part-Of-Speech label. + 'length': int, word length. + 'termid': str, link result with encyclopedia knowledge tree. + } + } + """ + return self.nptag(texts) + + @runnable + def run_cmd(self, argvs): + """ + Run as a command + """ + self.parser = argparse.ArgumentParser( + description='Run the %s module.' 
% self.name, + prog='hub run %s' % self.name, + usage='%(prog)s', + add_help=True) + + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") + + self.add_module_input_arg() + + args = self.parser.parse_args(argvs) + + input_data = self.check_input_data(args) + + results = self.predict(texts=input_data) + + return results diff --git a/modules/text/text_to_knowledge/nptag/requirements.txt b/modules/text/text_to_knowledge/nptag/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..7a8a87b33bb554a24de6344006c9e6c5f4f1c066 --- /dev/null +++ b/modules/text/text_to_knowledge/nptag/requirements.txt @@ -0,0 +1 @@ +paddlenlp>=2.2.0 diff --git a/modules/text/text_to_knowledge/wordtag/README.md b/modules/text/text_to_knowledge/wordtag/README.md new file mode 100644 index 0000000000000000000000000000000000000000..42c6ed697daee83a4e893fd899b09690e9c809bc --- /dev/null +++ b/modules/text/text_to_knowledge/wordtag/README.md @@ -0,0 +1,186 @@ +# WordTag + +|模型名称|WordTag| +| :--- | :---: | +|类别|文本-文本知识关联| +|网络|ERNIE-CTM+CRF| +|数据集|百度自建数据集| +|是否支持Fine-tuning|否| +|模型大小|549MB| +|最新更新日期|2021-10-26| +|数据指标|-| + + + +## 一、模型基本信息 + +- ### 模型介绍 + + - WordTag(中文词类知识标注工具)是首个能够覆盖所有中文词汇的词类知识标注工具,旨在为中文文本解析提供全面、丰富的知识标注结果,可以应用于模板(挖掘模板、解析模板)生成与匹配、知识挖掘(新词发现、关系挖掘)等自然语言处理任务中,提升文本解析与挖掘精度;也可以作为中文文本特征生成器,为各类机器学习模型提供文本特征。 + +

+(图:WordTag 词类知识标注与百科知识树链接结果示例,此处从略)

+ + - WordTag特点 + + - 覆盖所有中文词汇的词类体系,更丰富的知识标注结果 + - WordTag使用的词类体系为覆盖所有中文词汇的词类体系,包括各类实体词与非实体词(如概念、实体/专名、语法词等)。WordTag开源版对部分类目(如组织机构等),做了更细类目的划分识别(如,医疗卫生机构、体育组织机构),对仅使用文本信息难以细分的类目(如人物类、作品类、品牌名等),不做更细粒度的词类识别。用户需要细粒度的词类识别时,可利用百科知识树的类别体系自行定制。 + + - 整合百科知识树链接结果,获得更丰富的标注知识 + - 如上图示例所示,各个切分标注结果中,除词类标注外,还整合了百科知识树的链接结果,用户可以结合百科知识树数据共同使用:如,利用百科知识树中的subtype获得更细的上位粒度,利用term的百科信息获得更加丰富的知识等。 + + - 可定制的词类序列标注框架 + - WordTag开源版标注使用的词类体系是我们在实践中对百科文本解析应用较好的一个版本,不同类型文本(如,搜索query、新闻资讯)的词类分布不同,用户可以利用百科知识树定制自己的词类体系和训练样本,构建自己的WordTag应用版,以获得更好的适配效果。例如,可将自定义的词表按照百科知识树的字段定义好,挂接/整合到百科知识树上,即可使用自己的Term数据定制标注样本和标注任务。 + + - 模型结构 + - 模型使用ERNIE-CTM+CRF训练而成,预测时使用viterbi解码,模型结构如下: + +

+    (图:ERNIE-CTM+CRF 模型结构示意)
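+      - 为便于理解 CRF 层在预测阶段的解码过程,下面给出一个与框架无关的 Viterbi 解码示意草图(使用 numpy 的假设性示例,emissions、transitions 等命名均为演示用,并非 WordTag 的实际实现),展示如何由发射分数与转移分数解出最优标签序列:
+
+        ```python
+        import numpy as np
+
+        def viterbi_decode(emissions, transitions):
+            # emissions: [seq_len, num_tags] 发射分数;transitions: [num_tags, num_tags] 转移分数
+            seq_len, num_tags = emissions.shape
+            score = emissions[0]          # 第一个位置的初始分数
+            history = []
+            for t in range(1, seq_len):
+                # 上一位置各标签分数 + 转移分数 + 当前发射分数,记录每个标签的最优前驱
+                next_score = score[:, None] + transitions + emissions[t][None, :]
+                history.append(next_score.argmax(axis=0))
+                score = next_score.max(axis=0)
+            # 从最后一个位置回溯,得到整条最优标签路径
+            best_tag = int(score.argmax())
+            best_path = [best_tag]
+            for backpointers in reversed(history):
+                best_tag = int(backpointers[best_tag])
+                best_path.append(best_tag)
+            return list(reversed(best_path))
+
+        # 随机分数仅作演示:5 个词、4 个词类标签
+        np.random.seed(0)
+        print(viterbi_decode(np.random.rand(5, 4), np.random.rand(4, 4)))
+        ```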

+ +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.1.0 + + - paddlenlp >= 2.1.0 + + - paddlehub >= 2.1.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) + +- ### 2、安装 + + - ```shell + $ hub install wordtag + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + $ hub run wordtag --input_text="《孤女》是2010年九州出版社出版的小说,作者是余兼羽。" + ``` + - 通过命令行方式实现WordTag模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + import paddlehub as hub + + # Load WordTag + module = hub.Module(name="wordtag") + + # String input + results = module.predict("《孤女》是2010年九州出版社出版的小说,作者是余兼羽。") + print(results) + # [{'text': '《孤女》是2010年九州出版社出版的小说,作者是余兼羽', 'items': [{'item': '《', 'offset': 0, 'wordtag_label': 'w', 'length': 1}, {'item': '孤女', 'offset': 1, 'wordtag_label': '作品类_实体', 'length': 2}, {'item': '》', 'offset': 3, 'wordtag_label': 'w', 'length': 1}, {'item': '是', 'offset': 4, 'wordtag_label': '肯定词', 'length': 1, 'termid': '肯定否定词_cb_是'}, {'item': '2010年', 'offset': 5, 'wordtag_label': '时间类', 'length': 5, 'termid': '时间阶段_cb_2010年'}, {'item': '九州出版社', 'offset': 10, 'wordtag_label': '组织机构类', 'length': 5, 'termid': '组织机构_eb_九州出版社'}, {'item': '出版', 'offset': 15, 'wordtag_label': '场景事件', 'length': 2, 'termid': '场景事件_cb_出版'}, {'item': '的', 'offset': 17, 'wordtag_label': '助词', 'length': 1, 'termid': '助词_cb_的'}, {'item': '小说', 'offset': 18, 'wordtag_label': '作品类_概念', 'length': 2, 'termid': '小说_cb_小说'}, {'item': ',', 'offset': 20, 'wordtag_label': 'w', 'length': 1}, {'item': '作者', 'offset': 21, 'wordtag_label': '人物类_概念', 'length': 2, 'termid': '人物_cb_作者'}, {'item': '是', 'offset': 23, 'wordtag_label': '肯定词', 'length': 1, 'termid': '肯定否定词_cb_是'}, {'item': '余兼羽', 'offset': 24, 'wordtag_label': '人物类_实体', 'length': 3}]}] + + # List input + results = module.predict(["热梅茶是一道以梅子为主要原料制作的茶饮", "《孤女》是2010年九州出版社出版的小说,作者是余兼羽"]) + print(results) + # [{'text': '热梅茶是一道以梅子为主要原料制作的茶饮', 'items': [{'item': '热梅茶', 'offset': 0, 'wordtag_label': '饮食类_饮品', 'length': 3}, {'item': '是', 'offset': 3, 'wordtag_label': '肯定词', 'length': 1, 'termid': '肯定否定词_cb_是'}, {'item': '一道', 'offset': 4, 'wordtag_label': '数量词', 'length': 2}, {'item': '以', 'offset': 6, 'wordtag_label': '介词', 'length': 1, 'termid': '介词_cb_以'}, {'item': '梅子', 'offset': 7, 'wordtag_label': '饮食类', 'length': 2, 'termid': '饮食_cb_梅'}, {'item': '为', 'offset': 9, 'wordtag_label': '肯定词', 'length': 1, 'termid': '肯定否定词_cb_为'}, {'item': '主要原料', 'offset': 10, 'wordtag_label': '物体类', 'length': 4, 'termid': '物品_cb_主要原料'}, {'item': '制作', 'offset': 14, 'wordtag_label': '场景事件', 'length': 2, 'termid': '场景事件_cb_制作'}, {'item': '的', 'offset': 16, 'wordtag_label': '助词', 'length': 1, 'termid': '助词_cb_的'}, {'item': '茶饮', 'offset': 17, 'wordtag_label': '饮食类_饮品', 'length': 2, 'termid': '饮品_cb_茶饮'}]}, {'text': '《孤女》是2010年九州出版社出版的小说,作者是余兼羽', 'items': [{'item': '《', 'offset': 0, 'wordtag_label': 'w', 'length': 1}, {'item': '孤女', 'offset': 1, 'wordtag_label': '作品类_实体', 'length': 2}, {'item': '》', 'offset': 3, 'wordtag_label': 'w', 'length': 1}, {'item': '是', 'offset': 4, 'wordtag_label': '肯定词', 'length': 1, 'termid': '肯定否定词_cb_是'}, {'item': '2010年', 'offset': 5, 'wordtag_label': '时间类', 'length': 5, 'termid': '时间阶段_cb_2010年'}, {'item': '九州出版社', 'offset': 10, 'wordtag_label': '组织机构类', 'length': 5, 'termid': 
'组织机构_eb_九州出版社'}, {'item': '出版', 'offset': 15, 'wordtag_label': '场景事件', 'length': 2, 'termid': '场景事件_cb_出版'}, {'item': '的', 'offset': 17, 'wordtag_label': '助词', 'length': 1, 'termid': '助词_cb_的'}, {'item': '小说', 'offset': 18, 'wordtag_label': '作品类_概念', 'length': 2, 'termid': '小说_cb_小说'}, {'item': ',', 'offset': 20, 'wordtag_label': 'w', 'length': 1}, {'item': '作者', 'offset': 21, 'wordtag_label': '人物类_概念', 'length': 2, 'termid': '人物_cb_作者'}, {'item': '是', 'offset': 23, 'wordtag_label': '肯定词', 'length': 1, 'termid': '肯定否定词_cb_是'}, {'item': '余兼羽', 'offset': 24, 'wordtag_label': '人物类_实体', 'length': 3}]}] + ``` + +- ### 3、API + + - ```python + def __init__( + batch_size=32, + max_seq_length=128, + linking=True, + ) + ``` + + - **参数** + + - batch_size(int): 每个预测批次的样本数目,默认为32。 + - max_seq_length(int): 最大句子长度,默认为128。 + - linking(bool): 是否返回百科知识树的链接结果,默认为True。 + + - ```python + def predict(texts) + ``` + - 预测接口,输入文本,输出词类标注结果以及百科知识树的链接结果。 + + - **参数** + + - texts(str or list\[str\]): 待预测数据。 + + - **返回** + + - results(list\[dict\]): 输出结果。每个元素都是dict类型,包含以下信息: + + { + 'text': str, 原始文本。 + 'items': list\[dict\], 标注结果, 包含以下信息: + { + 'item': str, 分词结果。 + 'offset': int, 与输入文本首个字的偏移值。 + 'wordtag_label': str, 词类知识标注结果。 + 'length': int, 词汇长度。 + 'termid': str, 与百科知识树的链接结果。 + } + } + +## 四、服务部署 + +- PaddleHub Serving可以部署一个在线中文词类知识标注服务,可以将此接口用于在线web应用。 + +- ## 第一步:启动PaddleHub Serving + + - 运行启动命令: + ```shell + $ hub serving start -m wordtag + ``` + + - 这样就完成了服务化API的部署,默认端口号为8866。 + + - **NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 + +- ## 第二步:发送预测请求 + + - 配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 + + ```python + import requests + import json + + # 待预测数据(input string) + text = ["《孤女》是2010年九州出版社出版的小说,作者是余兼羽。"] + + # 设置运行配置 + data = {"texts": text} + + # 指定预测方法为WordTag并发送post请求,content-type类型应指定json方式 + url = "http://127.0.0.1:8866/predict/wordtag" + headers = {"Content-Type": "application/json"} + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + + # 待预测数据(input list) + text = ["热梅茶是一道以梅子为主要原料制作的茶饮", "《孤女》是2010年九州出版社出版的小说,作者是余兼羽"] + + # 设置运行配置 + data = {"texts": text} + + r = requests.post(url=url, headers=headers, data=json.dumps(data)) + print(r.json()) + ``` + + - 关于PaddleHub Serving更多信息参考:[服务部署](../../../../docs/docs_ch/tutorial/serving.md) + + +## 五、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install wordtag==1.0.0 + ``` diff --git a/modules/text/text_to_knowledge/wordtag/__init__.py b/modules/text/text_to_knowledge/wordtag/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/modules/text/text_to_knowledge/wordtag/module.py b/modules/text/text_to_knowledge/wordtag/module.py new file mode 100644 index 0000000000000000000000000000000000000000..a0ecde839fe36e85e8382e35504bddbe282fb2cc --- /dev/null +++ b/modules/text/text_to_knowledge/wordtag/module.py @@ -0,0 +1,72 @@ +# -*- coding:utf-8 -*- +import os +import argparse + +import paddle +import paddlehub as hub +from paddlehub.module.module import serving, moduleinfo, runnable +from paddlenlp import Taskflow + + +@moduleinfo( + name="wordtag", + version="1.0.0", + summary="", + author="baidu-nlp", + author_email="", + type="nlp/text_to_knowledge", + meta=hub.NLPPredictionModule) +class WordTag(paddle.nn.Layer): + def __init__(self, + batch_size=32, + max_seq_length=128, + linking=True, + ): + self.wordtag = Taskflow("knowledge_mining", batch_size=batch_size, max_seq_length=max_seq_length, linking=linking) + + @serving + def 
predict(self, texts): + """ + The prediction interface for wordtag. + + Args: + texts(str or list[str]): the input texts to be predict. + + Returns: + results(list[dict]): inference results. The element is a dictionary consists of: + { + 'text': str, the input texts. + 'head': list[dict], tagging results, the element is a dictionary consists of: + { + 'item': str, segmented word. + 'offset': int, the offset compared with the first character. + 'wordtag_label':str, Part-Of-Speech label. + 'length': int, word length. + 'termid': str, link result with encyclopedia knowledge tree. + } + } + """ + return self.wordtag(texts) + + @runnable + def run_cmd(self, argvs): + """ + Run as a command + """ + self.parser = argparse.ArgumentParser( + description='Run the %s module.' % self.name, + prog='hub run %s' % self.name, + usage='%(prog)s', + add_help=True) + + self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") + + self.add_module_input_arg() + + args = self.parser.parse_args(argvs) + + input_data = self.check_input_data(args) + + results = self.predict(texts=input_data) + + return results diff --git a/modules/text/text_to_knowledge/wordtag/requirements.txt b/modules/text/text_to_knowledge/wordtag/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..7a8a87b33bb554a24de6344006c9e6c5f4f1c066 --- /dev/null +++ b/modules/text/text_to_knowledge/wordtag/requirements.txt @@ -0,0 +1 @@ +paddlenlp>=2.2.0 diff --git a/modules/thirdparty/image/Image_gan/style_transfer/ID_Photo_GEN/README.md b/modules/thirdparty/image/Image_gan/style_transfer/ID_Photo_GEN/README.md deleted file mode 100644 index 6707c477de171e19770d0aa5a0869ac6f4b81fa7..0000000000000000000000000000000000000000 --- a/modules/thirdparty/image/Image_gan/style_transfer/ID_Photo_GEN/README.md +++ /dev/null @@ -1,48 +0,0 @@ -## 概述 -* 基于 face_landmark_localization 和 FCN_HRNet_W18_Face_Seg 模型实现的证件照生成模型,一键生成白底、红底和蓝底的人像照片 - -## 效果展示 -![](https://img-blog.csdnimg.cn/20201224163307901.jpg) - -## API -```python -def Photo_GEN( - images=None, - paths=None, - batch_size=1, - output_dir='output', - visualization=False, - use_gpu=False): -``` -证件照生成 API - -**参数** -* images (list[np.ndarray]) : 输入图像数据列表(BGR) -* paths (list[str]) : 输入图像路径列表 -* batch_size (int) : 数据批大小 -* output_dir (str) : 可视化图像输出目录 -* visualization (bool) : 是否可视化 -* use_gpu (bool) : 是否使用 GPU 进行推理 - -**返回** -* results (list[dict{"write":np.ndarray,"blue":np.ndarray,"red":np.ndarray}]): 输出图像数据列表 - -**代码示例** -```python -import cv2 -import paddlehub as hub - -model = hub.Module(name='ID_Photo_GEN') - -result = model.Photo_GEN( - images=[cv2.imread('/PATH/TO/IMAGE')], - paths=None, - batch_size=1, - output_dir='output', - visualization=True, - use_gpu=False) -``` - -## 依赖 -paddlepaddle >= 2.0.0rc0 -paddlehub >= 2.0.0b1 diff --git a/modules/thirdparty/image/Image_gan/style_transfer/UGATIT_83w/README.md b/modules/thirdparty/image/Image_gan/style_transfer/UGATIT_83w/README.md deleted file mode 100644 index 493b8eaf78eaced6fd48a99783a19c3f7e0ac2d1..0000000000000000000000000000000000000000 --- a/modules/thirdparty/image/Image_gan/style_transfer/UGATIT_83w/README.md +++ /dev/null @@ -1,122 +0,0 @@ -## 模型概述 -UGATIT 图像风格转换模型 - -模型可将输入的人脸图像转换成动漫风格 - -模型权重来自UGATIT-Paddle开源项目 - -模型所使用的权重为genA2B_0835000 - -模型详情请参考[UGATIT-Paddle开源项目](https://github.com/miraiwk/UGATIT-paddle) - -## 模型安装 - -```shell -$hub install UGATIT_83w -``` - - -## API 说明 - -```python -def style_transfer( - self, - images=None, - paths=None, - 
batch_size=1, - output_dir='output', - visualization=False -) -``` - -风格转换API,将输入的人脸图像转换成动漫风格。 - -转换效果图如下: - -![输入图像](https://ai-studio-static-online.cdn.bcebos.com/d130fabd8bd34e53b2f942b3766eb6bbd3c19c0676d04abfbd5cc4b83b66f8b6) -![输出图像](https://ai-studio-static-online.cdn.bcebos.com/78653331ee2d472b81ff5bbccd6a904a80d2c5208f9c42c789b4f09a1ef46332) - -**参数** - -* images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\],默认为 None; -* paths (list\[str\]): 图片的路径,默认为 None; -* batch\_size (int): batch 的大小,默认设为 1; -* visualization (bool): 是否将识别结果保存为图片文件,默认设为 False; -* output\_dir (str): 图片的保存路径,默认设为 output。 - - -**返回** - -* res (list\[numpy.ndarray\]): 输出图像数据,ndarray.shape 为 \[H, W, C\]。 - - -## 预测代码示例 - -```python -import cv2 -import paddlehub as hub - -# 模型加载 -# use_gpu:是否使用GPU进行预测 -model = hub.Module('UGATIT_83w', use_gpu=False) - -# 模型预测 -result = model.style_transfer(images=[cv2.imread('/PATH/TO/IMAGE')]) - -# or -# result = model.style_transfer(paths=['/PATH/TO/IMAGE']) -``` - -## 服务部署 - -PaddleHub Serving可以部署一个在线图像风格转换服务。 - -## 第一步:启动PaddleHub Serving - -运行启动命令: -```shell -$ hub serving start -m UGATIT_w83 -``` - -这样就完成了一个图像风格转换的在线服务API的部署,默认端口号为8866。 - -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 - -## 第二步:发送预测请求 - -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 - -```python -import requests -import json -import cv2 -import base64 - - -def cv2_to_base64(image): - data = cv2.imencode('.jpg', image)[1] - return base64.b64encode(data.tostring()).decode('utf8') - - -# 发送HTTP请求 -data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} -headers = {"Content-type": "application/json"} -url = "http://127.0.0.1:8866/predict/UGATIT_w83" -r = requests.post(url=url, headers=headers, data=json.dumps(data)) - -# 打印预测结果 -print(r.json()["results"]) -``` - - -## 模型相关信息 - -### 模型代码 - -https://github.com/miraiwk/UGATIT-paddle - -### 依赖 - -paddlepaddle >= 1.8.0 - -paddlehub >= 1.8.0 diff --git a/modules/thirdparty/image/Image_gan/style_transfer/UGATIT_92w/README.md b/modules/thirdparty/image/Image_gan/style_transfer/UGATIT_92w/README.md deleted file mode 100644 index 084188af3a11d767dd7a8480dc63d1bdd4bead19..0000000000000000000000000000000000000000 --- a/modules/thirdparty/image/Image_gan/style_transfer/UGATIT_92w/README.md +++ /dev/null @@ -1,122 +0,0 @@ -## 模型概述 -UGATIT 图像风格转换模型 - -模型可将输入的人脸图像转换成动漫风格 - -模型权重来自UGATIT-Paddle开源项目 - -模型所使用的权重为genA2B_0924000 - -模型详情请参考[UGATIT-Paddle开源项目](https://github.com/miraiwk/UGATIT-paddle) - -## 模型安装 - -```shell -$hub install UGATIT_92w -``` - - -## API 说明 - -```python -def style_transfer( - self, - images=None, - paths=None, - batch_size=1, - output_dir='output', - visualization=False -) -``` - -风格转换API,将输入的人脸图像转换成动漫风格。 - -转换效果图如下: - -![输入图像](https://ai-studio-static-online.cdn.bcebos.com/d130fabd8bd34e53b2f942b3766eb6bbd3c19c0676d04abfbd5cc4b83b66f8b6) -![输出图像](https://ai-studio-static-online.cdn.bcebos.com/b7305162ff6345e9b04507a196ebe854907b446936934844be8aae4b0297db18) - -**参数** - -* images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\],默认为 None; -* paths (list\[str\]): 图片的路径,默认为 None; -* batch\_size (int): batch 的大小,默认设为 1; -* visualization (bool): 是否将识别结果保存为图片文件,默认设为 False; -* output\_dir (str): 图片的保存路径,默认设为 output。 - - -**返回** - -* res (list\[numpy.ndarray\]): 输出图像数据,ndarray.shape 为 \[H, W, C\]。 - - -## 预测代码示例 - -```python -import cv2 -import paddlehub as hub - -# 模型加载 -# use_gpu:是否使用GPU进行预测 -model = hub.Module(name='UGATIT_92w', use_gpu=False) - -# 模型预测 -result = model.style_transfer(images=[cv2.imread('/PATH/TO/IMAGE')]) - -# 
or -# result = model.style_transfer(paths=['/PATH/TO/IMAGE']) -``` - -## 服务部署 - -PaddleHub Serving可以部署一个在线图像风格转换服务。 - -## 第一步:启动PaddleHub Serving - -运行启动命令: -```shell -$ hub serving start -m UGATIT_92w -``` - -这样就完成了一个图像风格转换的在线服务API的部署,默认端口号为8866。 - -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 - -## 第二步:发送预测请求 - -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 - -```python -import requests -import json -import cv2 -import base64 - - -def cv2_to_base64(image): - data = cv2.imencode('.jpg', image)[1] - return base64.b64encode(data.tostring()).decode('utf8') - - -# 发送HTTP请求 -data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} -headers = {"Content-type": "application/json"} -url = "http://127.0.0.1:8866/predict/UGATIT_92w" -r = requests.post(url=url, headers=headers, data=json.dumps(data)) - -# 打印预测结果 -print(r.json()["results"]) -``` - - -## 模型相关信息 - -### 模型代码 - -https://github.com/miraiwk/UGATIT-paddle - -### 依赖 - -paddlepaddle >= 1.8.0 - -paddlehub >= 1.8.0 diff --git a/modules/thirdparty/image/Image_gan/style_transfer/animegan_v2_paprika_54/README.md b/modules/thirdparty/image/Image_gan/style_transfer/animegan_v2_paprika_54/README.md deleted file mode 100644 index 50205f868b12c2abaadad3f21d9cea6eaa0542d4..0000000000000000000000000000000000000000 --- a/modules/thirdparty/image/Image_gan/style_transfer/animegan_v2_paprika_54/README.md +++ /dev/null @@ -1,125 +0,0 @@ -## 模型概述 -AnimeGAN V2 图像风格转换模型 - -模型可将输入的图像转换成Paprika风格 - -模型权重转换自AnimeGAN V2官方开源项目 - -模型所使用的权重为Paprika-54.ckpt - -模型详情请参考[AnimeGAN V2 开源项目](https://github.com/TachibanaYoshino/AnimeGANv2) - -## 模型安装 - -```shell -$hub install animegan_v2_paprika_54 -``` - - -## API 说明 - -```python -def style_transfer( - self, - images=None, - paths=None, - output_dir='output', - visualization=False, - min_size=32, - max_size=1024 -) -``` - -风格转换API,将输入的图片转换为漫画风格。 - -转换效果图如下: - -![输入图像](https://ai-studio-static-online.cdn.bcebos.com/bd002c4bb6a7427daf26988770bb18648b7d8d2bfd6746bfb9a429db4867727f) -![输出图像](https://ai-studio-static-online.cdn.bcebos.com/08ee95c94e0b4d4e8b2855a6ed40af5853b40c0047b3421aaa2f7c877fac5130) - - -**参数** - -* images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\],默认为 None; -* paths (list\[str\]): 图片的路径,默认为 None; -* visualization (bool): 是否将识别结果保存为图片文件,默认设为 False; -* output\_dir (str): 图片的保存路径,默认设为 output; -* min\_size (int): 输入图片的短边最小尺寸,默认设为 32; -* max\_size (int): 输入图片的短边最大尺寸,默认设为 1024。 - - -**返回** - -* res (list\[numpy.ndarray\]): 输出图像数据,ndarray.shape 为 \[H, W, C\]。 - - -## 预测代码示例 - -```python -import cv2 -import paddlehub as hub - -# 模型加载 -# use_gpu:是否使用GPU进行预测 -model = hub.Module(name='animegan_v2_paprika_54', use_gpu=False) - -# 模型预测 -result = model.style_transfer(images=[cv2.imread('/PATH/TO/IMAGE')]) - -# or -# result = model.style_transfer(paths=['/PATH/TO/IMAGE']) -``` - -## 服务部署 - -PaddleHub Serving可以部署一个在线图像风格转换服务。 - -## 第一步:启动PaddleHub Serving - -运行启动命令: -```shell -$ hub serving start -m animegan_v2_paprika_54 -``` - -这样就完成了一个图像风格转换的在线服务API的部署,默认端口号为8866。 - -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 - -## 第二步:发送预测请求 - -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 - -```python -import requests -import json -import cv2 -import base64 - - -def cv2_to_base64(image): - data = cv2.imencode('.jpg', image)[1] - return base64.b64encode(data.tostring()).decode('utf8') - - -# 发送HTTP请求 -data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} -headers = {"Content-type": "application/json"} -url = "http://127.0.0.1:8866/predict/animegan_v2_paprika_54" -r = 
requests.post(url=url, headers=headers, data=json.dumps(data)) - -# 打印预测结果 -print(r.json()["results"]) -``` - - -## 模型相关信息 - -### 模型代码 - -https://github.com/TachibanaYoshino/AnimeGANv2 - -### 依赖 - -paddlepaddle >= 1.8.0 - -paddlehub >= 1.8.0 diff --git a/modules/thirdparty/image/Image_gan/style_transfer/animegan_v2_paprika_97/README.md b/modules/thirdparty/image/Image_gan/style_transfer/animegan_v2_paprika_97/README.md deleted file mode 100644 index 10af52a3a71f2dd26168b659dab0cb05f3818323..0000000000000000000000000000000000000000 --- a/modules/thirdparty/image/Image_gan/style_transfer/animegan_v2_paprika_97/README.md +++ /dev/null @@ -1,125 +0,0 @@ -## 模型概述 -AnimeGAN V2 图像风格转换模型 - -模型可将输入的图像转换成Paprika风格 - -模型权重转换自AnimeGAN V2官方开源项目 - -模型所使用的权重为Paprika-97.ckpt - -模型详情请参考[AnimeGAN V2 开源项目](https://github.com/TachibanaYoshino/AnimeGANv2) - -## 模型安装 - -```shell -$hub install animegan_v2_paprika_97 -``` - - -## API 说明 - -```python -def style_transfer( - self, - images=None, - paths=None, - output_dir='output', - visualization=False, - min_size=32, - max_size=1024 -) -``` - -风格转换API,将输入的图片转换为漫画风格。 - -转换效果图如下: - -![输入图像](https://ai-studio-static-online.cdn.bcebos.com/bd002c4bb6a7427daf26988770bb18648b7d8d2bfd6746bfb9a429db4867727f) -![输出图像](https://ai-studio-static-online.cdn.bcebos.com/3b962a18a22e43028cc5530db1c5adb1a42e6aae4bb74b8598ee30ed52b59c8b) - - -**参数** - -* images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\],默认为 None; -* paths (list\[str\]): 图片的路径,默认为 None; -* visualization (bool): 是否将识别结果保存为图片文件,默认设为 False; -* output\_dir (str): 图片的保存路径,默认设为 output; -* min\_size (int): 输入图片的短边最小尺寸,默认设为 32; -* max\_size (int): 输入图片的短边最大尺寸,默认设为 1024。 - - -**返回** - -* res (list\[numpy.ndarray\]): 输出图像数据,ndarray.shape 为 \[H, W, C\]。 - - -## 预测代码示例 - -```python -import cv2 -import paddlehub as hub - -# 模型加载 -# use_gpu:是否使用GPU进行预测 -model = hub.Module(name='animegan_v2_paprika_97', use_gpu=False) - -# 模型预测 -result = model.style_transfer(images=[cv2.imread('/PATH/TO/IMAGE')]) - -# or -# result = model.style_transfer(paths=['/PATH/TO/IMAGE']) -``` - -## 服务部署 - -PaddleHub Serving可以部署一个在线图像风格转换服务。 - -## 第一步:启动PaddleHub Serving - -运行启动命令: -```shell -$ hub serving start -m animegan_v2_paprika_97 -``` - -这样就完成了一个图像风格转换的在线服务API的部署,默认端口号为8866。 - -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 - -## 第二步:发送预测请求 - -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 - -```python -import requests -import json -import cv2 -import base64 - - -def cv2_to_base64(image): - data = cv2.imencode('.jpg', image)[1] - return base64.b64encode(data.tostring()).decode('utf8') - - -# 发送HTTP请求 -data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} -headers = {"Content-type": "application/json"} -url = "http://127.0.0.1:8866/predict/animegan_v2_paprika_97" -r = requests.post(url=url, headers=headers, data=json.dumps(data)) - -# 打印预测结果 -print(r.json()["results"]) -``` - - -## 模型相关信息 - -### 模型代码 - -https://github.com/TachibanaYoshino/AnimeGANv2 - -### 依赖 - -paddlepaddle >= 1.8.0 - -paddlehub >= 1.8.0 diff --git a/modules/thirdparty/image/classification/DriverStatusRecognition/README.md b/modules/thirdparty/image/classification/DriverStatusRecognition/README.md deleted file mode 100644 index 4de54de77da732623d1cb9066bd3f5d7b5fdecd4..0000000000000000000000000000000000000000 --- a/modules/thirdparty/image/classification/DriverStatusRecognition/README.md +++ /dev/null @@ -1,65 +0,0 @@ -DriverStatusRecognition -类别 图像 - 图像分类 -网络 MobileNetV3_small_ssld -数据集 分心司机检测数据集 - -# 模型概述 
-驾驶员状态识别(DriverStatusRecognition),该模型可挖掘出人在疲劳状态下的表情特征,然后将这些定性的表情特征进行量化,提取出面部特征点及特征指标作为判断依据,再结合实验数据总结出基于这些参数的识别方法,最后输入获取到的状态数据进行识别和判断。该PaddleHub Module支持API预测及命令行预测。 - -# 选择模型版本进行安装 -$ hub install DriverStatusRecognition==1.0.0 - -# 在线体验 -[AI Studio快速体验](https://aistudio.baidu.com/aistudio/projectdetail/1649513) - -# 命令行预测示例 -$ hub run DriverStatusRecognition --image 1.png --use_gpu True - -# Module API说明 -## def predict(data) -驾驶员状态识别预测接口,输入一张图像,输出该图像上驾驶员的状态 -### 参数 -- data:dict类型,key为image,str类型,value为待检测的图片路径,list类型。 - -### 返回 -- result:list类型,每个元素为对应输入图片的预测结果。预测结果为dict类型,key为该图片分类结果label,value为该label对应的概率 - -# 代码示例 - -## API调用 -~~~ -import cv2 -import paddlehub as hub - -module = hub.Module(directory='DriverStatusRecognition') # 一行代码实现模型调用 - -images = [cv2.imread('work/imgs/test/img_1622.jpg'), cv2.imread('work/imgs/test/img_14165.jpg'), cv2.imread('work/imgs/test/img_47183.jpg')] -results = module.predict(images=images) - -for result in results: - print(result) -~~~ - -## 命令行调用 -~~~ -$ hub run DriverStatusRecognition --image 1.png --use_gpu True -~~~ - -# 效果展示 - -## 原图 - - -## 输出结果 -~~~ -[{'category_id': 5, 'category': 'ch5', 'score': 0.47390476}] -[{'category_id': 2, 'category': 'ch2', 'score': 0.99997914}] -[{'category_id': 1, 'category': 'ch1', 'score': 0.99996376}] -~~~ - -# 贡献者 -郑博培、彭兆帅 - -# 依赖 -paddlepaddle >= 2.0.0
-paddlehub >= 2.0.0 diff --git a/modules/thirdparty/image/classification/SnakeIdentification/README.md b/modules/thirdparty/image/classification/SnakeIdentification/README.md deleted file mode 100644 index e39ea8de42d1d4c39a89bebab77f26143b6ea8df..0000000000000000000000000000000000000000 --- a/modules/thirdparty/image/classification/SnakeIdentification/README.md +++ /dev/null @@ -1,64 +0,0 @@ -SnakeIdentification -类别 图像 - 图像分类 -网络 ResNet50_vd_ssld -数据集 蛇种数据集 - -# 模型概述 -蛇种识别(SnakeIdentification),该模型可准确识别蛇的种类,并精准判断蛇的毒性。该PaddleHub Module支持API预测及命令行预测。 - -# 选择模型版本进行安装 -$ hub install SnakeIdentification==1.0.0 - -# 在线体验 -[AI Studio快速体验](https://aistudio.baidu.com/aistudio/projectdetail/1646951) - -# 命令行预测示例 -$ hub run SnakeIdentification --image 1.png --use_gpu True - -# Module API说明 -## def predict(data) -蛇种识别预测接口,输入一张图像,输出该图像上蛇的类别 -### 参数 -- data:dict类型,key为image,str类型,value为待检测的图片路径,list类型。 - -### 返回 -- result:list类型,每个元素为对应输入图片的预测结果。预测结果为dict类型,key为该图片分类结果label,value为该label对应的概率 - -# 代码示例 - -## API调用 -~~~ -import cv2 -import paddlehub as hub - -module = hub.Module(name="SnakeIdentification") - -images = [cv2.imread('snake_data/class_1/2421.jpg')] - -# execute predict and print the result -results = module.predict(images=images) -for result in results: - print(result) -~~~ - -## 命令行调用 -~~~ -$ hub run SnakeIdentification --image 1.png --use_gpu True -~~~ - -# 效果展示 - -## 原图 - - -## 输出结果 -~~~ -[{'category_id': 0, 'category': '水蛇', 'score': 0.9999205}] -~~~ - -# 贡献者 -郑博培、彭兆帅 - -# 依赖 -paddlepaddle >= 2.0.0
-paddlehub >= 2.0.0 diff --git a/modules/thirdparty/image/classification/food_classification/README.md b/modules/thirdparty/image/classification/food_classification/README.md deleted file mode 100644 index 138bfcf037cc2e6d0f6ef71fb392b2d13cc2b309..0000000000000000000000000000000000000000 --- a/modules/thirdparty/image/classification/food_classification/README.md +++ /dev/null @@ -1,84 +0,0 @@ -# food_classification - -类别 图像 - 图像分类 - -网络 ResNet50_vd_ssld - - -> 模型概述 - -美食分类(food_classification),该模型可识别苹果派,小排骨,烤面包,牛肉馅饼,牛肉鞑靼。该PaddleHub Module支持API预测及命令行预测。 - -> 选择模型版本进行安装 - -```shell -$ hub install food_classification==1.0.0 -``` -> Module API说明 - -```python -def predict(self, - images=None, - paths=None, - batch_size=1, - use_gpu=False, - **kwargs): -``` -美食分类预测接口,输入一张图像,输出该图像上食物的类别 - -参数 - -* images (list[numpy.ndarray]): 图片数据,ndarray.shape 为 [H, W, C],BGR格式; -* paths (list[str]): 图片的路径; -* batch_size (int): batch 的大小; -* use_gpu (bool): 是否使用 GPU; - -返回 - -* res (list[dict]): 识别结果的列表,列表中每一个元素为 dict,各字段为: - * category_id (int): 类别的id; - * category(str): 类别; - * score(float): 准确率; - -## 代码示例 - -### API调用 - -```python -import cv2 -import paddlehub as hub - -module = hub.Module(name="food_classification") - -images = [cv2.imread('PATH/TO/IMAGE')] - -# execute predict and print the result -results = module.predict(images=images) -for result in results: - print(result) -``` - -### 命令行调用 -```shell -$ hub run food_classification --input_path /PATH/TO/IMAGE --use_gpu True -``` - -## 效果展示 - -### 原图 - - -### 输出结果 -```python -[{'category_id': 0, 'category': 'apple_pie', 'score': 0.9985085}] -``` - -## 贡献者 -彭兆帅、郑博培 - -## 依赖 -paddlepaddle >= 2.0.0 - -paddlehub >= 2.0.0 - -paddlex >= 1.3.7 diff --git a/modules/thirdparty/image/classification/marine_biometrics/README.md b/modules/thirdparty/image/classification/marine_biometrics/README.md deleted file mode 100644 index 6ba7acd92dc5f94c28a65695a7fcd0f93050190e..0000000000000000000000000000000000000000 --- a/modules/thirdparty/image/classification/marine_biometrics/README.md +++ /dev/null @@ -1,69 +0,0 @@ -marine_biometrics - -类别 图像 - 图像分类 - -网络 ResNet50_vd_ssld - -数据集 Fish4Knowledge - -# 模型概述 -海洋生物识别(marine_biometrics),该模型可准确识别鱼的种类。该PaddleHub Module支持API预测及命令行预测。 - -# 选择模型版本进行安装 -$ hub install marine_biometrics==1.0.0 - -# 在线体验 -[AI Studio快速体验](https://aistudio.baidu.com/aistudio/projectdetail/1667809) - -# 命令行预测示例 -$ hub run marine_biometrics --image 1.png --use_gpu True - -# Module API说明 -## def predict(data) -海洋生物识别预测接口,输入一张图像,输出该图像上鱼的类别 -### 参数 -- data:dict类型,key为image,str类型,value为待检测的图片路径,list类型。 - -### 返回 -- result:list类型,每个元素为对应输入图片的预测结果。预测结果为dict类型,key为该图片分类结果label,value为该label对应的概率 - -# 代码示例 - -## API调用 - -~~~ -import cv2 -import paddlehub as hub - -module = hub.Module(name="MarineBiometrics") - -images = [cv2.imread('PATH/TO/IMAGE')] - -# execute predict and print the result -results = module.predict(images=images) -for result in results: - print(result) -~~~ - -## 命令行调用 -~~~ -$ hub run marine_biometrics --image 1.png --use_gpu True -~~~ - -# 效果展示 - -## 原图 - - -## 输出结果 -~~~ -[{'category_id': 16, 'category': 'Plectroglyphidodon_dickii', 'score': 0.9932127}] -~~~ - -# 贡献者 -郑博培、彭兆帅 - -# 依赖 -paddlepaddle >= 2.0.0 - -paddlehub >= 2.0.0 diff --git a/modules/thirdparty/image/keypoint_detection/hand_pose_localization/README.md b/modules/thirdparty/image/keypoint_detection/hand_pose_localization/README.md deleted file mode 100644 index 4e247d9ae24f9f52f3f9f0a87b1d04fe12390b44..0000000000000000000000000000000000000000 --- 
a/modules/thirdparty/image/keypoint_detection/hand_pose_localization/README.md +++ /dev/null @@ -1,112 +0,0 @@ -## 模型概述 -openpose 手部关键点检测模型 - -模型详情请参考[openpose开源项目](https://github.com/CMU-Perceptual-Computing-Lab/openpose) - -## 模型安装 - -```shell -$hub install hand_pose_localization -``` - -## API 说明 - -```python -def keypoint_detection( - self, - images=None, - paths=None, - batch_size=1, - output_dir='output', - visualization=False -) -``` - -预测API,识别出人体手部关键点。 - -![手部关键点](https://ai-studio-static-online.cdn.bcebos.com/97e1ae7c1e68477d85b37f53ee997fbc4ef0fc12c7634301bc08749bd003cac0) - -**参数** - -* images (list\[numpy.ndarray\]): 图片数据,ndarray.shape 为 \[H, W, C\], 默认设为 None; -* paths (list\[str\]): 图片的路径, 默认设为 None; -* batch\_size (int): batch 的大小,默认设为 1; -* visualization (bool): 是否将识别结果保存为图片文件,默认设为 False; -* output\_dir (str): 图片的保存路径,默认设为 output。 - -**返回** - -* res (list[list[list[int]]]): 每张图片识别到的21个手部关键点组成的列表,每个关键点的格式为[x, y],若有关键点未识别到则为None - - -## 预测代码示例 - -```python -import cv2 -import paddlehub as hub - -# use_gpu:是否使用GPU进行预测 -model = hub.Module(name='hand_pose_localization', use_gpu=False) - -# 调用关键点检测API -result = model.keypoint_detection(images=[cv2.imread('/PATH/TO/IMAGE')]) - -# or -# result = model.keypoint_detection(paths=['/PATH/TO/IMAGE']) - -# 打印预测结果 -print(result) -``` - -## 服务部署 - -PaddleHub Serving可以部署一个在线人体手部关键点检测服务。 - -## 第一步:启动PaddleHub Serving - -运行启动命令: -```shell -$ hub serving start -m hand_pose_localization -``` - -这样就完成了一个人体手部关键点检测的在线服务API的部署,默认端口号为8866。 - -**NOTE:** 如使用GPU预测,则需要在启动服务之前,请设置CUDA\_VISIBLE\_DEVICES环境变量,否则不用设置。 - -## 第二步:发送预测请求 - -配置好服务端,以下数行代码即可实现发送预测请求,获取预测结果 - -```python -import requests -import json -import cv2 -import base64 - -# 图片Base64编码函数 -def cv2_to_base64(image): - data = cv2.imencode('.jpg', image)[1] - return base64.b64encode(data.tostring()).decode('utf8') - -# 发送HTTP请求 -data = {'images':[cv2_to_base64(cv2.imread("/PATH/TO/IMAGE"))]} -headers = {"Content-type": "application/json"} -url = "http://127.0.0.1:8866/predict/hand_pose_localization" -r = requests.post(url=url, headers=headers, data=json.dumps(data)) - -# 打印预测结果 -print(r.json()["results"]) -``` - - -## 模型相关信息 - -### 模型代码 - -https://github.com/CMU-Perceptual-Computing-Lab/openpose - -### 依赖 - -paddlepaddle >= 1.8.0 - -paddlehub >= 1.8.0 diff --git a/modules/thirdparty/image/semantic_segmentation/U2Net/README.md b/modules/thirdparty/image/semantic_segmentation/U2Net/README.md deleted file mode 100644 index 0bd5cd94cfec98440c20d652ca4afe186d7ee72f..0000000000000000000000000000000000000000 --- a/modules/thirdparty/image/semantic_segmentation/U2Net/README.md +++ /dev/null @@ -1,56 +0,0 @@ -## 概述 -* ![](http://latex.codecogs.com/svg.latex?U^2Net)的网络结构如下图,其类似于编码-解码(Encoder-Decoder)结构的 U-Net -* 每个 stage 由新提出的 RSU模块(residual U-block) 组成. 
例如,En_1 即为基于 RSU 构建的 - -![](https://ai-studio-static-online.cdn.bcebos.com/999d37b4ffdd49dc9e3315b7cec7b2c6918fdd57c8594ced9dded758a497913d) - -## 效果展示 -![](https://ai-studio-static-online.cdn.bcebos.com/4d77bc3a05cf48bba6f67b797978f4cdf10f38288b9645d59393dd85cef58eff) -![](https://ai-studio-static-online.cdn.bcebos.com/865b7b6a262b4ce3bbba4a5c0d973d9eea428bc3e8af4f76a1cdab0c04e3dd33) -![](https://ai-studio-static-online.cdn.bcebos.com/11c9eba8de6d4316b672f10b285245061821f0a744e441f3b80c223881256ca0) - -## API -```python -def Segmentation( - images=None, - paths=None, - batch_size=1, - input_size=320, - output_dir='output', - visualization=False): -``` -图像前景背景分割 API - -**参数** -* images (list[np.ndarray]) : 输入图像数据列表(BGR) -* paths (list[str]) : 输入图像路径列表 -* batch_size (int) : 数据批大小 -* input_size (int) : 输入图像大小 -* output_dir (str) : 可视化图像输出目录 -* visualization (bool) : 是否可视化 - -**返回** -* results (list[np.ndarray]): 输出图像数据列表 - -**代码示例** -```python -import cv2 -import paddlehub as hub - -model = hub.Module(name='U2Net') - -result = model.Segmentation( - images=[cv2.imread('/PATH/TO/IMAGE')], - paths=None, - batch_size=1, - input_size=320, - output_dir='output', - visualization=True) -``` - -## 查看代码 -https://github.com/NathanUA/U-2-Net - -## 依赖 -paddlepaddle >= 2.0.0rc0 -paddlehub >= 2.0.0b1 diff --git a/modules/thirdparty/image/semantic_segmentation/U2Netp/README.md b/modules/thirdparty/image/semantic_segmentation/U2Netp/README.md deleted file mode 100644 index c0a9be7047ed9397870b4ffde8412c5bc06acdbd..0000000000000000000000000000000000000000 --- a/modules/thirdparty/image/semantic_segmentation/U2Netp/README.md +++ /dev/null @@ -1,57 +0,0 @@ -## 概述 -* ![](http://latex.codecogs.com/svg.latex?U^2Net)的网络结构如下图,其类似于编码-解码(Encoder-Decoder)结构的 U-Net -* 每个 stage 由新提出的 RSU模块(residual U-block) 组成. 
例如,En_1 即为基于 RSU 构建的 -* ![](http://latex.codecogs.com/svg.latex?U^2Net^+)是一个小型化的![](http://latex.codecogs.com/svg.latex?U^2Net) - -![](https://ai-studio-static-online.cdn.bcebos.com/999d37b4ffdd49dc9e3315b7cec7b2c6918fdd57c8594ced9dded758a497913d) - -## 效果展示 -![](https://ai-studio-static-online.cdn.bcebos.com/4d77bc3a05cf48bba6f67b797978f4cdf10f38288b9645d59393dd85cef58eff) -![](https://ai-studio-static-online.cdn.bcebos.com/865b7b6a262b4ce3bbba4a5c0d973d9eea428bc3e8af4f76a1cdab0c04e3dd33) -![](https://ai-studio-static-online.cdn.bcebos.com/11c9eba8de6d4316b672f10b285245061821f0a744e441f3b80c223881256ca0) - -## API -```python -def Segmentation( - images=None, - paths=None, - batch_size=1, - input_size=320, - output_dir='output', - visualization=False): -``` -图像前景背景分割 API - -**参数** -* images (list[np.ndarray]) : 输入图像数据列表(BGR) -* paths (list[str]) : 输入图像路径列表 -* batch_size (int) : 数据批大小 -* input_size (int) : 输入图像大小 -* output_dir (str) : 可视化图像输出目录 -* visualization (bool) : 是否可视化 - -**返回** -* results (list[np.ndarray]): 输出图像数据列表 - -**代码示例** -```python -import cv2 -import paddlehub as hub - -model = hub.Module(name='U2Netp') - -result = model.Segmentation( - images=[cv2.imread('/PATH/TO/IMAGE')], - paths=None, - batch_size=1, - input_size=320, - output_dir='output', - visualization=True) -``` - -## 查看代码 -https://github.com/NathanUA/U-2-Net - -## 依赖 -paddlepaddle >= 2.0.0rc0 -paddlehub >= 2.0.0b1 diff --git a/modules/thirdparty/text/text_generation/reading_pictures_writing_poems/README.md b/modules/thirdparty/text/text_generation/reading_pictures_writing_poems/README.md deleted file mode 100644 index ecc9ad2cb91ba323ed613072d3a0758733022332..0000000000000000000000000000000000000000 --- a/modules/thirdparty/text/text_generation/reading_pictures_writing_poems/README.md +++ /dev/null @@ -1,42 +0,0 @@ -reading_pictures_writing_poems -类别 文本 - 文本生成 - -# 模型概述 -看图写诗(reading_pictures_writing_poems),该模型可自动根据图像生成古诗词。该PaddleHub Module支持预测。 - -# 选择模型版本进行安装 -$ hub install reading_pictures_writing_poems==1.0.0 - -# 命令行预测示例 -$ hub run reading_pictures_writing_poems --input_image "scenery.jpg" - -![](https://ai-studio-static-online.cdn.bcebos.com/69a9d5a5472449678a08e1ee5066c81b5859827647d74eb8a674afabbc205ae5) -
AI根据这张图片生成的古诗是:
-- 蕾蕾海河海,岳峰岳麓蔓。
-- 不萌枝上春,自结心中线。
-
-怎么样?还不错吧! -# Module API说明 -## WritingPoem(self, image, use_gpu=False) -看图写诗预测接口,预测输入一张图像,输出一首古诗词 -### 参数 -- image(str): 待检测的图片路径 -- use_gpu (bool): 是否使用 GPU -### 返回 -- results (list[dict]): 识别结果的列表,列表中每一个元素为 dict,关键字有 image,Poetrys, 其中: -image字段为原输入图片的路径 -Poetrys字段为输出的古诗词 - -# 代码示例 -import paddlehub as hub - -readingPicturesWritingPoems = hub.Module(directory="./reading_pictures_writing_poems") -readingPicturesWritingPoems.WritingPoem(image = "scenery.jpg", use_gpu=True) - -# 贡献者 -郑博培、彭兆帅 - -# 依赖 -paddlepaddle >= 1.8.2 -paddlehub >= 1.8.0 diff --git a/modules/thirdparty/text/text_generation/reading_pictures_writing_poems_for_midautumn/MidAutumnPoetry/model/decode.py b/modules/thirdparty/text/text_generation/reading_pictures_writing_poems_for_midautumn/MidAutumnPoetry/model/decode.py deleted file mode 100644 index d07a58b559796b0331946561ed2dcbdc85ffadae..0000000000000000000000000000000000000000 --- a/modules/thirdparty/text/text_generation/reading_pictures_writing_poems_for_midautumn/MidAutumnPoetry/model/decode.py +++ /dev/null @@ -1,259 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import re -import numpy as np -from collections import namedtuple - -import paddle.fluid as F -import paddle.fluid.layers as L -import paddle.fluid.dygraph as D - - -def gen_bias(encoder_inputs, decoder_inputs, step): - decoder_bsz, decoder_seqlen = decoder_inputs.shape[:2] - attn_bias = L.reshape(L.range(0, decoder_seqlen, 1, dtype='float32') + 1, [1, -1, 1]) - decoder_bias = L.cast((L.matmul(attn_bias, 1. 
/ attn_bias, transpose_y=True) >= 1.), - 'float32') #[1, 1, decoderlen, decoderlen] - encoder_bias = L.unsqueeze(L.cast(L.ones_like(encoder_inputs), 'float32'), [1]) #[bsz, 1, encoderlen] - encoder_bias = L.expand(encoder_bias, [1, decoder_seqlen, 1]) #[bsz,decoderlen, encoderlen] - decoder_bias = L.expand(decoder_bias, [decoder_bsz, 1, 1]) #[bsz, decoderlen, decoderlen] - if step > 0: - bias = L.concat([encoder_bias, L.ones([decoder_bsz, decoder_seqlen, step], 'float32'), decoder_bias], -1) - else: - bias = L.concat([encoder_bias, decoder_bias], -1) - return bias - - -@D.no_grad -def greedy_search_infilling(model, - q_ids, - q_sids, - sos_id, - eos_id, - attn_id, - max_encode_len=640, - max_decode_len=100, - tgt_type_id=3): - model.eval() - _, logits, info = model(q_ids, q_sids) - gen_ids = L.argmax(logits, -1) - d_batch, d_seqlen = q_ids.shape - seqlen = L.reduce_sum(L.cast(q_ids != 0, 'int64'), 1, keep_dim=True) - has_stopped = np.zeros([d_batch], dtype=np.bool) - gen_seq_len = np.zeros([d_batch], dtype=np.int64) - output_ids = [] - - past_cache = info['caches'] - - cls_ids = L.ones([d_batch], dtype='int64') * sos_id - attn_ids = L.ones([d_batch], dtype='int64') * attn_id - ids = L.stack([cls_ids, attn_ids], -1) - for step in range(max_decode_len): - bias = gen_bias(q_ids, ids, step) - pos_ids = D.to_variable(np.tile(np.array([[step, step + 1]], dtype=np.int64), [d_batch, 1])) - pos_ids += seqlen - _, logits, info = model( - ids, L.ones_like(ids) * tgt_type_id, pos_ids=pos_ids, attn_bias=bias, past_cache=past_cache) - gen_ids = L.argmax(logits, -1) - - past_cached_k, past_cached_v = past_cache - cached_k, cached_v = info['caches'] - cached_k = [L.concat([pk, k[:, :1, :]], 1) for pk, k in zip(past_cached_k, cached_k)] # concat cached - cached_v = [L.concat([pv, v[:, :1, :]], 1) for pv, v in zip(past_cached_v, cached_v)] - past_cache = (cached_k, cached_v) - - gen_ids = gen_ids[:, 1] - ids = L.stack([gen_ids, attn_ids], 1) - - gen_ids = gen_ids.numpy() - has_stopped |= (gen_ids == eos_id).astype(np.bool) - gen_seq_len += (1 - has_stopped.astype(np.int64)) - output_ids.append(gen_ids.tolist()) - if has_stopped.all(): - break - output_ids = np.array(output_ids).transpose([1, 0]) - return output_ids - - -BeamSearchState = namedtuple('BeamSearchState', ['log_probs', 'lengths', 'finished']) -BeamSearchOutput = namedtuple('BeamSearchOutput', ['scores', 'predicted_ids', 'beam_parent_ids']) - - -def log_softmax(x): - e_x = np.exp(x - np.max(x)) - return np.log(e_x / e_x.sum()) - - -def mask_prob(p, onehot_eos, finished): - is_finished = L.cast(L.reshape(finished, [-1, 1]) != 0, 'float32') - p = is_finished * (1. - L.cast(onehot_eos, 'float32')) * -9999. + (1. - is_finished) * p - return p - - -def hyp_score(log_probs, length, length_penalty): - lp = L.pow((5. + L.cast(length, 'float32')) / 6., length_penalty) - return log_probs / lp - - -def beam_search_step(state, logits, eos_id, beam_width, is_first_step, length_penalty): - """logits.shape == [B*W, V]""" - beam_size, vocab_size = logits.shape # as batch size=1 in this hub module. 
the first dim means bsz * beam_size equals beam_size - logits_np = logits.numpy() - for i in range(beam_size): - logits_np[i][17963] = 0 # make [UNK] prob = 0 - logits = D.to_variable(logits_np) - - bsz, beam_width = state.log_probs.shape - onehot_eos = L.cast(F.one_hot(L.ones([1], 'int64') * eos_id, vocab_size), 'int64') #[1, V] - - probs = L.log(L.softmax(logits)) #[B*W, V] - probs = mask_prob(probs, onehot_eos, state.finished) #[B*W, V] - allprobs = L.reshape(state.log_probs, [-1, 1]) + probs #[B*W, V] - - not_finished = 1 - L.reshape(state.finished, [-1, 1]) #[B*W,1] - not_eos = 1 - onehot_eos - length_to_add = not_finished * not_eos #[B*W,V] - alllen = L.reshape(state.lengths, [-1, 1]) + length_to_add - - allprobs = L.reshape(allprobs, [-1, beam_width * vocab_size]) - alllen = L.reshape(alllen, [-1, beam_width * vocab_size]) - allscore = hyp_score(allprobs, alllen, length_penalty) - if is_first_step: - allscore = L.reshape(allscore, [bsz, beam_width, -1])[:, 0, :] # first step only consiter beam 0 - scores, idx = L.topk(allscore, k=beam_width) #[B, W] - next_beam_id = idx // vocab_size #[B, W] - next_word_id = idx % vocab_size - - gather_idx = L.concat([L.where(idx != -1)[:, :1], L.reshape(idx, [-1, 1])], 1) - next_probs = L.reshape(L.gather_nd(allprobs, gather_idx), idx.shape) - next_len = L.reshape(L.gather_nd(alllen, gather_idx), idx.shape) - - gather_idx = L.concat([L.where(next_beam_id != -1)[:, :1], L.reshape(next_beam_id, [-1, 1])], 1) - next_finished = L.reshape(L.gather_nd(state.finished, gather_idx), - state.finished.shape) #[gather new beam state according to new beam id] - - next_finished += L.cast(next_word_id == eos_id, 'int64') - next_finished = L.cast(next_finished > 0, 'int64') - - next_state = BeamSearchState(log_probs=next_probs, lengths=next_len, finished=next_finished) - output = BeamSearchOutput(scores=scores, predicted_ids=next_word_id, beam_parent_ids=next_beam_id) - - return output, next_state - - -@D.no_grad -def beam_search_infilling(model, - q_ids, - q_sids, - sos_id, - eos_id, - attn_id, - max_encode_len=640, - max_decode_len=100, - beam_width=5, - tgt_type_id=3, - length_penalty=1.0): - model.eval() - _, __, info = model(q_ids, q_sids) - d_batch, d_seqlen = q_ids.shape - - state = BeamSearchState( - log_probs=L.zeros([d_batch, beam_width], 'float32'), - lengths=L.zeros([d_batch, beam_width], 'int64'), - finished=L.zeros([d_batch, beam_width], 'int64')) - outputs = [] - - def reorder_(t, parent_id): - """reorder cache according to parent beam id""" - gather_idx = L.where(parent_id != -1)[:, 0] * beam_width + L.reshape(parent_id, [-1]) - t = L.gather(t, gather_idx) - return t - - def tile_(t, times): - _shapes = list(t.shape[1:]) - ret = L.reshape(L.expand(L.unsqueeze(t, [1]), [ - 1, - times, - ] + [ - 1, - ] * len(_shapes)), [ - -1, - ] + _shapes) - return ret - - cached_k, cached_v = info['caches'] - cached_k = [tile_(k, beam_width) for k in cached_k] - cached_v = [tile_(v, beam_width) for v in cached_v] - past_cache = (cached_k, cached_v) - - q_ids = tile_(q_ids, beam_width) - seqlen = L.reduce_sum(L.cast(q_ids != 0, 'int64'), 1, keep_dim=True) - - cls_ids = L.ones([d_batch * beam_width], dtype='int64') * sos_id - attn_ids = L.ones([d_batch * beam_width], dtype='int64') * attn_id # SOS - ids = L.stack([cls_ids, attn_ids], -1) - for step in range(max_decode_len): - bias = gen_bias(q_ids, ids, step) - pos_ids = D.to_variable(np.tile(np.array([[step, step + 1]], dtype=np.int64), [d_batch * beam_width, 1])) - pos_ids += seqlen - - _, logits, info = model( - 
ids, L.ones_like(ids) * tgt_type_id, pos_ids=pos_ids, attn_bias=bias, past_cache=past_cache) - - output, state = beam_search_step( - state, - logits[:, 1], - eos_id=eos_id, - beam_width=beam_width, - is_first_step=(step == 0), - length_penalty=length_penalty) - outputs.append(output) - - past_cached_k, past_cached_v = past_cache - cached_k, cached_v = info['caches'] - cached_k = [ - reorder_(L.concat([pk, k[:, :1, :]], 1), output.beam_parent_ids) for pk, k in zip(past_cached_k, cached_k) - ] # concat cached - cached_v = [ - reorder_(L.concat([pv, v[:, :1, :]], 1), output.beam_parent_ids) for pv, v in zip(past_cached_v, cached_v) - ] - past_cache = (cached_k, cached_v) - - pred_ids_flatten = L.reshape(output.predicted_ids, [d_batch * beam_width]) - ids = L.stack([pred_ids_flatten, attn_ids], 1) - - if state.finished.numpy().all(): - break - - final_ids = L.stack([o.predicted_ids for o in outputs], 0) - final_parent_ids = L.stack([o.beam_parent_ids for o in outputs], 0) - final_ids = L.gather_tree(final_ids, final_parent_ids) #[:, :, - #0] #pick best beam - final_ids = L.transpose(L.reshape(final_ids, [-1, d_batch * 1, beam_width]), [1, 2, 0]) - return final_ids - - -en_patten = re.compile(r'^[a-zA-Z0-9]*$') - - -def post_process(token): - if token.startswith('##'): - ret = token[2:] - else: - if en_patten.match(token): - ret = ' ' + token - else: - ret = token - return ret diff --git a/modules/thirdparty/text/text_generation/reading_pictures_writing_poems_for_midautumn/MidAutumnPoetry/model/file_utils.py b/modules/thirdparty/text/text_generation/reading_pictures_writing_poems_for_midautumn/MidAutumnPoetry/model/file_utils.py deleted file mode 100644 index 608be4efc6644626f7f408df200fd299f2dd997e..0000000000000000000000000000000000000000 --- a/modules/thirdparty/text/text_generation/reading_pictures_writing_poems_for_midautumn/MidAutumnPoetry/model/file_utils.py +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os - -from tqdm import tqdm -from paddlehub.common.logger import logger -from paddlehub.common.dir import MODULE_HOME - - -def _fetch_from_remote(url, force_download=False): - import tempfile, requests, tarfile - cached_dir = os.path.join(MODULE_HOME, "ernie_for_gen") - if force_download or not os.path.exists(cached_dir): - with tempfile.NamedTemporaryFile() as f: - #url = 'https://ernie.bj.bcebos.com/ERNIE_stable.tgz' - r = requests.get(url, stream=True) - total_len = int(r.headers.get('content-length')) - for chunk in tqdm( - r.iter_content(chunk_size=1024), total=total_len // 1024, desc='downloading %s' % url, unit='KB'): - if chunk: - f.write(chunk) - f.flush() - logger.debug('extacting... 
to %s' % f.name) - with tarfile.open(f.name) as tf: - tf.extractall(path=cached_dir) - logger.debug('%s cached in %s' % (url, cached_dir)) - return cached_dir - - -def add_docstring(doc): - def func(f): - f.__doc__ += ('\n======other docs from supper class ======\n%s' % doc) - return f - - return func diff --git a/modules/thirdparty/text/text_generation/reading_pictures_writing_poems_for_midautumn/MidAutumnPoetry/model/modeling_ernie.py b/modules/thirdparty/text/text_generation/reading_pictures_writing_poems_for_midautumn/MidAutumnPoetry/model/modeling_ernie.py deleted file mode 100644 index d5de28a5fee73371babd05b644e03a0f75ecdd5e..0000000000000000000000000000000000000000 --- a/modules/thirdparty/text/text_generation/reading_pictures_writing_poems_for_midautumn/MidAutumnPoetry/model/modeling_ernie.py +++ /dev/null @@ -1,327 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import division -from __future__ import absolute_import -from __future__ import print_function -from __future__ import unicode_literals - -import logging - -import paddle.fluid.dygraph as D -import paddle.fluid as F -import paddle.fluid.layers as L - -log = logging.getLogger(__name__) - - -def _build_linear(n_in, n_out, name, init, act=None): - return D.Linear( - n_in, - n_out, - param_attr=F.ParamAttr(name='%s.w_0' % name if name is not None else None, initializer=init), - bias_attr='%s.b_0' % name if name is not None else None, - act=act) - - -def _build_ln(n_in, name): - return D.LayerNorm( - normalized_shape=n_in, - param_attr=F.ParamAttr( - name='%s_layer_norm_scale' % name if name is not None else None, initializer=F.initializer.Constant(1.)), - bias_attr=F.ParamAttr( - name='%s_layer_norm_bias' % name if name is not None else None, initializer=F.initializer.Constant(1.)), - ) - - -def append_name(name, postfix): - if name is None: - return None - elif name == '': - return postfix - else: - return '%s_%s' % (name, postfix) - - -class AttentionLayer(D.Layer): - def __init__(self, cfg, name=None): - super(AttentionLayer, self).__init__() - initializer = F.initializer.TruncatedNormal(scale=cfg['initializer_range']) - d_model = cfg['hidden_size'] - n_head = cfg['num_attention_heads'] - assert d_model % n_head == 0 - d_model_q = cfg.get('query_hidden_size_per_head', d_model // n_head) * n_head - d_model_v = cfg.get('value_hidden_size_per_head', d_model // n_head) * n_head - self.n_head = n_head - self.d_key = d_model_q // n_head - self.q = _build_linear(d_model, d_model_q, append_name(name, 'query_fc'), initializer) - self.k = _build_linear(d_model, d_model_q, append_name(name, 'key_fc'), initializer) - self.v = _build_linear(d_model, d_model_v, append_name(name, 'value_fc'), initializer) - self.o = _build_linear(d_model_v, d_model, append_name(name, 'output_fc'), initializer) - self.dropout = lambda i: L.dropout( - i, - dropout_prob=cfg['attention_probs_dropout_prob'], - dropout_implementation="upscale_in_train", - ) if self.training 
else i - - def forward(self, queries, keys, values, attn_bias, past_cache): - assert len(queries.shape) == len(keys.shape) == len(values.shape) == 3 - - q = self.q(queries) - k = self.k(keys) - v = self.v(values) - - cache = (k, v) - if past_cache is not None: - cached_k, cached_v = past_cache - k = L.concat([cached_k, k], 1) - v = L.concat([cached_v, v], 1) - - q = L.transpose(L.reshape(q, [0, 0, self.n_head, q.shape[-1] // self.n_head]), - [0, 2, 1, 3]) #[batch, head, seq, dim] - k = L.transpose(L.reshape(k, [0, 0, self.n_head, k.shape[-1] // self.n_head]), - [0, 2, 1, 3]) #[batch, head, seq, dim] - v = L.transpose(L.reshape(v, [0, 0, self.n_head, v.shape[-1] // self.n_head]), - [0, 2, 1, 3]) #[batch, head, seq, dim] - - q = L.scale(q, scale=self.d_key**-0.5) - score = L.matmul(q, k, transpose_y=True) - if attn_bias is not None: - score += attn_bias - score = L.softmax(score, use_cudnn=True) - score = self.dropout(score) - - out = L.matmul(score, v) - out = L.transpose(out, [0, 2, 1, 3]) - out = L.reshape(out, [0, 0, out.shape[2] * out.shape[3]]) - - out = self.o(out) - return out, cache - - -class PositionwiseFeedForwardLayer(D.Layer): - def __init__(self, cfg, name=None): - super(PositionwiseFeedForwardLayer, self).__init__() - initializer = F.initializer.TruncatedNormal(scale=cfg['initializer_range']) - d_model = cfg['hidden_size'] - d_ffn = cfg.get('intermediate_size', 4 * d_model) - assert cfg['hidden_act'] in ['relu', 'gelu'] - self.i = _build_linear(d_model, d_ffn, append_name(name, 'fc_0'), initializer, act=cfg['hidden_act']) - self.o = _build_linear(d_ffn, d_model, append_name(name, 'fc_1'), initializer) - prob = cfg.get('intermediate_dropout_prob', 0.) - self.dropout = lambda i: L.dropout( - i, - dropout_prob=prob, - dropout_implementation="upscale_in_train", - ) if self.training else i - - def forward(self, inputs): - hidden = self.i(inputs) - hidden = self.dropout(hidden) - out = self.o(hidden) - return out - - -class ErnieBlock(D.Layer): - def __init__(self, cfg, name=None): - super(ErnieBlock, self).__init__() - d_model = cfg['hidden_size'] - initializer = F.initializer.TruncatedNormal(scale=cfg['initializer_range']) - - self.attn = AttentionLayer(cfg, name=append_name(name, 'multi_head_att')) - self.ln1 = _build_ln(d_model, name=append_name(name, 'post_att')) - self.ffn = PositionwiseFeedForwardLayer(cfg, name=append_name(name, 'ffn')) - self.ln2 = _build_ln(d_model, name=append_name(name, 'post_ffn')) - prob = cfg.get('intermediate_dropout_prob', cfg['hidden_dropout_prob']) - self.dropout = lambda i: L.dropout( - i, - dropout_prob=prob, - dropout_implementation="upscale_in_train", - ) if self.training else i - - def forward(self, inputs, attn_bias=None, past_cache=None): - attn_out, cache = self.attn(inputs, inputs, inputs, attn_bias, past_cache=past_cache) #self attn - attn_out = self.dropout(attn_out) - hidden = attn_out + inputs - hidden = self.ln1(hidden) # dropout/ add/ norm - - ffn_out = self.ffn(hidden) - ffn_out = self.dropout(ffn_out) - hidden = ffn_out + hidden - hidden = self.ln2(hidden) - return hidden, cache - - -class ErnieEncoderStack(D.Layer): - def __init__(self, cfg, name=None): - super(ErnieEncoderStack, self).__init__() - n_layers = cfg['num_hidden_layers'] - self.block = D.LayerList([ErnieBlock(cfg, append_name(name, 'layer_%d' % i)) for i in range(n_layers)]) - - def forward(self, inputs, attn_bias=None, past_cache=None): - if past_cache is not None: - assert isinstance( - past_cache, - tuple), 'unknown type of `past_cache`, expect tuple or list. 
got %s' % repr(type(past_cache)) - past_cache = list(zip(*past_cache)) - else: - past_cache = [None] * len(self.block) - cache_list_k, cache_list_v, hidden_list = [], [], [inputs] - - for b, p in zip(self.block, past_cache): - inputs, cache = b(inputs, attn_bias=attn_bias, past_cache=p) - cache_k, cache_v = cache - cache_list_k.append(cache_k) - cache_list_v.append(cache_v) - hidden_list.append(inputs) - - return inputs, hidden_list, (cache_list_k, cache_list_v) - - -class ErnieModel(D.Layer): - def __init__(self, cfg, name=None): - """ - Fundamental pretrained Ernie model - """ - log.debug('init ErnieModel with config: %s' % repr(cfg)) - D.Layer.__init__(self) - d_model = cfg['hidden_size'] - d_emb = cfg.get('emb_size', cfg['hidden_size']) - d_vocab = cfg['vocab_size'] - d_pos = cfg['max_position_embeddings'] - d_sent = cfg.get("sent_type_vocab_size") or cfg['type_vocab_size'] - self.n_head = cfg['num_attention_heads'] - self.return_additional_info = cfg.get('return_additional_info', False) - initializer = F.initializer.TruncatedNormal(scale=cfg['initializer_range']) - - self.ln = _build_ln(d_model, name=append_name(name, 'pre_encoder')) - self.word_emb = D.Embedding([d_vocab, d_emb], - param_attr=F.ParamAttr( - name=append_name(name, 'word_embedding'), initializer=initializer)) - self.pos_emb = D.Embedding([d_pos, d_emb], - param_attr=F.ParamAttr( - name=append_name(name, 'pos_embedding'), initializer=initializer)) - self.sent_emb = D.Embedding([d_sent, d_emb], - param_attr=F.ParamAttr( - name=append_name(name, 'sent_embedding'), initializer=initializer)) - prob = cfg['hidden_dropout_prob'] - self.dropout = lambda i: L.dropout( - i, - dropout_prob=prob, - dropout_implementation="upscale_in_train", - ) if self.training else i - - self.encoder_stack = ErnieEncoderStack(cfg, append_name(name, 'encoder')) - if cfg.get('has_pooler', True): - self.pooler = _build_linear( - cfg['hidden_size'], cfg['hidden_size'], append_name(name, 'pooled_fc'), initializer, act='tanh') - else: - self.pooler = None - self.train() - - def eval(self): - if F.in_dygraph_mode(): - super(ErnieModel, self).eval() - self.training = False - for l in self.sublayers(): - l.training = False - - def train(self): - if F.in_dygraph_mode(): - super(ErnieModel, self).train() - self.training = True - for l in self.sublayers(): - l.training = True - - def forward(self, - src_ids, - sent_ids=None, - pos_ids=None, - input_mask=None, - attn_bias=None, - past_cache=None, - use_causal_mask=False): - """ - Args: - src_ids (`Variable` of shape `[batch_size, seq_len]`): - Indices of input sequence tokens in the vocabulary. - sent_ids (optional, `Variable` of shape `[batch_size, seq_len]`): - aka token_type_ids, Segment token indices to indicate first and second portions of the inputs. - if None, assume all tokens come from `segment_a` - pos_ids(optional, `Variable` of shape `[batch_size, seq_len]`): - Indices of positions of each input sequence tokens in the position embeddings. - input_mask(optional `Variable` of shape `[batch_size, seq_len]`): - Mask to avoid performing attention on the padding token indices of the encoder input. 
- attn_bias(optional, `Variable` of shape `[batch_size, seq_len, seq_len] or False`): - 3D version of `input_mask`, if set, overrides `input_mask`; if set not False, will not apply attention mask - past_cache(optional, tuple of two lists: cached key and cached value, - each is a list of `Variable`s of shape `[batch_size, seq_len, hidden_size]`): - cached key/value tensor that will be concated to generated key/value when performing self attention. - if set, `attn_bias` should not be None. - - Returns: - pooled (`Variable` of shape `[batch_size, hidden_size]`): - output logits of pooler classifier - encoded(`Variable` of shape `[batch_size, seq_len, hidden_size]`): - output logits of transformer stack - """ - assert len(src_ids.shape) == 2, 'expect src_ids.shape = [batch, sequecen], got %s' % (repr(src_ids.shape)) - assert attn_bias is not None if past_cache else True, 'if `past_cache` is specified; attn_bias should not be None' - d_batch = L.shape(src_ids)[0] - d_seqlen = L.shape(src_ids)[1] - if pos_ids is None: - pos_ids = L.reshape(L.range(0, d_seqlen, 1, dtype='int32'), [1, -1]) - pos_ids = L.cast(pos_ids, 'int64') - if attn_bias is None: - if input_mask is None: - input_mask = L.cast(src_ids != 0, 'float32') - assert len(input_mask.shape) == 2 - input_mask = L.unsqueeze(input_mask, axes=[-1]) - attn_bias = L.matmul(input_mask, input_mask, transpose_y=True) - if use_causal_mask: - sequence = L.reshape(L.range(0, d_seqlen, 1, dtype='float32') + 1., [1, 1, -1, 1]) - causal_mask = L.cast((L.matmul(sequence, 1. / sequence, transpose_y=True) >= 1.), 'float32') - attn_bias *= causal_mask - else: - assert len(attn_bias.shape) == 3, 'expect attn_bias tobe rank 3, got %r' % attn_bias.shape - attn_bias = (1. - attn_bias) * -10000.0 - attn_bias = L.unsqueeze(attn_bias, [1]) - attn_bias = L.expand(attn_bias, [1, self.n_head, 1, 1]) # avoid broadcast =_= - attn_bias.stop_gradient = True - - if sent_ids is None: - sent_ids = L.zeros_like(src_ids) - - src_embedded = self.word_emb(src_ids) - pos_embedded = self.pos_emb(pos_ids) - sent_embedded = self.sent_emb(sent_ids) - embedded = src_embedded + pos_embedded + sent_embedded - - embedded = self.dropout(self.ln(embedded)) - - encoded, hidden_list, cache_list = self.encoder_stack(embedded, attn_bias, past_cache=past_cache) - if self.pooler is not None: - pooled = self.pooler(encoded[:, 0, :]) - else: - pooled = None - - additional_info = { - 'hiddens': hidden_list, - 'caches': cache_list, - } - - if self.return_additional_info: - return pooled, encoded, additional_info - else: - return pooled, encoded diff --git a/modules/thirdparty/text/text_generation/reading_pictures_writing_poems_for_midautumn/MidAutumnPoetry/model/modeling_ernie_gen.py b/modules/thirdparty/text/text_generation/reading_pictures_writing_poems_for_midautumn/MidAutumnPoetry/model/modeling_ernie_gen.py deleted file mode 100644 index bc3d783d622356fad1e48f2767640a59edc05d70..0000000000000000000000000000000000000000 --- a/modules/thirdparty/text/text_generation/reading_pictures_writing_poems_for_midautumn/MidAutumnPoetry/model/modeling_ernie_gen.py +++ /dev/null @@ -1,65 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import paddle.fluid as F -import paddle.fluid.layers as L - -from .modeling_ernie import ErnieModel -from .modeling_ernie import _build_linear, _build_ln, append_name - - -class ErnieModelForGeneration(ErnieModel): - def __init__(self, cfg, name=None): - cfg['return_additional_info'] = True - cfg['has_pooler'] = False - super(ErnieModelForGeneration, self).__init__(cfg, name=name) - initializer = F.initializer.TruncatedNormal(scale=cfg['initializer_range']) - d_model = cfg['hidden_size'] - d_vocab = cfg['vocab_size'] - - self.mlm = _build_linear( - d_model, d_model, append_name(name, 'mask_lm_trans_fc'), initializer, act=cfg['hidden_act']) - self.mlm_ln = _build_ln(d_model, name=append_name(name, 'mask_lm_trans')) - self.mlm_bias = L.create_parameter( - dtype='float32', - shape=[d_vocab], - attr=F.ParamAttr( - name=append_name(name, 'mask_lm_out_fc.b_0'), initializer=F.initializer.Constant(value=0.0)), - is_bias=True, - ) - - def forward(self, src_ids, *args, **kwargs): - tgt_labels = kwargs.pop('tgt_labels', None) - tgt_pos = kwargs.pop('tgt_pos', None) - encode_only = kwargs.pop('encode_only', False) - _, encoded, info = ErnieModel.forward(self, src_ids, *args, **kwargs) - if encode_only: - return None, None, info - elif tgt_labels is None: - encoded = self.mlm(encoded) - encoded = self.mlm_ln(encoded) - logits = L.matmul(encoded, self.word_emb.weight, transpose_y=True) + self.mlm_bias - output_ids = L.argmax(logits, -1) - return output_ids, logits, info - else: - encoded_2d = L.gather_nd(encoded, tgt_pos) - encoded_2d = self.mlm(encoded_2d) - encoded_2d = self.mlm_ln(encoded_2d) - logits_2d = L.matmul(encoded_2d, self.word_emb.weight, transpose_y=True) + self.mlm_bias - if len(tgt_labels.shape) == 1: - tgt_labels = L.reshape(tgt_labels, [-1, 1]) - - loss = L.reduce_mean( - L.softmax_with_cross_entropy(logits_2d, tgt_labels, soft_label=(tgt_labels.shape[-1] != 1))) - return loss, logits_2d, info diff --git a/modules/thirdparty/text/text_generation/reading_pictures_writing_poems_for_midautumn/MidAutumnPoetry/model/tokenizing_ernie.py b/modules/thirdparty/text/text_generation/reading_pictures_writing_poems_for_midautumn/MidAutumnPoetry/model/tokenizing_ernie.py deleted file mode 100644 index c9e5638f9a17207ce2d664c27376f08138876da3..0000000000000000000000000000000000000000 --- a/modules/thirdparty/text/text_generation/reading_pictures_writing_poems_for_midautumn/MidAutumnPoetry/model/tokenizing_ernie.py +++ /dev/null @@ -1,163 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
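For readers following the deleted ERNIE code above: `ErnieModel.forward` builds its additive attention bias from a padding mask (and, when `use_causal_mask` is set, a lower-triangular causal mask obtained from the ratio trick `(i+1)/(j+1) >= 1`), then fills disallowed positions with `-10000.0` and tiles the result over attention heads. The NumPy sketch below reproduces only that mask-building logic in isolation; the batch size, sequence length, head count and `[PAD]` id are assumptions made for the example, and none of this code is part of the patch itself.

```python
# Standalone illustration of the attention-bias construction in the deleted
# ErnieModel.forward (not part of this patch). Assumes pad id 0 and 12 heads.
import numpy as np

src_ids = np.array([[11, 42, 7, 0, 0]])                       # batch=1, seq_len=5, 0 = [PAD]
input_mask = (src_ids != 0).astype('float32')                 # [batch, seq_len]
mask_2d = input_mask[:, :, None] @ input_mask[:, None, :]     # [batch, seq_len, seq_len]

# Causal variant: (i+1)/(j+1) >= 1 exactly when j <= i, i.e. a lower-triangular
# matrix of ones, so each position may only attend to itself and earlier tokens.
seq = np.arange(1, src_ids.shape[1] + 1, dtype='float32')
causal = ((seq[:, None] @ (1.0 / seq)[None, :]) >= 1.0).astype('float32')
mask_2d = mask_2d * causal

attn_bias = (1.0 - mask_2d) * -10000.0                        # large negative where attention is blocked
attn_bias = np.broadcast_to(attn_bias[:, None, :, :], (1, 12, 5, 5))  # tile over n_head
print(attn_bias[0, 0])
```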
- -import six -import re -import logging -from functools import partial - -import numpy as np - -import io - -open = partial(io.open, encoding='utf8') - -log = logging.getLogger(__name__) - -_max_input_chars_per_word = 100 - - -def _wordpiece(token, vocab, unk_token, prefix='##', sentencepiece_prefix=''): - """ wordpiece: helloworld => [hello, ##world] """ - chars = list(token) - if len(chars) > _max_input_chars_per_word: - return [unk_token], [(0, len(chars))] - - is_bad = False - start = 0 - sub_tokens = [] - sub_pos = [] - while start < len(chars): - end = len(chars) - cur_substr = None - while start < end: - substr = "".join(chars[start:end]) - if start == 0: - substr = sentencepiece_prefix + substr - if start > 0: - substr = prefix + substr - if substr in vocab: - cur_substr = substr - break - end -= 1 - if cur_substr is None: - is_bad = True - break - sub_tokens.append(cur_substr) - sub_pos.append((start, end)) - start = end - if is_bad: - return [unk_token], [(0, len(chars))] - else: - return sub_tokens, sub_pos - - -class ErnieTokenizer(object): - def __init__(self, - vocab, - unk_token='[UNK]', - sep_token='[SEP]', - cls_token='[CLS]', - pad_token='[PAD]', - mask_token='[MASK]', - wordpiece_prefix='##', - sentencepiece_prefix='', - lower=True, - encoding='utf8', - special_token_list=[]): - if not isinstance(vocab, dict): - raise ValueError('expect `vocab` to be instance of dict, got %s' % type(vocab)) - self.vocab = vocab - self.lower = lower - self.prefix = wordpiece_prefix - self.sentencepiece_prefix = sentencepiece_prefix - self.pad_id = self.vocab[pad_token] - self.cls_id = cls_token and self.vocab[cls_token] - self.sep_id = sep_token and self.vocab[sep_token] - self.unk_id = unk_token and self.vocab[unk_token] - self.mask_id = mask_token and self.vocab[mask_token] - self.unk_token = unk_token - special_tokens = {pad_token, cls_token, sep_token, unk_token, mask_token} | set(special_token_list) - pat_str = '' - for t in special_tokens: - if t is None: - continue - pat_str += '(%s)|' % re.escape(t) - pat_str += r'([a-zA-Z0-9]+|\S)' - log.debug('regex: %s' % pat_str) - self.pat = re.compile(pat_str) - self.encoding = encoding - - def tokenize(self, text): - if len(text) == 0: - return [] - if six.PY3 and not isinstance(text, six.string_types): - text = text.decode(self.encoding) - if six.PY2 and isinstance(text, str): - text = text.decode(self.encoding) - - res = [] - for match in self.pat.finditer(text): - match_group = match.group(0) - if match.groups()[-1]: - if self.lower: - match_group = match_group.lower() - words, _ = _wordpiece( - match_group, - vocab=self.vocab, - unk_token=self.unk_token, - prefix=self.prefix, - sentencepiece_prefix=self.sentencepiece_prefix) - else: - words = [match_group] - res += words - return res - - def convert_tokens_to_ids(self, tokens): - return [self.vocab.get(t, self.unk_id) for t in tokens] - - def truncate(self, id1, id2, seqlen): - len1 = len(id1) - len2 = len(id2) - half = seqlen // 2 - if len1 > len2: - len1_truncated, len2_truncated = max(half, seqlen - len2), min(half, len2) - else: - len1_truncated, len2_truncated = min(half, seqlen - len1), max(half, seqlen - len1) - return id1[:len1_truncated], id2[:len2_truncated] - - def build_for_ernie(self, text_id, pair_id=[]): - """build sentence type id, add [CLS] [SEP]""" - text_id_type = np.zeros_like(text_id, dtype=np.int64) - ret_id = np.concatenate([[self.cls_id], text_id, [self.sep_id]], 0) - ret_id_type = np.concatenate([[0], text_id_type, [0]], 0) - - if len(pair_id): - pair_id_type = 
np.ones_like(pair_id, dtype=np.int64) - ret_id = np.concatenate([ret_id, pair_id, [self.sep_id]], 0) - ret_id_type = np.concatenate([ret_id_type, pair_id_type, [1]], 0) - return ret_id, ret_id_type - - def encode(self, text, pair=None, truncate_to=None): - text_id = np.array(self.convert_tokens_to_ids(self.tokenize(text)), dtype=np.int64) - text_id_type = np.zeros_like(text_id, dtype=np.int64) - if pair is not None: - pair_id = np.array(self.convert_tokens_to_ids(self.tokenize(pair)), dtype=np.int64) - else: - pair_id = [] - if truncate_to is not None: - text_id, pair_id = self.truncate(text_id, [] if pair_id is None else pair_id, truncate_to) - - ret_id, ret_id_type = self.build_for_ernie(text_id, pair_id) - return ret_id, ret_id_type diff --git a/modules/thirdparty/video/Video_editing/SkyAR/README.md b/modules/thirdparty/video/Video_editing/SkyAR/README.md deleted file mode 100644 index 197fa16e2af158dfcc61d7db4a30da1b75e8f192..0000000000000000000000000000000000000000 --- a/modules/thirdparty/video/Video_editing/SkyAR/README.md +++ /dev/null @@ -1,109 +0,0 @@ -## 模型概述 -* SkyAR 是一种用于视频中天空置换与协调的视觉方法,该方法能够在风格可控的视频中自动生成逼真的天空背景。 -* 该算法是一种完全基于视觉的解决方案,它的好处就是可以处理非静态图像,同时不受拍摄设备的限制,也不需要用户交互,可以处理在线或离线视频。 -* 算法主要由三个核心组成: - * 天空抠图网络(Sky Matting Network):就是一种 Matting 图像分隔,用于检测视频帧中天空区域的视频,可以精确地获得天空蒙版。 - * 运动估计(Motion Estimation):恢复天空运动的运动估计器,使生成的天空与摄像机的运动同步。 - * 图像融合(Image Blending):将用户指定的天空模板混合到视频帧中。除此之外,还用于重置和着色,使混合结果在其颜色和动态范围内更具视觉逼真感。 -* 整体框架图如下: - - ![](http://p4.itc.cn/q_70/images03/20201114/42eaf00af8dd4aa4ae3c0cdc6e50b793.jpeg) -* 参考论文:Zhengxia Zou. [Castle in the Sky: Dynamic Sky Replacement and Harmonization in Videos](https://arxiv.org/abs/2010.11800). CoRR, abs/2010.118003, 2020. -* 官方开源项目: [jiupinjia/SkyAR](https://github.com/jiupinjia/SkyAR) -## 模型安装 -```shell -$hub install SkyAR -``` - -## 效果展示 -* 原始视频: - - ![原始视频](https://img-blog.csdnimg.cn/20210126142046572.gif) - -* 木星: - - ![木星](https://img-blog.csdnimg.cn/20210125211435619.gif) -* 雨天: - - ![雨天](https://img-blog.csdnimg.cn/2021012521152492.gif) -* 银河: - - ![银河](https://img-blog.csdnimg.cn/20210125211523491.gif) -* 第九区飞船: - - ![第九区飞船](https://img-blog.csdnimg.cn/20210125211520955.gif) -* 原始视频: - - ![原始视频](https://img-blog.csdnimg.cn/20210126142038716.gif) -* 漂浮城堡: - - ![漂浮城堡](https://img-blog.csdnimg.cn/20210125211514997.gif) -* 电闪雷鸣: - - ![电闪雷鸣](https://img-blog.csdnimg.cn/20210125211433591.gif) -* 超级月亮: - - ![超级月亮](https://img-blog.csdnimg.cn/20210125211417524.gif) - -## API 说明 - -```python -def MagicSky( - video_path, save_path, config='jupiter', - is_rainy=False, preview_frames_num=0, is_video_sky=False, is_show=False, - skybox_img=None, skybox_video=None, rain_cap_path=None, - halo_effect=True, auto_light_matching=False, - relighting_factor=0.8, recoloring_factor=0.5, skybox_center_crop=0.5 - ) -``` - -深度估计API - -**参数** - -* video_path(str):输入视频路径 -* save_path(str):视频保存路径 -* config(str): 预设 SkyBox 配置,所有预设配置如下,如果使用自定义 SkyBox,请设置为 None: -``` -[ - 'cloudy', 'district9ship', 'floatingcastle', 'galaxy', 'jupiter', - 'rainy', 'sunny', 'sunset', 'supermoon', 'thunderstorm' -] -``` -* skybox_img(str):自定义的 SkyBox 图像路径 -* skybox_video(str):自定义的 SkyBox 视频路径 -* is_video_sky(bool):自定义 SkyBox 是否为视频 -* rain_cap_path(str):自定义下雨效果视频路径 -* is_rainy(bool): 天空是否下雨 -* halo_effect(bool):是否开启 halo effect -* auto_light_matching(bool):是否开启自动亮度匹配 -* relighting_factor(float): Relighting factor -* recoloring_factor(float): Recoloring factor -* skybox_center_crop(float):SkyBox center crop factor -* preview_frames_num(int):设置预览帧数量,即只处理开头这几帧,设为 0,则为全部处理 -* 
is_show(bool):是否图形化预览 - -## 预测代码示例 - -```python -import paddlehub as hub - -model = hub.Module(name='SkyAR') - -model.MagicSky( - video_path=[path to input video path], - save_path=[path to save video path] -) -``` - -## 模型相关信息 - -### 模型代码 - -https://github.com/jm12138/SkyAR_Paddle_GUI - -### 依赖 - -paddlepaddle >= 2.0.0rc0 - -paddlehub >= 2.0.0rc0 diff --git a/modules/video/Video_editing/SkyAR/README.md b/modules/video/Video_editing/SkyAR/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7e6cb468f2220dcdc12d58cdf8be2986372d5f66 --- /dev/null +++ b/modules/video/Video_editing/SkyAR/README.md @@ -0,0 +1,127 @@ +# SkyAR + +|模型名称|SkyAR| +| :--- | :---: | +|类别|图像-图像分割| +|网络|UNet| +|数据集|UNet| +|是否支持Fine-tuning|否| +|模型大小|206MB| +|指标|-| +|最新更新日期|2021-02-26| + +## 一、模型基本信息 + +- ### 应用效果展示 + + - 样例结果示例: + * 原始视频: + + ![原始视频](https://img-blog.csdnimg.cn/20210126142046572.gif) + + * 木星: + + ![木星](https://img-blog.csdnimg.cn/20210125211435619.gif) + * 雨天: + + ![雨天](https://img-blog.csdnimg.cn/2021012521152492.gif) + * 银河: + + ![银河](https://img-blog.csdnimg.cn/20210125211523491.gif) + * 第九区飞船: + + ![第九区飞船](https://img-blog.csdnimg.cn/20210125211520955.gif) + * 原始视频: + + ![原始视频](https://img-blog.csdnimg.cn/20210126142038716.gif) + * 漂浮城堡: + + ![漂浮城堡](https://img-blog.csdnimg.cn/20210125211514997.gif) + * 电闪雷鸣: + + ![电闪雷鸣](https://img-blog.csdnimg.cn/20210125211433591.gif) + * 超级月亮: + + ![超级月亮](https://img-blog.csdnimg.cn/20210125211417524.gif) + +- ### 模型介绍 + + - SkyAR是一种用于视频中天空置换与协调的视觉方法,主要由三个核心组成:天空抠图网络、运动估计和图像融合。 + + - 更多详情请参考:[SkyAR](https://github.com/jiupinjia/SkyAR) + + - 参考论文:Zhengxia Zou. [Castle in the Sky: Dynamic Sky Replacement and Harmonization in Videos](https://arxiv.org/abs/2010.11800). CoRR, abs/2010.118003, 2020. + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 2.0.0 + + - paddlehub >= 2.0.0 + +- ### 2、安装 + + ```shell + $hub install SkyAR + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + + +## 三、模型API预测 + +- ### 1、代码示例 + + ```python + import paddlehub as hub + + model = hub.Module(name='SkyAR') + + model.MagicSky( + video_path=[path to input video path], + save_path=[path to save video path] + ) + ``` +- ### 2、API + + ```python + def MagicSky( + video_path, save_path, config='jupiter', + is_rainy=False, preview_frames_num=0, is_video_sky=False, is_show=False, + skybox_img=None, skybox_video=None, rain_cap_path=None, + halo_effect=True, auto_light_matching=False, + relighting_factor=0.8, recoloring_factor=0.5, skybox_center_crop=0.5 + ) + ``` + + - **参数** + + * video_path(str):输入视频路径 + * save_path(str):视频保存路径 + * config(str): 预设 SkyBox 配置,所有预设配置如下,如果使用自定义 SkyBox,请设置为 None: + ``` + [ + 'cloudy', 'district9ship', 'floatingcastle', 'galaxy', 'jupiter', + 'rainy', 'sunny', 'sunset', 'supermoon', 'thunderstorm' + ] + ``` + * skybox_img(str):自定义的 SkyBox 图像路径 + * skybox_video(str):自定义的 SkyBox 视频路径 + * is_video_sky(bool):自定义 SkyBox 是否为视频 + * rain_cap_path(str):自定义下雨效果视频路径 + * is_rainy(bool): 天空是否下雨 + * halo_effect(bool):是否开启 halo effect + * auto_light_matching(bool):是否开启自动亮度匹配 + * relighting_factor(float): Relighting factor + * recoloring_factor(float): Recoloring factor + * skybox_center_crop(float):SkyBox center crop factor + * preview_frames_num(int):设置预览帧数量,即只处理开头这几帧,设为 0,则为全部处理 + * is_show(bool):是否图形化预览 + + +## 四、更新历史 + +* 1.0.0 + + 初始发布 diff --git 
a/modules/video/Video_editing/SkyAR/__init__.py b/modules/video/Video_editing/SkyAR/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/modules/thirdparty/video/Video_editing/SkyAR/module.py b/modules/video/Video_editing/SkyAR/module.py similarity index 100% rename from modules/thirdparty/video/Video_editing/SkyAR/module.py rename to modules/video/Video_editing/SkyAR/module.py diff --git a/modules/thirdparty/video/Video_editing/SkyAR/rain.py b/modules/video/Video_editing/SkyAR/rain.py similarity index 100% rename from modules/thirdparty/video/Video_editing/SkyAR/rain.py rename to modules/video/Video_editing/SkyAR/rain.py diff --git a/modules/thirdparty/video/Video_editing/SkyAR/rain_streaks/videoplayback.mp4 b/modules/video/Video_editing/SkyAR/rain_streaks/videoplayback.mp4 similarity index 100% rename from modules/thirdparty/video/Video_editing/SkyAR/rain_streaks/videoplayback.mp4 rename to modules/video/Video_editing/SkyAR/rain_streaks/videoplayback.mp4 diff --git a/modules/thirdparty/video/Video_editing/SkyAR/skybox.py b/modules/video/Video_editing/SkyAR/skybox.py similarity index 100% rename from modules/thirdparty/video/Video_editing/SkyAR/skybox.py rename to modules/video/Video_editing/SkyAR/skybox.py diff --git a/modules/thirdparty/video/Video_editing/SkyAR/skybox/cloudy.jpg b/modules/video/Video_editing/SkyAR/skybox/cloudy.jpg similarity index 100% rename from modules/thirdparty/video/Video_editing/SkyAR/skybox/cloudy.jpg rename to modules/video/Video_editing/SkyAR/skybox/cloudy.jpg diff --git a/modules/thirdparty/video/Video_editing/SkyAR/skybox/district9ship.jpg b/modules/video/Video_editing/SkyAR/skybox/district9ship.jpg similarity index 100% rename from modules/thirdparty/video/Video_editing/SkyAR/skybox/district9ship.jpg rename to modules/video/Video_editing/SkyAR/skybox/district9ship.jpg diff --git a/modules/thirdparty/video/Video_editing/SkyAR/skybox/floatingcastle.jpg b/modules/video/Video_editing/SkyAR/skybox/floatingcastle.jpg similarity index 100% rename from modules/thirdparty/video/Video_editing/SkyAR/skybox/floatingcastle.jpg rename to modules/video/Video_editing/SkyAR/skybox/floatingcastle.jpg diff --git a/modules/thirdparty/video/Video_editing/SkyAR/skybox/galaxy.jpg b/modules/video/Video_editing/SkyAR/skybox/galaxy.jpg similarity index 100% rename from modules/thirdparty/video/Video_editing/SkyAR/skybox/galaxy.jpg rename to modules/video/Video_editing/SkyAR/skybox/galaxy.jpg diff --git a/modules/thirdparty/video/Video_editing/SkyAR/skybox/jupiter.jpg b/modules/video/Video_editing/SkyAR/skybox/jupiter.jpg similarity index 100% rename from modules/thirdparty/video/Video_editing/SkyAR/skybox/jupiter.jpg rename to modules/video/Video_editing/SkyAR/skybox/jupiter.jpg diff --git a/modules/thirdparty/video/Video_editing/SkyAR/skybox/rainy.jpg b/modules/video/Video_editing/SkyAR/skybox/rainy.jpg similarity index 100% rename from modules/thirdparty/video/Video_editing/SkyAR/skybox/rainy.jpg rename to modules/video/Video_editing/SkyAR/skybox/rainy.jpg diff --git a/modules/thirdparty/video/Video_editing/SkyAR/skybox/sunny.jpg b/modules/video/Video_editing/SkyAR/skybox/sunny.jpg similarity index 100% rename from modules/thirdparty/video/Video_editing/SkyAR/skybox/sunny.jpg rename to modules/video/Video_editing/SkyAR/skybox/sunny.jpg diff --git a/modules/thirdparty/video/Video_editing/SkyAR/skybox/sunset.jpg b/modules/video/Video_editing/SkyAR/skybox/sunset.jpg similarity index 100% rename from 
modules/thirdparty/video/Video_editing/SkyAR/skybox/sunset.jpg rename to modules/video/Video_editing/SkyAR/skybox/sunset.jpg diff --git a/modules/thirdparty/video/Video_editing/SkyAR/skybox/supermoon.jpg b/modules/video/Video_editing/SkyAR/skybox/supermoon.jpg similarity index 100% rename from modules/thirdparty/video/Video_editing/SkyAR/skybox/supermoon.jpg rename to modules/video/Video_editing/SkyAR/skybox/supermoon.jpg diff --git a/modules/thirdparty/video/Video_editing/SkyAR/skybox/thunderstorm.mp4 b/modules/video/Video_editing/SkyAR/skybox/thunderstorm.mp4 similarity index 100% rename from modules/thirdparty/video/Video_editing/SkyAR/skybox/thunderstorm.mp4 rename to modules/video/Video_editing/SkyAR/skybox/thunderstorm.mp4 diff --git a/modules/thirdparty/video/Video_editing/SkyAR/skyfilter.py b/modules/video/Video_editing/SkyAR/skyfilter.py similarity index 100% rename from modules/thirdparty/video/Video_editing/SkyAR/skyfilter.py rename to modules/video/Video_editing/SkyAR/skyfilter.py diff --git a/modules/thirdparty/video/Video_editing/SkyAR/utils.py b/modules/video/Video_editing/SkyAR/utils.py similarity index 100% rename from modules/thirdparty/video/Video_editing/SkyAR/utils.py rename to modules/video/Video_editing/SkyAR/utils.py diff --git a/modules/video/classification/nonlocal_kinetics400/README.md b/modules/video/classification/nonlocal_kinetics400/README.md new file mode 100644 index 0000000000000000000000000000000000000000..0e88d19b45bfcc4b7427aed84823949984660496 --- /dev/null +++ b/modules/video/classification/nonlocal_kinetics400/README.md @@ -0,0 +1,109 @@ +# nonlocal_kinetics400 + +|模型名称|nonlocal_kinetics400| +| :--- | :---: | +|类别|视频-视频分类| +|网络|Non-local| +|数据集|Kinetics-400| +|是否支持Fine-tuning|否| +|模型大小|129MB| +|最新更新日期|2021-02-26| +|数据指标|-| + + + +## 一、模型基本信息 + +- ### 模型介绍 + + - Non-local Neural Networks是由Xiaolong Wang等研究者在2017年提出的模型,主要特点是通过引入Non-local操作来描述距离较远的像素点之间的关联关系。其借助于传统计算机视觉中的non-local mean的思想,并将该思想扩展到神经网络中,通过定义输出位置和所有输入位置之间的关联函数,建立全局关联特性。Non-local模型的训练数据采用由DeepMind公布的Kinetics-400动作识别数据集。该PaddleHub Module可支持预测。 + + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) + +- ### 2、安装 + + - ```shell + $ hub install nonlocal_kinetics400 + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + + + + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + hub run nonlocal_kinetics400 --input_path "/PATH/TO/VIDEO" --use_gpu True + ``` + + 或者 + + - ```shell + hub run nonlocal_kinetics400 --input_file test.txt --use_gpu True + ``` + + - test.txt 存放待分类视频的存放路径; + - Note: 该PaddleHub Module目前只支持在GPU环境下使用,在使用前,请使用下述命令指定GPU设备(设备ID请根据实际情况指定) + + - ```shell + export CUDA_VISIBLE_DEVICES=0 + ``` + + - 通过命令行方式实现文字识别模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + + import paddlehub as hub + + nonlocal = hub.Module(name="nonlocal_kinetics400") + + test_video_path = "/PATH/TO/VIDEO" + + # set input dict + input_dict = {"image": [test_video_path]} + + # execute predict and print the result + results = nonlocal.video_classification(data=input_dict) + for result in results: + print(result) + ``` + +- ### 3、API + + - ```python + def video_classification(data) + ``` + + - 用于视频分类预测 + + - **参数** + + - data(dict): dict类型,key为image,str类型;value为待分类的视频路径,list类型。 
+ + + - **返回** + + - result(list\[dict\]): list类型,每个元素为对应输入视频的预测结果。预测结果为dict类型,key为label,value为该label对应的概率值。 + + +## 五、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install nonlocal_kinetics400==1.0.0 + ``` diff --git a/modules/video/classification/stnet_kinetics400/README.md b/modules/video/classification/stnet_kinetics400/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4cbed17470154ced6d747c1b01933f4d80109693 --- /dev/null +++ b/modules/video/classification/stnet_kinetics400/README.md @@ -0,0 +1,106 @@ +# stnet_kinetics400 + +|模型名称|stnet_kinetics400| +| :--- | :---: | +|类别|视频-视频分类| +|网络|StNet| +|数据集|Kinetics-400| +|是否支持Fine-tuning|否| +|模型大小|129MB| +|最新更新日期|2021-02-26| +|数据指标|-| + + + +## 一、模型基本信息 + +- ### 模型介绍 + + - StNet模型框架为ActivityNet Kinetics Challenge 2018中夺冠的基础网络框架,是基于ResNet50实现的。该模型提出super-image的概念,在super-image上进行2D卷积,建模视频中局部时空相关性。另外通过temporal modeling block建模视频的全局时空依赖,最后用一个temporal Xception block对抽取的特征序列进行长时序建模。StNet的训练数据采用由DeepMind公布的Kinetics-400动作识别数据集。该PaddleHub Module可支持预测。 + + + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) + +- ### 2、安装 + + - ```shell + $ hub install stnet_kinetics400 + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + + + + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + hub run stnet_kinetics400 --input_path "/PATH/TO/VIDEO" + ``` + + 或者 + + - ```shell + hub run stnet_kinetics400 --input_file test.txt + ``` + + - test.txt 存放待分类视频的存放路径 + + + - 通过命令行方式实现文字识别模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + + import paddlehub as hub + + stnet = hub.Module(name="stnet_kinetics400") + + test_video_path = "/PATH/TO/VIDEO" + + # set input dict + input_dict = {"image": [test_video_path]} + + # execute predict and print the result + results = stnet.video_classification(data=input_dict) + for result in results: + print(result) + ``` + +- ### 3、API + + - ```python + def video_classification(data) + ``` + + - 用于视频分类预测 + + - **参数** + + - data(dict): dict类型,key为image,str类型;value为待分类的视频路径,list类型。 + + + - **返回** + + - result(list\[dict\]): list类型,每个元素为对应输入视频的预测结果。预测结果为dict类型,key为label,value为该label对应的概率值。 + + +## 五、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install stnet_kinetics400==1.0.0 + ``` diff --git a/modules/video/classification/tsm_kinetics400/README.md b/modules/video/classification/tsm_kinetics400/README.md new file mode 100644 index 0000000000000000000000000000000000000000..5301071bcbd79b96dce5f2eafb9005d340ba0820 --- /dev/null +++ b/modules/video/classification/tsm_kinetics400/README.md @@ -0,0 +1,106 @@ +# tsm_kinetics400 + +|模型名称|tsm_kinetics400| +| :--- | :---: | +|类别|视频-视频分类| +|网络|TSM| +|数据集|Kinetics-400| +|是否支持Fine-tuning|否| +|模型大小|95MB| +|最新更新日期|2021-02-26| +|数据指标|-| + + + +## 一、模型基本信息 + +- ### 模型介绍 + + - TSM(Temporal Shift Module)是由MIT和IBM Watson AI Lab的JiLin,ChuangGan和SongHan等人提出的通过时间位移来提高网络视频理解能力的模块。TSM的训练数据采用由DeepMind公布的Kinetics-400动作识别数据集。该PaddleHub Module可支持预测。 + + + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) + +- ### 2、安装 + + - ```shell + $ hub install tsm_kinetics400 + ``` + - 
如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + + + + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + hub run tsm_kinetics400 --input_path "/PATH/TO/VIDEO" + ``` + + 或者 + + - ```shell + hub run tsm_kinetics400 --input_file test.txt + ``` + + - Note: test.txt 存放待分类视频的存放路径 + + + - 通过命令行方式实现文字识别模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + + import paddlehub as hub + + tsm = hub.Module(name="tsm_kinetics400") + + test_video_path = "/PATH/TO/VIDEO" + + # set input dict + input_dict = {"image": [test_video_path]} + + # execute predict and print the result + results = tsm.video_classification(data=input_dict) + for result in results: + print(result) + ``` + +- ### 3、API + + - ```python + def video_classification(data) + ``` + + - 用于视频分类预测 + + - **参数** + + - data(dict): dict类型,key为image,str类型;value为待分类的视频路径,list类型。 + + + - **返回** + + - result(list\[dict\]): list类型,每个元素为对应输入视频的预测结果。预测结果为dict类型,key为label,value为该label对应的概率值。 + + +## 五、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install tsm_kinetics400==1.0.0 + ``` diff --git a/modules/video/classification/tsn_kinetics400/README.md b/modules/video/classification/tsn_kinetics400/README.md new file mode 100644 index 0000000000000000000000000000000000000000..e2d2e87630277819c0cf5b269d56e2128a05c32d --- /dev/null +++ b/modules/video/classification/tsn_kinetics400/README.md @@ -0,0 +1,108 @@ +# tsn_kinetics400 + +|模型名称|tsn_kinetics400| +| :--- | :---: | +|类别|视频-视频分类| +|网络|TSN| +|数据集|Kinetics-400| +|是否支持Fine-tuning|否| +|模型大小|95MB| +|最新更新日期|2021-02-26| +|数据指标|-| + + + +## 一、模型基本信息 + +- ### 模型介绍 + + - TSN(Temporal Segment Network)是视频分类领域经典的基于2D-CNN的解决方案。该方法主要解决视频的长时间行为判断问题,通过稀疏采样视频帧的方式代替稠密采样,既能捕获视频全局信息,也能去除冗余,降低计算量。最终将每帧特征平均融合后得到视频的整体特征,并用于分类。TSN的训练数据采用由DeepMind公布的Kinetics-400动作识别数据集。该PaddleHub Module可支持预测。 + + - 具体网络结构可参考论文:[TSN](https://arxiv.org/abs/1608.00859)。 + + + +## 二、安装 + +- ### 1、环境依赖 + + - paddlepaddle >= 1.4.0 + + - paddlehub >= 1.0.0 | [如何安装PaddleHub](../../../../docs/docs_ch/get_start/installation.rst) + +- ### 2、安装 + + - ```shell + $ hub install tsn_kinetics400 + ``` + - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) + | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + + + + +## 三、模型API预测 + +- ### 1、命令行预测 + + - ```shell + hub run tsn_kinetics400 --input_path "/PATH/TO/VIDEO" + ``` + + 或者 + + - ```shell + hub run tsn_kinetics400 --input_file test.txt + ``` + + - Note: test.txt 存放待分类视频的存放路径 + + + - 通过命令行方式实现文字识别模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) + +- ### 2、预测代码示例 + + - ```python + + import paddlehub as hub + + tsn = hub.Module(name="tsn_kinetics400") + + test_video_path = "/PATH/TO/VIDEO" + + # set input dict + input_dict = {"image": [test_video_path]} + + # execute predict and print the result + results = tsn.video_classification(data=input_dict) + for result in results: + print(result) + ``` + +- ### 3、API + + - ```python + def video_classification(data) + ``` + + - 用于视频分类预测 + + - **参数** + + - data(dict): dict类型,key为image,str类型;value为待分类的视频路径,list类型。 + + + - **返回** + + - result(list\[dict\]): list类型,每个元素为对应输入视频的预测结果。预测结果为dict类型,key为label,value为该label对应的概率值。 + + +## 五、更新历史 + +* 1.0.0 + + 初始发布 + + - ```shell + $ hub install 
tsn_kinetics400==1.0.0 + ``` diff --git a/modules/video/multiple_object_tracking/fairmot_dla34/README.md b/modules/video/multiple_object_tracking/fairmot_dla34/README.md index 9a91b6faa348dd785ea18af9521c02cf58f8ef28..3ff8bd396a3bc60764241701856dfa20feb59459 100644 --- a/modules/video/multiple_object_tracking/fairmot_dla34/README.md +++ b/modules/video/multiple_object_tracking/fairmot_dla34/README.md @@ -31,7 +31,7 @@ - ### 1、环境依赖 - - paddledet >= 2.1.0 + - paddledet >= 2.2.0 - opencv-python @@ -42,6 +42,7 @@ ``` - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + - 在windows下安装,由于paddledet package会依赖cython-bbox以及pycocotools, 这两个包需要windows用户提前装好,可参考[cython-bbox安装](https://blog.csdn.net/qq_24739717/article/details/105588729)和[pycocotools安装](https://github.com/PaddlePaddle/PaddleX/blob/release/1.3/docs/install.md#pycocotools安装问题) ## 三、模型API预测 - ### 1、命令行预测 @@ -52,7 +53,7 @@ ``` - 通过命令行方式实现多目标追踪模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) -- ### 2、代码示例 +- ### 2、预测代码示例 - ```python import paddlehub as hub diff --git a/modules/video/multiple_object_tracking/fairmot_dla34/config/_base_/fairmot_dla34.yml b/modules/video/multiple_object_tracking/fairmot_dla34/config/_base_/fairmot_dla34.yml index c5f07de702fbeb594c9eeda60d709c0c40af8b1b..e2ca32a2b6c31d66a1b8f5fa42d278d0609dbdca 100644 --- a/modules/video/multiple_object_tracking/fairmot_dla34/config/_base_/fairmot_dla34.yml +++ b/modules/video/multiple_object_tracking/fairmot_dla34/config/_base_/fairmot_dla34.yml @@ -5,7 +5,7 @@ FairMOT: detector: CenterNet reid: FairMOTEmbeddingHead loss: FairMOTLoss - tracker: JDETracker + tracker: FrozenJDETracker CenterNet: backbone: DLA diff --git a/modules/video/multiple_object_tracking/fairmot_dla34/modeling/mot/__init__.py b/modules/video/multiple_object_tracking/fairmot_dla34/modeling/mot/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..258e4c9010832936f098e6febe777ac556f0668f --- /dev/null +++ b/modules/video/multiple_object_tracking/fairmot_dla34/modeling/mot/__init__.py @@ -0,0 +1,25 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from . import matching +from . import tracker +from . import motion +from . import visualization +from . 
import utils + +from .matching import * +from .tracker import * +from .motion import * +from .visualization import * +from .utils import * diff --git a/modules/video/multiple_object_tracking/fairmot_dla34/modeling/mot/matching/__init__.py b/modules/video/multiple_object_tracking/fairmot_dla34/modeling/mot/matching/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..54c6680f79f16247c562a9da1024dd3e1de4c57f --- /dev/null +++ b/modules/video/multiple_object_tracking/fairmot_dla34/modeling/mot/matching/__init__.py @@ -0,0 +1,19 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from . import jde_matching +from . import deepsort_matching + +from .jde_matching import * +from .deepsort_matching import * diff --git a/modules/video/multiple_object_tracking/fairmot_dla34/modeling/mot/matching/deepsort_matching.py b/modules/video/multiple_object_tracking/fairmot_dla34/modeling/mot/matching/deepsort_matching.py new file mode 100644 index 0000000000000000000000000000000000000000..c55aa8876cc128f512aa4e2e4e48a935a3f8dd77 --- /dev/null +++ b/modules/video/multiple_object_tracking/fairmot_dla34/modeling/mot/matching/deepsort_matching.py @@ -0,0 +1,368 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +This code is borrow from https://github.com/nwojke/deep_sort/tree/master/deep_sort +""" + +import numpy as np +from scipy.optimize import linear_sum_assignment +from ..motion import kalman_filter + +INFTY_COST = 1e+5 + +__all__ = [ + 'iou_1toN', + 'iou_cost', + '_nn_euclidean_distance', + '_nn_cosine_distance', + 'NearestNeighborDistanceMetric', + 'min_cost_matching', + 'matching_cascade', + 'gate_cost_matrix', +] + + +def iou_1toN(bbox, candidates): + """ + Computer intersection over union (IoU) by one box to N candidates. + + Args: + bbox (ndarray): A bounding box in format `(top left x, top left y, width, height)`. + candidates (ndarray): A matrix of candidate bounding boxes (one per row) in the + same format as `bbox`. + + Returns: + ious (ndarray): The intersection over union in [0, 1] between the `bbox` + and each candidate. A higher score means a larger fraction of the + `bbox` is occluded by the candidate. 
+ """ + bbox_tl = bbox[:2] + bbox_br = bbox[:2] + bbox[2:] + candidates_tl = candidates[:, :2] + candidates_br = candidates[:, :2] + candidates[:, 2:] + + tl = np.c_[np.maximum(bbox_tl[0], candidates_tl[:, 0])[:, np.newaxis], + np.maximum(bbox_tl[1], candidates_tl[:, 1])[:, np.newaxis]] + br = np.c_[np.minimum(bbox_br[0], candidates_br[:, 0])[:, np.newaxis], + np.minimum(bbox_br[1], candidates_br[:, 1])[:, np.newaxis]] + wh = np.maximum(0., br - tl) + + area_intersection = wh.prod(axis=1) + area_bbox = bbox[2:].prod() + area_candidates = candidates[:, 2:].prod(axis=1) + ious = area_intersection / (area_bbox + area_candidates - area_intersection) + return ious + + +def iou_cost(tracks, detections, track_indices=None, detection_indices=None): + """ + IoU distance metric. + + Args: + tracks (list[Track]): A list of tracks. + detections (list[Detection]): A list of detections. + track_indices (Optional[list[int]]): A list of indices to tracks that + should be matched. Defaults to all `tracks`. + detection_indices (Optional[list[int]]): A list of indices to detections + that should be matched. Defaults to all `detections`. + + Returns: + cost_matrix (ndarray): A cost matrix of shape len(track_indices), + len(detection_indices) where entry (i, j) is + `1 - iou(tracks[track_indices[i]], detections[detection_indices[j]])`. + """ + if track_indices is None: + track_indices = np.arange(len(tracks)) + if detection_indices is None: + detection_indices = np.arange(len(detections)) + + cost_matrix = np.zeros((len(track_indices), len(detection_indices))) + for row, track_idx in enumerate(track_indices): + if tracks[track_idx].time_since_update > 1: + cost_matrix[row, :] = 1e+5 + continue + + bbox = tracks[track_idx].to_tlwh() + candidates = np.asarray([detections[i].tlwh for i in detection_indices]) + cost_matrix[row, :] = 1. - iou_1toN(bbox, candidates) + return cost_matrix + + +def _nn_euclidean_distance(s, q): + """ + Compute pair-wise squared (Euclidean) distance between points in `s` and `q`. + + Args: + s (ndarray): Sample points: an NxM matrix of N samples of dimensionality M. + q (ndarray): Query points: an LxM matrix of L samples of dimensionality M. + + Returns: + distances (ndarray): A vector of length M that contains for each entry in `q` the + smallest Euclidean distance to a sample in `s`. + """ + s, q = np.asarray(s), np.asarray(q) + if len(s) == 0 or len(q) == 0: + return np.zeros((len(s), len(q))) + s2, q2 = np.square(s).sum(axis=1), np.square(q).sum(axis=1) + distances = -2. * np.dot(s, q.T) + s2[:, None] + q2[None, :] + distances = np.clip(distances, 0., float(np.inf)) + + return np.maximum(0.0, distances.min(axis=0)) + + +def _nn_cosine_distance(s, q): + """ + Compute pair-wise cosine distance between points in `s` and `q`. + + Args: + s (ndarray): Sample points: an NxM matrix of N samples of dimensionality M. + q (ndarray): Query points: an LxM matrix of L samples of dimensionality M. + + Returns: + distances (ndarray): A vector of length M that contains for each entry in `q` the + smallest Euclidean distance to a sample in `s`. + """ + s = np.asarray(s) / np.linalg.norm(s, axis=1, keepdims=True) + q = np.asarray(q) / np.linalg.norm(q, axis=1, keepdims=True) + distances = 1. - np.dot(s, q.T) + + return distances.min(axis=0) + + +class NearestNeighborDistanceMetric(object): + """ + A nearest neighbor distance metric that, for each target, returns + the closest distance to any sample that has been observed so far. + + Args: + metric (str): Either "euclidean" or "cosine". 
+ matching_threshold (float): The matching threshold. Samples with larger + distance are considered an invalid match. + budget (Optional[int]): If not None, fix samples per class to at most + this number. Removes the oldest samples when the budget is reached. + + Attributes: + samples (Dict[int -> List[ndarray]]): A dictionary that maps from target + identities to the list of samples that have been observed so far. + """ + + def __init__(self, metric, matching_threshold, budget=None): + if metric == "euclidean": + self._metric = _nn_euclidean_distance + elif metric == "cosine": + self._metric = _nn_cosine_distance + else: + raise ValueError("Invalid metric; must be either 'euclidean' or 'cosine'") + self.matching_threshold = matching_threshold + self.budget = budget + self.samples = {} + + def partial_fit(self, features, targets, active_targets): + """ + Update the distance metric with new data. + + Args: + features (ndarray): An NxM matrix of N features of dimensionality M. + targets (ndarray): An integer array of associated target identities. + active_targets (List[int]): A list of targets that are currently + present in the scene. + """ + for feature, target in zip(features, targets): + self.samples.setdefault(target, []).append(feature) + if self.budget is not None: + self.samples[target] = self.samples[target][-self.budget:] + self.samples = {k: self.samples[k] for k in active_targets} + + def distance(self, features, targets): + """ + Compute distance between features and targets. + + Args: + features (ndarray): An NxM matrix of N features of dimensionality M. + targets (list[int]): A list of targets to match the given `features` against. + + Returns: + cost_matrix (ndarray): a cost matrix of shape len(targets), len(features), + where element (i, j) contains the closest squared distance between + `targets[i]` and `features[j]`. + """ + cost_matrix = np.zeros((len(targets), len(features))) + for i, target in enumerate(targets): + cost_matrix[i, :] = self._metric(self.samples[target], features) + return cost_matrix + + +def min_cost_matching(distance_metric, max_distance, tracks, detections, track_indices=None, detection_indices=None): + """ + Solve linear assignment problem. + + Args: + distance_metric : + Callable[List[Track], List[Detection], List[int], List[int]) -> ndarray + The distance metric is given a list of tracks and detections as + well as a list of N track indices and M detection indices. The + metric should return the NxM dimensional cost matrix, where element + (i, j) is the association cost between the i-th track in the given + track indices and the j-th detection in the given detection_indices. + max_distance (float): Gating threshold. Associations with cost larger + than this value are disregarded. + tracks (list[Track]): A list of predicted tracks at the current time + step. + detections (list[Detection]): A list of detections at the current time + step. + track_indices (list[int]): List of track indices that maps rows in + `cost_matrix` to tracks in `tracks`. + detection_indices (List[int]): List of detection indices that maps + columns in `cost_matrix` to detections in `detections`. + + Returns: + A tuple (List[(int, int)], List[int], List[int]) with the following + three entries: + * A list of matched track and detection indices. + * A list of unmatched track indices. + * A list of unmatched detection indices. 
+ """ + if track_indices is None: + track_indices = np.arange(len(tracks)) + if detection_indices is None: + detection_indices = np.arange(len(detections)) + + if len(detection_indices) == 0 or len(track_indices) == 0: + return [], track_indices, detection_indices # Nothing to match. + + cost_matrix = distance_metric(tracks, detections, track_indices, detection_indices) + + cost_matrix[cost_matrix > max_distance] = max_distance + 1e-5 + indices = linear_sum_assignment(cost_matrix) + + matches, unmatched_tracks, unmatched_detections = [], [], [] + for col, detection_idx in enumerate(detection_indices): + if col not in indices[1]: + unmatched_detections.append(detection_idx) + for row, track_idx in enumerate(track_indices): + if row not in indices[0]: + unmatched_tracks.append(track_idx) + for row, col in zip(indices[0], indices[1]): + track_idx = track_indices[row] + detection_idx = detection_indices[col] + if cost_matrix[row, col] > max_distance: + unmatched_tracks.append(track_idx) + unmatched_detections.append(detection_idx) + else: + matches.append((track_idx, detection_idx)) + return matches, unmatched_tracks, unmatched_detections + + +def matching_cascade(distance_metric, + max_distance, + cascade_depth, + tracks, + detections, + track_indices=None, + detection_indices=None): + """ + Run matching cascade. + + Args: + distance_metric : + Callable[List[Track], List[Detection], List[int], List[int]) -> ndarray + The distance metric is given a list of tracks and detections as + well as a list of N track indices and M detection indices. The + metric should return the NxM dimensional cost matrix, where element + (i, j) is the association cost between the i-th track in the given + track indices and the j-th detection in the given detection_indices. + max_distance (float): Gating threshold. Associations with cost larger + than this value are disregarded. + cascade_depth (int): The cascade depth, should be se to the maximum + track age. + tracks (list[Track]): A list of predicted tracks at the current time + step. + detections (list[Detection]): A list of detections at the current time + step. + track_indices (list[int]): List of track indices that maps rows in + `cost_matrix` to tracks in `tracks`. + detection_indices (List[int]): List of detection indices that maps + columns in `cost_matrix` to detections in `detections`. + + Returns: + A tuple (List[(int, int)], List[int], List[int]) with the following + three entries: + * A list of matched track and detection indices. + * A list of unmatched track indices. + * A list of unmatched detection indices. 
+ """ + if track_indices is None: + track_indices = list(range(len(tracks))) + if detection_indices is None: + detection_indices = list(range(len(detections))) + + unmatched_detections = detection_indices + matches = [] + for level in range(cascade_depth): + if len(unmatched_detections) == 0: # No detections left + break + + track_indices_l = [k for k in track_indices if tracks[k].time_since_update == 1 + level] + if len(track_indices_l) == 0: # Nothing to match at this level + continue + + matches_l, _, unmatched_detections = \ + min_cost_matching( + distance_metric, max_distance, tracks, detections, + track_indices_l, unmatched_detections) + matches += matches_l + unmatched_tracks = list(set(track_indices) - set(k for k, _ in matches)) + return matches, unmatched_tracks, unmatched_detections + + +def gate_cost_matrix(kf, + cost_matrix, + tracks, + detections, + track_indices, + detection_indices, + gated_cost=INFTY_COST, + only_position=False): + """ + Invalidate infeasible entries in cost matrix based on the state + distributions obtained by Kalman filtering. + + Args: + kf (object): The Kalman filter. + cost_matrix (ndarray): The NxM dimensional cost matrix, where N is the + number of track indices and M is the number of detection indices, + such that entry (i, j) is the association cost between + `tracks[track_indices[i]]` and `detections[detection_indices[j]]`. + tracks (list[Track]): A list of predicted tracks at the current time + step. + detections (list[Detection]): A list of detections at the current time + step. + track_indices (List[int]): List of track indices that maps rows in + `cost_matrix` to tracks in `tracks`. + detection_indices (List[int]): List of detection indices that maps + columns in `cost_matrix` to detections in `detections`. + gated_cost (Optional[float]): Entries in the cost matrix corresponding + to infeasible associations are set this value. Defaults to a very + large value. + only_position (Optional[bool]): If True, only the x, y position of the + state distribution is considered during gating. Default False. + """ + gating_dim = 2 if only_position else 4 + gating_threshold = kalman_filter.chi2inv95[gating_dim] + measurements = np.asarray([detections[i].to_xyah() for i in detection_indices]) + for row, track_idx in enumerate(track_indices): + track = tracks[track_idx] + gating_distance = kf.gating_distance(track.mean, track.covariance, measurements, only_position) + cost_matrix[row, gating_distance > gating_threshold] = gated_cost + return cost_matrix diff --git a/modules/video/multiple_object_tracking/fairmot_dla34/modeling/mot/matching/jde_matching.py b/modules/video/multiple_object_tracking/fairmot_dla34/modeling/mot/matching/jde_matching.py new file mode 100644 index 0000000000000000000000000000000000000000..bf2e891c391c98ed8944f88377f62c9722fa5155 --- /dev/null +++ b/modules/video/multiple_object_tracking/fairmot_dla34/modeling/mot/matching/jde_matching.py @@ -0,0 +1,123 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +""" +This code is borrow from https://github.com/Zhongdao/Towards-Realtime-MOT/blob/master/tracker/matching.py +""" + +import lap +import scipy +import numpy as np +from scipy.spatial.distance import cdist +from ..motion import kalman_filter + +from ppdet.utils.logger import setup_logger +logger = setup_logger(__name__) + +__all__ = [ + 'merge_matches', + 'linear_assignment', + 'cython_bbox_ious', + 'iou_distance', + 'embedding_distance', + 'fuse_motion', +] + + +def merge_matches(m1, m2, shape): + O, P, Q = shape + m1 = np.asarray(m1) + m2 = np.asarray(m2) + + M1 = scipy.sparse.coo_matrix((np.ones(len(m1)), (m1[:, 0], m1[:, 1])), shape=(O, P)) + M2 = scipy.sparse.coo_matrix((np.ones(len(m2)), (m2[:, 0], m2[:, 1])), shape=(P, Q)) + + mask = M1 * M2 + match = mask.nonzero() + match = list(zip(match[0], match[1])) + unmatched_O = tuple(set(range(O)) - set([i for i, j in match])) + unmatched_Q = tuple(set(range(Q)) - set([j for i, j in match])) + + return match, unmatched_O, unmatched_Q + + +def linear_assignment(cost_matrix, thresh): + if cost_matrix.size == 0: + return np.empty((0, 2), dtype=int), tuple(range(cost_matrix.shape[0])), tuple(range(cost_matrix.shape[1])) + matches, unmatched_a, unmatched_b = [], [], [] + cost, x, y = lap.lapjv(cost_matrix, extend_cost=True, cost_limit=thresh) + for ix, mx in enumerate(x): + if mx >= 0: + matches.append([ix, mx]) + unmatched_a = np.where(x < 0)[0] + unmatched_b = np.where(y < 0)[0] + matches = np.asarray(matches) + return matches, unmatched_a, unmatched_b + + +def cython_bbox_ious(atlbrs, btlbrs): + ious = np.zeros((len(atlbrs), len(btlbrs)), dtype=np.float) + if ious.size == 0: + return ious + try: + import cython_bbox + except Exception as e: + logger.error('cython_bbox not found, please install cython_bbox.' 'for example: `pip install cython_bbox`.') + raise e + + ious = cython_bbox.bbox_overlaps( + np.ascontiguousarray(atlbrs, dtype=np.float), np.ascontiguousarray(btlbrs, dtype=np.float)) + return ious + + +def iou_distance(atracks, btracks): + """ + Compute cost based on IoU between two list[STrack]. + """ + if (len(atracks) > 0 and isinstance(atracks[0], np.ndarray)) or (len(btracks) > 0 + and isinstance(btracks[0], np.ndarray)): + atlbrs = atracks + btlbrs = btracks + else: + atlbrs = [track.tlbr for track in atracks] + btlbrs = [track.tlbr for track in btracks] + _ious = cython_bbox_ious(atlbrs, btlbrs) + cost_matrix = 1 - _ious + + return cost_matrix + + +def embedding_distance(tracks, detections, metric='euclidean'): + """ + Compute cost based on features between two list[STrack]. 
+ """ + cost_matrix = np.zeros((len(tracks), len(detections)), dtype=np.float) + if cost_matrix.size == 0: + return cost_matrix + det_features = np.asarray([track.curr_feat for track in detections], dtype=np.float) + track_features = np.asarray([track.smooth_feat for track in tracks], dtype=np.float) + cost_matrix = np.maximum(0.0, cdist(track_features, det_features, metric)) # Nomalized features + return cost_matrix + + +def fuse_motion(kf, cost_matrix, tracks, detections, only_position=False, lambda_=0.98): + if cost_matrix.size == 0: + return cost_matrix + gating_dim = 2 if only_position else 4 + gating_threshold = kalman_filter.chi2inv95[gating_dim] + measurements = np.asarray([det.to_xyah() for det in detections]) + for row, track in enumerate(tracks): + gating_distance = kf.gating_distance(track.mean, track.covariance, measurements, only_position, metric='maha') + cost_matrix[row, gating_distance > gating_threshold] = np.inf + cost_matrix[row] = lambda_ * cost_matrix[row] + (1 - lambda_) * gating_distance + return cost_matrix diff --git a/modules/video/multiple_object_tracking/fairmot_dla34/modeling/mot/motion/__init__.py b/modules/video/multiple_object_tracking/fairmot_dla34/modeling/mot/motion/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e42dd0b019d66d6ea07bec1ad90cf9a8d53d8172 --- /dev/null +++ b/modules/video/multiple_object_tracking/fairmot_dla34/modeling/mot/motion/__init__.py @@ -0,0 +1,17 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from . import kalman_filter + +from .kalman_filter import * diff --git a/modules/video/multiple_object_tracking/fairmot_dla34/modeling/mot/motion/kalman_filter.py b/modules/video/multiple_object_tracking/fairmot_dla34/modeling/mot/motion/kalman_filter.py new file mode 100644 index 0000000000000000000000000000000000000000..7cc182e4c5e76e0688688c883b2a24fa30df9c74 --- /dev/null +++ b/modules/video/multiple_object_tracking/fairmot_dla34/modeling/mot/motion/kalman_filter.py @@ -0,0 +1,237 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +This code is borrow from https://github.com/nwojke/deep_sort/blob/master/deep_sort/kalman_filter.py +""" + +import numpy as np +import scipy.linalg + +__all__ = ['KalmanFilter'] +""" +Table for the 0.95 quantile of the chi-square distribution with N degrees of +freedom (contains values for N=1, ..., 9). 
Taken from MATLAB/Octave's chi2inv +function and used as Mahalanobis gating threshold. +""" + +chi2inv95 = {1: 3.8415, 2: 5.9915, 3: 7.8147, 4: 9.4877, 5: 11.070, 6: 12.592, 7: 14.067, 8: 15.507, 9: 16.919} + + +class KalmanFilter(object): + """ + A simple Kalman filter for tracking bounding boxes in image space. + + The 8-dimensional state space + + x, y, a, h, vx, vy, va, vh + + contains the bounding box center position (x, y), aspect ratio a, height h, + and their respective velocities. + + Object motion follows a constant velocity model. The bounding box location + (x, y, a, h) is taken as direct observation of the state space (linear + observation model). + + """ + + def __init__(self): + ndim, dt = 4, 1. + + # Create Kalman filter model matrices. + self._motion_mat = np.eye(2 * ndim, 2 * ndim) + for i in range(ndim): + self._motion_mat[i, ndim + i] = dt + self._update_mat = np.eye(ndim, 2 * ndim) + + # Motion and observation uncertainty are chosen relative to the current + # state estimate. These weights control the amount of uncertainty in + # the model. This is a bit hacky. + self._std_weight_position = 1. / 20 + self._std_weight_velocity = 1. / 160 + + def initiate(self, measurement): + """ + Create track from unassociated measurement. + + Args: + measurement (ndarray): Bounding box coordinates (x, y, a, h) with + center position (x, y), aspect ratio a, and height h. + + Returns: + The mean vector (8 dimensional) and covariance matrix (8x8 + dimensional) of the new track. Unobserved velocities are + initialized to 0 mean. + """ + mean_pos = measurement + mean_vel = np.zeros_like(mean_pos) + mean = np.r_[mean_pos, mean_vel] + + std = [ + 2 * self._std_weight_position * measurement[3], 2 * self._std_weight_position * measurement[3], 1e-2, + 2 * self._std_weight_position * measurement[3], 10 * self._std_weight_velocity * measurement[3], + 10 * self._std_weight_velocity * measurement[3], 1e-5, 10 * self._std_weight_velocity * measurement[3] + ] + covariance = np.diag(np.square(std)) + return mean, covariance + + def predict(self, mean, covariance): + """ + Run Kalman filter prediction step. + + Args: + mean (ndarray): The 8 dimensional mean vector of the object state + at the previous time step. + covariance (ndarray): The 8x8 dimensional covariance matrix of the + object state at the previous time step. + + Returns: + The mean vector and covariance matrix of the predicted state. + Unobserved velocities are initialized to 0 mean. + """ + std_pos = [ + self._std_weight_position * mean[3], self._std_weight_position * mean[3], 1e-2, + self._std_weight_position * mean[3] + ] + std_vel = [ + self._std_weight_velocity * mean[3], self._std_weight_velocity * mean[3], 1e-5, + self._std_weight_velocity * mean[3] + ] + motion_cov = np.diag(np.square(np.r_[std_pos, std_vel])) + + #mean = np.dot(self._motion_mat, mean) + mean = np.dot(mean, self._motion_mat.T) + covariance = np.linalg.multi_dot((self._motion_mat, covariance, self._motion_mat.T)) + motion_cov + + return mean, covariance + + def project(self, mean, covariance): + """ + Project state distribution to measurement space. + + Args + mean (ndarray): The state's mean vector (8 dimensional array). + covariance (ndarray): The state's covariance matrix (8x8 dimensional). + + Returns: + The projected mean and covariance matrix of the given state estimate. 
+ """ + std = [ + self._std_weight_position * mean[3], self._std_weight_position * mean[3], 1e-1, + self._std_weight_position * mean[3] + ] + innovation_cov = np.diag(np.square(std)) + + mean = np.dot(self._update_mat, mean) + covariance = np.linalg.multi_dot((self._update_mat, covariance, self._update_mat.T)) + return mean, covariance + innovation_cov + + def multi_predict(self, mean, covariance): + """ + Run Kalman filter prediction step (Vectorized version). + + Args: + mean (ndarray): The Nx8 dimensional mean matrix of the object states + at the previous time step. + covariance (ndarray): The Nx8x8 dimensional covariance matrics of the + object states at the previous time step. + + Returns: + The mean vector and covariance matrix of the predicted state. + Unobserved velocities are initialized to 0 mean. + """ + std_pos = [ + self._std_weight_position * mean[:, 3], self._std_weight_position * mean[:, 3], + 1e-2 * np.ones_like(mean[:, 3]), self._std_weight_position * mean[:, 3] + ] + std_vel = [ + self._std_weight_velocity * mean[:, 3], self._std_weight_velocity * mean[:, 3], + 1e-5 * np.ones_like(mean[:, 3]), self._std_weight_velocity * mean[:, 3] + ] + sqr = np.square(np.r_[std_pos, std_vel]).T + + motion_cov = [] + for i in range(len(mean)): + motion_cov.append(np.diag(sqr[i])) + motion_cov = np.asarray(motion_cov) + + mean = np.dot(mean, self._motion_mat.T) + left = np.dot(self._motion_mat, covariance).transpose((1, 0, 2)) + covariance = np.dot(left, self._motion_mat.T) + motion_cov + + return mean, covariance + + def update(self, mean, covariance, measurement): + """ + Run Kalman filter correction step. + + Args: + mean (ndarray): The predicted state's mean vector (8 dimensional). + covariance (ndarray): The state's covariance matrix (8x8 dimensional). + measurement (ndarray): The 4 dimensional measurement vector + (x, y, a, h), where (x, y) is the center position, a the aspect + ratio, and h the height of the bounding box. + + Returns: + The measurement-corrected state distribution. + """ + projected_mean, projected_cov = self.project(mean, covariance) + + chol_factor, lower = scipy.linalg.cho_factor(projected_cov, lower=True, check_finite=False) + kalman_gain = scipy.linalg.cho_solve((chol_factor, lower), + np.dot(covariance, self._update_mat.T).T, + check_finite=False).T + innovation = measurement - projected_mean + + new_mean = mean + np.dot(innovation, kalman_gain.T) + new_covariance = covariance - np.linalg.multi_dot((kalman_gain, projected_cov, kalman_gain.T)) + return new_mean, new_covariance + + def gating_distance(self, mean, covariance, measurements, only_position=False, metric='maha'): + """ + Compute gating distance between state distribution and measurements. + A suitable distance threshold can be obtained from `chi2inv95`. If + `only_position` is False, the chi-square distribution has 4 degrees of + freedom, otherwise 2. + + Args: + mean (ndarray): Mean vector over the state distribution (8 + dimensional). + covariance (ndarray): Covariance of the state distribution (8x8 + dimensional). + measurements (ndarray): An Nx4 dimensional matrix of N measurements, + each in format (x, y, a, h) where (x, y) is the bounding box center + position, a the aspect ratio, and h the height. + only_position (Optional[bool]): If True, distance computation is + done with respect to the bounding box center position only. + metric (str): Metric type, 'gaussian' or 'maha'. 
+ + Returns + An array of length N, where the i-th element contains the squared + Mahalanobis distance between (mean, covariance) and `measurements[i]`. + """ + mean, covariance = self.project(mean, covariance) + if only_position: + mean, covariance = mean[:2], covariance[:2, :2] + measurements = measurements[:, :2] + + d = measurements - mean + if metric == 'gaussian': + return np.sum(d * d, axis=1) + elif metric == 'maha': + cholesky_factor = np.linalg.cholesky(covariance) + z = scipy.linalg.solve_triangular(cholesky_factor, d.T, lower=True, check_finite=False, overwrite_b=True) + squared_maha = np.sum(z * z, axis=0) + return squared_maha + else: + raise ValueError('invalid distance metric') diff --git a/modules/video/multiple_object_tracking/fairmot_dla34/modeling/mot/tracker/__init__.py b/modules/video/multiple_object_tracking/fairmot_dla34/modeling/mot/tracker/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..904822119661be61141715c638388db9d045fee1 --- /dev/null +++ b/modules/video/multiple_object_tracking/fairmot_dla34/modeling/mot/tracker/__init__.py @@ -0,0 +1,21 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from . import base_jde_tracker +from . import base_sde_tracker +from . import jde_tracker + +from .base_jde_tracker import * +from .base_sde_tracker import * +from .jde_tracker import * diff --git a/modules/video/multiple_object_tracking/fairmot_dla34/modeling/mot/tracker/base_jde_tracker.py b/modules/video/multiple_object_tracking/fairmot_dla34/modeling/mot/tracker/base_jde_tracker.py new file mode 100644 index 0000000000000000000000000000000000000000..9505a709ee573acecf4b5dd7e02a06cee9d44284 --- /dev/null +++ b/modules/video/multiple_object_tracking/fairmot_dla34/modeling/mot/tracker/base_jde_tracker.py @@ -0,0 +1,257 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
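+# Box-format quick reference for the helpers below (illustrative numbers only):
+#   tlwh = (10, 20, 50, 100)   # top-left x, top-left y, width, height
+#   tlbr = (10, 20, 60, 120)   # bottom-right corner = top-left + (width, height)
+#   xyah = (35, 70, 0.5, 100)  # center x, center y, aspect ratio w/h, height
+# STrack.tlwh_to_xyah, tlbr_to_tlwh and tlwh_to_tlbr convert between them.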
+""" +This code is borrow from https://github.com/Zhongdao/Towards-Realtime-MOT/blob/master/tracker/multitracker.py +""" + +import numpy as np +from collections import deque, OrderedDict +from ..matching import jde_matching as matching +from ppdet.core.workspace import register, serializable + +__all__ = [ + 'TrackState', + 'BaseTrack', + 'STrack', + 'joint_stracks', + 'sub_stracks', + 'remove_duplicate_stracks', +] + + +class TrackState(object): + New = 0 + Tracked = 1 + Lost = 2 + Removed = 3 + + +class BaseTrack(object): + _count = 0 + + track_id = 0 + is_activated = False + state = TrackState.New + + history = OrderedDict() + features = [] + curr_feature = None + score = 0 + start_frame = 0 + frame_id = 0 + time_since_update = 0 + + # multi-camera + location = (np.inf, np.inf) + + @property + def end_frame(self): + return self.frame_id + + @staticmethod + def next_id(): + BaseTrack._count += 1 + return BaseTrack._count + + def activate(self, *args): + raise NotImplementedError + + def predict(self): + raise NotImplementedError + + def update(self, *args, **kwargs): + raise NotImplementedError + + def mark_lost(self): + self.state = TrackState.Lost + + def mark_removed(self): + self.state = TrackState.Removed + + +class STrack(BaseTrack): + def __init__(self, tlwh, score, temp_feat, buffer_size=30): + # wait activate + self._tlwh = np.asarray(tlwh, dtype=np.float) + self.kalman_filter = None + self.mean, self.covariance = None, None + self.is_activated = False + + self.score = score + self.tracklet_len = 0 + + self.smooth_feat = None + self.update_features(temp_feat) + self.features = deque([], maxlen=buffer_size) + self.alpha = 0.9 + + def update_features(self, feat): + feat /= np.linalg.norm(feat) + self.curr_feat = feat + if self.smooth_feat is None: + self.smooth_feat = feat + else: + self.smooth_feat = self.alpha * self.smooth_feat + (1 - self.alpha) * feat + self.features.append(feat) + self.smooth_feat /= np.linalg.norm(self.smooth_feat) + + def predict(self): + mean_state = self.mean.copy() + if self.state != TrackState.Tracked: + mean_state[7] = 0 + self.mean, self.covariance = self.kalman_filter.predict(mean_state, self.covariance) + + @staticmethod + def multi_predict(stracks, kalman_filter): + if len(stracks) > 0: + multi_mean = np.asarray([st.mean.copy() for st in stracks]) + multi_covariance = np.asarray([st.covariance for st in stracks]) + for i, st in enumerate(stracks): + if st.state != TrackState.Tracked: + multi_mean[i][7] = 0 + multi_mean, multi_covariance = kalman_filter.multi_predict(multi_mean, multi_covariance) + for i, (mean, cov) in enumerate(zip(multi_mean, multi_covariance)): + stracks[i].mean = mean + stracks[i].covariance = cov + + def activate(self, kalman_filter, frame_id): + """Start a new tracklet""" + self.kalman_filter = kalman_filter + self.track_id = self.next_id() + self.mean, self.covariance = self.kalman_filter.initiate(self.tlwh_to_xyah(self._tlwh)) + + self.tracklet_len = 0 + self.state = TrackState.Tracked + if frame_id == 1: + self.is_activated = True + self.frame_id = frame_id + self.start_frame = frame_id + + def re_activate(self, new_track, frame_id, new_id=False): + self.mean, self.covariance = self.kalman_filter.update(self.mean, self.covariance, + self.tlwh_to_xyah(new_track.tlwh)) + + self.update_features(new_track.curr_feat) + self.tracklet_len = 0 + self.state = TrackState.Tracked + self.is_activated = True + self.frame_id = frame_id + if new_id: + self.track_id = self.next_id() + + def update(self, new_track, frame_id, 
update_feature=True): + self.frame_id = frame_id + self.tracklet_len += 1 + + new_tlwh = new_track.tlwh + self.mean, self.covariance = self.kalman_filter.update(self.mean, self.covariance, self.tlwh_to_xyah(new_tlwh)) + self.state = TrackState.Tracked + self.is_activated = True + + self.score = new_track.score + if update_feature: + self.update_features(new_track.curr_feat) + + @property + def tlwh(self): + """ + Get current position in bounding box format `(top left x, top left y, + width, height)`. + """ + if self.mean is None: + return self._tlwh.copy() + ret = self.mean[:4].copy() + ret[2] *= ret[3] + ret[:2] -= ret[2:] / 2 + return ret + + @property + def tlbr(self): + """ + Convert bounding box to format `(min x, min y, max x, max y)`, i.e., + `(top left, bottom right)`. + """ + ret = self.tlwh.copy() + ret[2:] += ret[:2] + return ret + + @staticmethod + def tlwh_to_xyah(tlwh): + """ + Convert bounding box to format `(center x, center y, aspect ratio, + height)`, where the aspect ratio is `width / height`. + """ + ret = np.asarray(tlwh).copy() + ret[:2] += ret[2:] / 2 + ret[2] /= ret[3] + return ret + + def to_xyah(self): + return self.tlwh_to_xyah(self.tlwh) + + @staticmethod + def tlbr_to_tlwh(tlbr): + ret = np.asarray(tlbr).copy() + ret[2:] -= ret[:2] + return ret + + @staticmethod + def tlwh_to_tlbr(tlwh): + ret = np.asarray(tlwh).copy() + ret[2:] += ret[:2] + return ret + + def __repr__(self): + return 'OT_{}_({}-{})'.format(self.track_id, self.start_frame, self.end_frame) + + +def joint_stracks(tlista, tlistb): + exists = {} + res = [] + for t in tlista: + exists[t.track_id] = 1 + res.append(t) + for t in tlistb: + tid = t.track_id + if not exists.get(tid, 0): + exists[tid] = 1 + res.append(t) + return res + + +def sub_stracks(tlista, tlistb): + stracks = {} + for t in tlista: + stracks[t.track_id] = t + for t in tlistb: + tid = t.track_id + if stracks.get(tid, 0): + del stracks[tid] + return list(stracks.values()) + + +def remove_duplicate_stracks(stracksa, stracksb): + pdist = matching.iou_distance(stracksa, stracksb) + pairs = np.where(pdist < 0.15) + dupa, dupb = list(), list() + for p, q in zip(*pairs): + timep = stracksa[p].frame_id - stracksa[p].start_frame + timeq = stracksb[q].frame_id - stracksb[q].start_frame + if timep > timeq: + dupb.append(q) + else: + dupa.append(p) + resa = [t for i, t in enumerate(stracksa) if not i in dupa] + resb = [t for i, t in enumerate(stracksb) if not i in dupb] + return resa, resb diff --git a/modules/video/multiple_object_tracking/fairmot_dla34/modeling/mot/tracker/base_sde_tracker.py b/modules/video/multiple_object_tracking/fairmot_dla34/modeling/mot/tracker/base_sde_tracker.py new file mode 100644 index 0000000000000000000000000000000000000000..2e811e536a42ff781f60872b448b251de0301f61 --- /dev/null +++ b/modules/video/multiple_object_tracking/fairmot_dla34/modeling/mot/tracker/base_sde_tracker.py @@ -0,0 +1,133 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
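+# Track lifecycle implemented below (DeepSORT-style): a new Track starts as
+# Tentative, becomes Confirmed after `n_init` consecutive matched updates, and
+# is Deleted either on its first miss while still Tentative or after more than
+# `max_age` frames without a measurement update once Confirmed.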
+""" +This code is borrow from https://github.com/nwojke/deep_sort/blob/master/deep_sort/track.py +""" + +from ppdet.core.workspace import register, serializable + +__all__ = ['TrackState', 'Track'] + + +class TrackState(object): + """ + Enumeration type for the single target track state. Newly created tracks are + classified as `tentative` until enough evidence has been collected. Then, + the track state is changed to `confirmed`. Tracks that are no longer alive + are classified as `deleted` to mark them for removal from the set of active + tracks. + """ + Tentative = 1 + Confirmed = 2 + Deleted = 3 + + +class Track(object): + """ + A single target track with state space `(x, y, a, h)` and associated + velocities, where `(x, y)` is the center of the bounding box, `a` is the + aspect ratio and `h` is the height. + + Args: + mean (ndarray): Mean vector of the initial state distribution. + covariance (ndarray): Covariance matrix of the initial state distribution. + track_id (int): A unique track identifier. + n_init (int): Number of consecutive detections before the track is confirmed. + The track state is set to `Deleted` if a miss occurs within the first + `n_init` frames. + max_age (int): The maximum number of consecutive misses before the track + state is set to `Deleted`. + feature (Optional[ndarray]): Feature vector of the detection this track + originates from. If not None, this feature is added to the `features` cache. + + Attributes: + hits (int): Total number of measurement updates. + age (int): Total number of frames since first occurance. + time_since_update (int): Total number of frames since last measurement + update. + state (TrackState): The current track state. + features (List[ndarray]): A cache of features. On each measurement update, + the associated feature vector is added to this list. + """ + + def __init__(self, mean, covariance, track_id, n_init, max_age, feature=None): + self.mean = mean + self.covariance = covariance + self.track_id = track_id + self.hits = 1 + self.age = 1 + self.time_since_update = 0 + + self.state = TrackState.Tentative + self.features = [] + if feature is not None: + self.features.append(feature) + + self._n_init = n_init + self._max_age = max_age + + def to_tlwh(self): + """Get position in format `(top left x, top left y, width, height)`.""" + ret = self.mean[:4].copy() + ret[2] *= ret[3] + ret[:2] -= ret[2:] / 2 + return ret + + def to_tlbr(self): + """Get position in bounding box format `(min x, miny, max x, max y)`.""" + ret = self.to_tlwh() + ret[2:] = ret[:2] + ret[2:] + return ret + + def predict(self, kalman_filter): + """ + Propagate the state distribution to the current time step using a Kalman + filter prediction step. + """ + self.mean, self.covariance = kalman_filter.predict(self.mean, self.covariance) + self.age += 1 + self.time_since_update += 1 + + def update(self, kalman_filter, detection): + """ + Perform Kalman filter measurement update step and update the associated + detection feature cache. + """ + self.mean, self.covariance = kalman_filter.update(self.mean, self.covariance, detection.to_xyah()) + self.features.append(detection.feature) + + self.hits += 1 + self.time_since_update = 0 + if self.state == TrackState.Tentative and self.hits >= self._n_init: + self.state = TrackState.Confirmed + + def mark_missed(self): + """Mark this track as missed (no association at the current time step). 
+ """ + if self.state == TrackState.Tentative: + self.state = TrackState.Deleted + elif self.time_since_update > self._max_age: + self.state = TrackState.Deleted + + def is_tentative(self): + """Returns True if this track is tentative (unconfirmed).""" + return self.state == TrackState.Tentative + + def is_confirmed(self): + """Returns True if this track is confirmed.""" + return self.state == TrackState.Confirmed + + def is_deleted(self): + """Returns True if this track is dead and should be deleted.""" + return self.state == TrackState.Deleted diff --git a/modules/video/multiple_object_tracking/fairmot_dla34/modeling/mot/tracker/jde_tracker.py b/modules/video/multiple_object_tracking/fairmot_dla34/modeling/mot/tracker/jde_tracker.py new file mode 100644 index 0000000000000000000000000000000000000000..2e1cafb345b7687e563fc6d9c2c1769cb39d690c --- /dev/null +++ b/modules/video/multiple_object_tracking/fairmot_dla34/modeling/mot/tracker/jde_tracker.py @@ -0,0 +1,248 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +This code is borrow from https://github.com/Zhongdao/Towards-Realtime-MOT/blob/master/tracker/multitracker.py +""" + +import paddle + +from ..matching import jde_matching as matching +from .base_jde_tracker import TrackState, BaseTrack, STrack +from .base_jde_tracker import joint_stracks, sub_stracks, remove_duplicate_stracks + +from ppdet.core.workspace import register, serializable +from ppdet.utils.logger import setup_logger +logger = setup_logger(__name__) + +__all__ = ['FrozenJDETracker'] + + +@register +@serializable +class FrozenJDETracker(object): + __inject__ = ['motion'] + """ + JDE tracker + + Args: + det_thresh (float): threshold of detection score + track_buffer (int): buffer for tracker + min_box_area (int): min box area to filter out low quality boxes + vertical_ratio (float): w/h, the vertical ratio of the bbox to filter + bad results, set 1.6 default for pedestrian tracking. If set -1 + means no need to filter bboxes. + tracked_thresh (float): linear assignment threshold of tracked + stracks and detections + r_tracked_thresh (float): linear assignment threshold of + tracked stracks and unmatched detections + unconfirmed_thresh (float): linear assignment threshold of + unconfirmed stracks and unmatched detections + motion (object): KalmanFilter instance + conf_thres (float): confidence threshold for tracking + metric_type (str): either "euclidean" or "cosine", the distance metric + used for measurement to track association. 
+ """ + + def __init__(self, + det_thresh=0.3, + track_buffer=30, + min_box_area=200, + vertical_ratio=1.6, + tracked_thresh=0.7, + r_tracked_thresh=0.5, + unconfirmed_thresh=0.7, + motion='KalmanFilter', + conf_thres=0, + metric_type='euclidean'): + self.det_thresh = det_thresh + self.track_buffer = track_buffer + self.min_box_area = min_box_area + self.vertical_ratio = vertical_ratio + + self.tracked_thresh = tracked_thresh + self.r_tracked_thresh = r_tracked_thresh + self.unconfirmed_thresh = unconfirmed_thresh + self.motion = motion + self.conf_thres = conf_thres + self.metric_type = metric_type + + self.frame_id = 0 + self.tracked_stracks = [] + self.lost_stracks = [] + self.removed_stracks = [] + + self.max_time_lost = 0 + # max_time_lost will be calculated: int(frame_rate / 30.0 * track_buffer) + + def update(self, pred_dets, pred_embs): + """ + Processes the image frame and finds bounding box(detections). + Associates the detection with corresponding tracklets and also handles + lost, removed, refound and active tracklets. + + Args: + pred_dets (Tensor): Detection results of the image, shape is [N, 5]. + pred_embs (Tensor): Embedding results of the image, shape is [N, 512]. + + Return: + output_stracks (list): The list contains information regarding the + online_tracklets for the recieved image tensor. + """ + self.frame_id += 1 + activated_starcks = [] + # for storing active tracks, for the current frame + refind_stracks = [] + # Lost Tracks whose detections are obtained in the current frame + lost_stracks = [] + # The tracks which are not obtained in the current frame but are not + # removed. (Lost for some time lesser than the threshold for removing) + removed_stracks = [] + + remain_inds = paddle.nonzero(pred_dets[:, 4] > self.conf_thres) + if remain_inds.shape[0] == 0: + pred_dets = paddle.zeros([0, 1]) + pred_embs = paddle.zeros([0, 1]) + else: + pred_dets = paddle.gather(pred_dets, remain_inds) + pred_embs = paddle.gather(pred_embs, remain_inds) + + # Filter out the image with box_num = 0. 
pred_dets = [[0.0, 0.0, 0.0 ,0.0]] + empty_pred = True if len(pred_dets) == 1 and paddle.sum(pred_dets) == 0.0 else False + """ Step 1: Network forward, get detections & embeddings""" + if len(pred_dets) > 0 and not empty_pred: + pred_dets = pred_dets.numpy() + pred_embs = pred_embs.numpy() + detections = [ + STrack(STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], f, 30) for (tlbrs, f) in zip(pred_dets, pred_embs) + ] + else: + detections = [] + ''' Add newly detected tracklets to tracked_stracks''' + unconfirmed = [] + tracked_stracks = [] # type: list[STrack] + for track in self.tracked_stracks: + if not track.is_activated: + # previous tracks which are not active in the current frame are added in unconfirmed list + unconfirmed.append(track) + else: + # Active tracks are added to the local list 'tracked_stracks' + tracked_stracks.append(track) + """ Step 2: First association, with embedding""" + # Combining currently tracked_stracks and lost_stracks + strack_pool = joint_stracks(tracked_stracks, self.lost_stracks) + # Predict the current location with KF + STrack.multi_predict(strack_pool, self.motion) + + dists = matching.embedding_distance(strack_pool, detections, metric=self.metric_type) + dists = matching.fuse_motion(self.motion, dists, strack_pool, detections) + # The dists is the list of distances of the detection with the tracks in strack_pool + matches, u_track, u_detection = matching.linear_assignment(dists, thresh=self.tracked_thresh) + # The matches is the array for corresponding matches of the detection with the corresponding strack_pool + + for itracked, idet in matches: + # itracked is the id of the track and idet is the detection + track = strack_pool[itracked] + det = detections[idet] + if track.state == TrackState.Tracked: + # If the track is active, add the detection to the track + track.update(detections[idet], self.frame_id) + activated_starcks.append(track) + else: + # We have obtained a detection from a track which is not active, + # hence put the track in refind_stracks list + track.re_activate(det, self.frame_id, new_id=False) + refind_stracks.append(track) + + # None of the steps below happen if there are no undetected tracks. + """ Step 3: Second association, with IOU""" + detections = [detections[i] for i in u_detection] + # detections is now a list of the unmatched detections + r_tracked_stracks = [] + # This is container for stracks which were tracked till the previous + # frame but no detection was found for it in the current frame. + + for i in u_track: + if strack_pool[i].state == TrackState.Tracked: + r_tracked_stracks.append(strack_pool[i]) + dists = matching.iou_distance(r_tracked_stracks, detections) + matches, u_track, u_detection = matching.linear_assignment(dists, thresh=self.r_tracked_thresh) + # matches is the list of detections which matched with corresponding + # tracks by IOU distance method. 
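+        # Note: this second pass reuses linear_assignment with the tighter
+        # r_tracked_thresh (0.5 by default) rather than tracked_thresh (0.7),
+        # as plain IoU overlap is generally a weaker association cue than the
+        # appearance embeddings used in the first pass.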
+ + for itracked, idet in matches: + track = r_tracked_stracks[itracked] + det = detections[idet] + if track.state == TrackState.Tracked: + track.update(det, self.frame_id) + activated_starcks.append(track) + else: + track.re_activate(det, self.frame_id, new_id=False) + refind_stracks.append(track) + # Same process done for some unmatched detections, but now considering IOU_distance as measure + + for it in u_track: + track = r_tracked_stracks[it] + if not track.state == TrackState.Lost: + track.mark_lost() + lost_stracks.append(track) + # If no detections are obtained for tracks (u_track), the tracks are added to lost_tracks list and are marked lost + '''Deal with unconfirmed tracks, usually tracks with only one beginning frame''' + detections = [detections[i] for i in u_detection] + dists = matching.iou_distance(unconfirmed, detections) + matches, u_unconfirmed, u_detection = matching.linear_assignment(dists, thresh=self.unconfirmed_thresh) + for itracked, idet in matches: + unconfirmed[itracked].update(detections[idet], self.frame_id) + activated_starcks.append(unconfirmed[itracked]) + + # The tracks which are yet not matched + for it in u_unconfirmed: + track = unconfirmed[it] + track.mark_removed() + removed_stracks.append(track) + + # after all these confirmation steps, if a new detection is found, it is initialized for a new track + """ Step 4: Init new stracks""" + for inew in u_detection: + track = detections[inew] + if track.score < self.det_thresh: + continue + track.activate(self.motion, self.frame_id) + activated_starcks.append(track) + """ Step 5: Update state""" + # If the tracks are lost for more frames than the threshold number, the tracks are removed. + for track in self.lost_stracks: + if self.frame_id - track.end_frame > self.max_time_lost: + track.mark_removed() + removed_stracks.append(track) + + # Update the self.tracked_stracks and self.lost_stracks using the updates in this step. + self.tracked_stracks = [t for t in self.tracked_stracks if t.state == TrackState.Tracked] + self.tracked_stracks = joint_stracks(self.tracked_stracks, activated_starcks) + self.tracked_stracks = joint_stracks(self.tracked_stracks, refind_stracks) + + self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks) + self.lost_stracks.extend(lost_stracks) + self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks) + self.removed_stracks.extend(removed_stracks) + self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(self.tracked_stracks, self.lost_stracks) + # get scores of lost tracks + output_stracks = [track for track in self.tracked_stracks if track.is_activated] + + logger.debug('===========Frame {}=========='.format(self.frame_id)) + logger.debug('Activated: {}'.format([track.track_id for track in activated_starcks])) + logger.debug('Refind: {}'.format([track.track_id for track in refind_stracks])) + logger.debug('Lost: {}'.format([track.track_id for track in lost_stracks])) + logger.debug('Removed: {}'.format([track.track_id for track in removed_stracks])) + + return output_stracks diff --git a/modules/video/multiple_object_tracking/fairmot_dla34/modeling/mot/utils.py b/modules/video/multiple_object_tracking/fairmot_dla34/modeling/mot/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..12c61686a1715a965407822dcf19fd1081f292d7 --- /dev/null +++ b/modules/video/multiple_object_tracking/fairmot_dla34/modeling/mot/utils.py @@ -0,0 +1,176 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import cv2 +import time +import paddle +import numpy as np + +__all__ = [ + 'Timer', + 'Detection', + 'load_det_results', + 'preprocess_reid', + 'get_crops', + 'clip_box', + 'scale_coords', +] + + +class Timer(object): + """ + This class used to compute and print the current FPS while evaling. + """ + + def __init__(self): + self.total_time = 0. + self.calls = 0 + self.start_time = 0. + self.diff = 0. + self.average_time = 0. + self.duration = 0. + + def tic(self): + # using time.time instead of time.clock because time time.clock + # does not normalize for multithreading + self.start_time = time.time() + + def toc(self, average=True): + self.diff = time.time() - self.start_time + self.total_time += self.diff + self.calls += 1 + self.average_time = self.total_time / self.calls + if average: + self.duration = self.average_time + else: + self.duration = self.diff + return self.duration + + def clear(self): + self.total_time = 0. + self.calls = 0 + self.start_time = 0. + self.diff = 0. + self.average_time = 0. + self.duration = 0. + + +class Detection(object): + """ + This class represents a bounding box detection in a single image. + + Args: + tlwh (ndarray): Bounding box in format `(top left x, top left y, + width, height)`. + confidence (ndarray): Detector confidence score. + feature (Tensor): A feature vector that describes the object + contained in this image. + """ + + def __init__(self, tlwh, confidence, feature): + self.tlwh = np.asarray(tlwh, dtype=np.float32) + self.confidence = np.asarray(confidence, dtype=np.float32) + self.feature = feature + + def to_tlbr(self): + """ + Convert bounding box to format `(min x, min y, max x, max y)`, i.e., + `(top left, bottom right)`. + """ + ret = self.tlwh.copy() + ret[2:] += ret[:2] + return ret + + def to_xyah(self): + """ + Convert bounding box to format `(center x, center y, aspect ratio, + height)`, where the aspect ratio is `width / height`. 
+ """ + ret = self.tlwh.copy() + ret[:2] += ret[2:] / 2 + ret[2] /= ret[3] + return ret + + +def load_det_results(det_file, num_frames): + assert os.path.exists(det_file) and os.path.isfile(det_file), \ + 'Error: det_file: {} not exist or not a file.'.format(det_file) + labels = np.loadtxt(det_file, dtype='float32', delimiter=',') + results_list = [] + for frame_i in range(0, num_frames): + results = {'bbox': [], 'score': []} + lables_with_frame = labels[labels[:, 0] == frame_i + 1] + for l in lables_with_frame: + results['bbox'].append(l[1:5]) + results['score'].append(l[5]) + results_list.append(results) + return results_list + + +def scale_coords(coords, input_shape, im_shape, scale_factor): + im_shape = im_shape.numpy()[0] + ratio = scale_factor[0][0] + pad_w = (input_shape[1] - int(im_shape[1])) / 2 + pad_h = (input_shape[0] - int(im_shape[0])) / 2 + coords = paddle.cast(coords, 'float32') + coords[:, 0::2] -= pad_w + coords[:, 1::2] -= pad_h + coords[:, 0:4] /= ratio + coords[:, :4] = paddle.clip(coords[:, :4], min=0, max=coords[:, :4].max()) + return coords.round() + + +def clip_box(xyxy, input_shape, im_shape, scale_factor): + im_shape = im_shape.numpy()[0] + ratio = scale_factor.numpy()[0][0] + img0_shape = [int(im_shape[0] / ratio), int(im_shape[1] / ratio)] + + xyxy[:, 0::2] = paddle.clip(xyxy[:, 0::2], min=0, max=img0_shape[1]) + xyxy[:, 1::2] = paddle.clip(xyxy[:, 1::2], min=0, max=img0_shape[0]) + return xyxy + + +def get_crops(xyxy, ori_img, pred_scores, w, h): + crops = [] + keep_scores = [] + xyxy = xyxy.numpy().astype(np.int64) + ori_img = ori_img.numpy() + ori_img = np.squeeze(ori_img, axis=0).transpose(1, 0, 2) + pred_scores = pred_scores.numpy() + for i, bbox in enumerate(xyxy): + if bbox[2] <= bbox[0] or bbox[3] <= bbox[1]: + continue + crop = ori_img[bbox[0]:bbox[2], bbox[1]:bbox[3], :] + crops.append(crop) + keep_scores.append(pred_scores[i]) + if len(crops) == 0: + return [], [] + crops = preprocess_reid(crops, w, h) + return crops, keep_scores + + +def preprocess_reid(imgs, w=64, h=192, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]): + im_batch = [] + for img in imgs: + img = cv2.resize(img, (w, h)) + img = img[:, :, ::-1].astype('float32').transpose((2, 0, 1)) / 255 + img_mean = np.array(mean).reshape((3, 1, 1)) + img_std = np.array(std).reshape((3, 1, 1)) + img -= img_mean + img /= img_std + img = np.expand_dims(img, axis=0) + im_batch.append(img) + im_batch = np.concatenate(im_batch, 0) + return im_batch diff --git a/modules/video/multiple_object_tracking/fairmot_dla34/modeling/mot/visualization.py b/modules/video/multiple_object_tracking/fairmot_dla34/modeling/mot/visualization.py new file mode 100644 index 0000000000000000000000000000000000000000..cd9c5b15e15f677b7955dd4eba40798e985315a1 --- /dev/null +++ b/modules/video/multiple_object_tracking/fairmot_dla34/modeling/mot/visualization.py @@ -0,0 +1,117 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import cv2 +import numpy as np + + +def tlwhs_to_tlbrs(tlwhs): + tlbrs = np.copy(tlwhs) + if len(tlbrs) == 0: + return tlbrs + tlbrs[:, 2] += tlwhs[:, 0] + tlbrs[:, 3] += tlwhs[:, 1] + return tlbrs + + +def get_color(idx): + idx = idx * 3 + color = ((37 * idx) % 255, (17 * idx) % 255, (29 * idx) % 255) + return color + + +def resize_image(image, max_size=800): + if max(image.shape[:2]) > max_size: + scale = float(max_size) / max(image.shape[:2]) + image = cv2.resize(image, None, fx=scale, fy=scale) + return image + + +def plot_tracking(image, tlwhs, obj_ids, scores=None, frame_id=0, fps=0., ids2=None): + im = np.ascontiguousarray(np.copy(image)) + im_h, im_w = im.shape[:2] + + top_view = np.zeros([im_w, im_w, 3], dtype=np.uint8) + 255 + + text_scale = max(1, image.shape[1] / 1600.) + text_thickness = 2 + line_thickness = max(1, int(image.shape[1] / 500.)) + + radius = max(5, int(im_w / 140.)) + cv2.putText( + im, + 'frame: %d fps: %.2f num: %d' % (frame_id, fps, len(tlwhs)), (0, int(15 * text_scale)), + cv2.FONT_HERSHEY_PLAIN, + text_scale, (0, 0, 255), + thickness=2) + + for i, tlwh in enumerate(tlwhs): + x1, y1, w, h = tlwh + intbox = tuple(map(int, (x1, y1, x1 + w, y1 + h))) + obj_id = int(obj_ids[i]) + id_text = '{}'.format(int(obj_id)) + if ids2 is not None: + id_text = id_text + ', {}'.format(int(ids2[i])) + _line_thickness = 1 if obj_id <= 0 else line_thickness + color = get_color(abs(obj_id)) + cv2.rectangle(im, intbox[0:2], intbox[2:4], color=color, thickness=line_thickness) + cv2.putText( + im, + id_text, (intbox[0], intbox[1] + 10), + cv2.FONT_HERSHEY_PLAIN, + text_scale, (0, 0, 255), + thickness=text_thickness) + + if scores is not None: + text = '{:.2f}'.format(float(scores[i])) + cv2.putText( + im, + text, (intbox[0], intbox[1] - 10), + cv2.FONT_HERSHEY_PLAIN, + text_scale, (0, 255, 255), + thickness=text_thickness) + return im + + +def plot_trajectory(image, tlwhs, track_ids): + image = image.copy() + for one_tlwhs, track_id in zip(tlwhs, track_ids): + color = get_color(int(track_id)) + for tlwh in one_tlwhs: + x1, y1, w, h = tuple(map(int, tlwh)) + cv2.circle(image, (int(x1 + 0.5 * w), int(y1 + h)), 2, color, thickness=2) + return image + + +def plot_detections(image, tlbrs, scores=None, color=(255, 0, 0), ids=None): + im = np.copy(image) + text_scale = max(1, image.shape[1] / 800.) 
+ thickness = 2 if text_scale > 1.3 else 1 + for i, det in enumerate(tlbrs): + x1, y1, x2, y2 = np.asarray(det[:4], dtype=np.int) + if len(det) >= 7: + label = 'det' if det[5] > 0 else 'trk' + if ids is not None: + text = '{}# {:.2f}: {:d}'.format(label, det[6], ids[i]) + cv2.putText( + im, text, (x1, y1 + 30), cv2.FONT_HERSHEY_PLAIN, text_scale, (0, 255, 255), thickness=thickness) + else: + text = '{}# {:.2f}'.format(label, det[6]) + + if scores is not None: + text = '{:.2f}'.format(scores[i]) + cv2.putText(im, text, (x1, y1 + 30), cv2.FONT_HERSHEY_PLAIN, text_scale, (0, 255, 255), thickness=thickness) + + cv2.rectangle(im, (x1, y1), (x2, y2), color, 2) + return im diff --git a/modules/video/multiple_object_tracking/fairmot_dla34/module.py b/modules/video/multiple_object_tracking/fairmot_dla34/module.py index c110d712fc12c612bb72bab15eec349b5d6f7e5b..0543dc55e5e1126edd922bfbb220afdba9c9188d 100644 --- a/modules/video/multiple_object_tracking/fairmot_dla34/module.py +++ b/modules/video/multiple_object_tracking/fairmot_dla34/module.py @@ -31,12 +31,13 @@ from .tracker import StreamTracker logger = setup_logger('Predict') -@moduleinfo(name="fairmot_dla34", - type="CV/multiple_object_tracking", - author="paddlepaddle", - author_email="", - summary="Fairmot is a model for multiple object tracking.", - version="1.0.0") +@moduleinfo( + name="fairmot_dla34", + type="CV/multiple_object_tracking", + author="paddlepaddle", + author_email="", + summary="Fairmot is a model for multiple object tracking.", + version="1.0.0") class FairmotTracker_1088x608: def __init__(self): self.pretrained_model = os.path.join(self.directory, "fairmot_dla34_30e_1088x608") @@ -70,12 +71,13 @@ class FairmotTracker_1088x608: tracker.load_weights_jde(self.pretrained_model) signal.signal(signal.SIGINT, self.signalhandler) # inference - tracker.videostream_predict(video_stream=video_stream, - output_dir=output_dir, - data_type='mot', - model_type='FairMOT', - visualization=visualization, - draw_threshold=draw_threshold) + tracker.videostream_predict( + video_stream=video_stream, + output_dir=output_dir, + data_type='mot', + model_type='FairMOT', + visualization=visualization, + draw_threshold=draw_threshold) def stream_mode(self, output_dir='mot_result', visualization=True, draw_threshold=0.5, use_gpu=False): ''' @@ -106,11 +108,12 @@ class FairmotTracker_1088x608: return self def __enter__(self): - self.tracker_generator = self.tracker.imagestream_predict(self.output_dir, - data_type='mot', - model_type='FairMOT', - visualization=self.visualization, - draw_threshold=self.draw_threshold) + self.tracker_generator = self.tracker.imagestream_predict( + self.output_dir, + data_type='mot', + model_type='FairMOT', + visualization=self.visualization, + draw_threshold=self.draw_threshold) next(self.tracker_generator) def __exit__(self, exc_type, exc_value, traceback): @@ -129,10 +132,12 @@ class FairmotTracker_1088x608: logger.info('No output images to save for video') return img = cv2.imread(os.path.join(save_dir, '00000.jpg')) - video_writer = cv2.VideoWriter(output_video_path, - fourcc=cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), - fps=30, - frameSize=[img.shape[1], img.shape[0]]) + video_writer = cv2.VideoWriter( + output_video_path, + apiPreference=0, + fourcc=cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), + fps=30, + frameSize=(img.shape[1], img.shape[0])) for i in range(len(imgnames)): imgpath = os.path.join(save_dir, '{:05d}.jpg'.format(i)) img = cv2.imread(imgpath) @@ -169,10 +174,11 @@ class FairmotTracker_1088x608: """ Run as 
a command. """ - self.parser = argparse.ArgumentParser(description="Run the {} module.".format(self.name), - prog='hub run {}'.format(self.name), - usage='%(prog)s', - add_help=True) + self.parser = argparse.ArgumentParser( + description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") self.arg_config_group = self.parser.add_argument_group( @@ -204,10 +210,12 @@ class FairmotTracker_1088x608: logger.info('No output images to save for video') return img = cv2.imread(os.path.join(save_dir, '00000.jpg')) - video_writer = cv2.VideoWriter(output_video_path, - fourcc=cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), - fps=30, - frameSize=[img.shape[1], img.shape[0]]) + video_writer = cv2.VideoWriter( + output_video_path, + apiPreference=0, + fourcc=cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), + fps=30, + frameSize=(img.shape[1], img.shape[0])) for i in range(len(imgnames)): imgpath = os.path.join(save_dir, '{:05d}.jpg'.format(i)) img = cv2.imread(imgpath) @@ -223,22 +231,16 @@ class FairmotTracker_1088x608: """ self.arg_config_group.add_argument('--use_gpu', action='store_true', help="use GPU or not") - self.arg_config_group.add_argument('--output_dir', - type=str, - default='mot_result', - help='Directory name for output tracking results.') - self.arg_config_group.add_argument('--visualization', - action='store_true', - help="whether to save output as images.") - self.arg_config_group.add_argument("--draw_threshold", - type=float, - default=0.5, - help="Threshold to reserve the result for visualization.") + self.arg_config_group.add_argument( + '--output_dir', type=str, default='mot_result', help='Directory name for output tracking results.') + self.arg_config_group.add_argument( + '--visualization', action='store_true', help="whether to save output as images.") + self.arg_config_group.add_argument( + "--draw_threshold", type=float, default=0.5, help="Threshold to reserve the result for visualization.") def add_module_input_arg(self): """ Add the command input options. 
""" - self.arg_input_group.add_argument('--video_stream', - type=str, - help="path to video stream, can be a video file or stream device number.") + self.arg_input_group.add_argument( + '--video_stream', type=str, help="path to video stream, can be a video file or stream device number.") diff --git a/modules/video/multiple_object_tracking/fairmot_dla34/requirements.txt b/modules/video/multiple_object_tracking/fairmot_dla34/requirements.txt index 01118b11cddcf87ff9e97436ec5b7e79c00588b8..8e327df2acddea95260bb92551cfc9afee06b642 100644 --- a/modules/video/multiple_object_tracking/fairmot_dla34/requirements.txt +++ b/modules/video/multiple_object_tracking/fairmot_dla34/requirements.txt @@ -1,2 +1,4 @@ -paddledet >= 2.1.0 +cython +paddledet >= 2.2.0 opencv-python +imageio diff --git a/modules/video/multiple_object_tracking/fairmot_dla34/tracker.py b/modules/video/multiple_object_tracking/fairmot_dla34/tracker.py index 0a916f0e638894423a26451a9a526e33366689e5..016f1e5878b12418ebb29344287bcfc6af830a8e 100644 --- a/modules/video/multiple_object_tracking/fairmot_dla34/tracker.py +++ b/modules/video/multiple_object_tracking/fairmot_dla34/tracker.py @@ -16,18 +16,19 @@ import cv2 import glob import paddle import numpy as np +import collections from ppdet.core.workspace import create from ppdet.utils.checkpoint import load_weight, load_pretrain_weight -from ppdet.modeling.mot.utils import Detection, get_crops, scale_coords, clip_box -from ppdet.modeling.mot.utils import Timer, load_det_results -from ppdet.modeling.mot import visualization as mot_vis from ppdet.metrics import Metric, MOTMetric, KITTIMOTMetric import ppdet.utils.stats as stats from ppdet.engine.callbacks import Callback, ComposeCallback from ppdet.utils.logger import setup_logger from .dataset import MOTVideoStream, MOTImageStream +from .utils import Timer +from .modeling.mot.utils import Detection, get_crops, scale_coords, clip_box +from .modeling.mot import visualization as mot_vis logger = setup_logger(__name__) @@ -41,7 +42,8 @@ class StreamTracker(object): self.optimizer = None # build model - self.model = create(cfg.architecture) + with paddle.no_grad(): + self.model = create(cfg.architecture) self.status = {} self.start_epoch = 0 @@ -70,7 +72,6 @@ class StreamTracker(object): timer.tic() pred_dets, pred_embs = self.model(data) online_targets = self.model.tracker.update(pred_dets, pred_embs) - online_tlwhs, online_ids = [], [] online_scores = [] for t in online_targets: @@ -108,7 +109,6 @@ class StreamTracker(object): timer.tic() pred_dets, pred_embs = self.model(data) online_targets = self.model.tracker.update(pred_dets, pred_embs) - online_tlwhs, online_ids = [], [] online_scores = [] for t in online_targets: @@ -159,13 +159,12 @@ class StreamTracker(object): yield results = [] while True: - with paddle.no_grad(): - try: - results, nf = next(generator) - yield results - except StopIteration as e: - self.write_mot_results(result_filename, results, data_type) - return + try: + results, nf = next(generator) + yield results + except StopIteration as e: + self.write_mot_results(result_filename, results, data_type) + return def videostream_predict(self, video_stream, @@ -175,7 +174,7 @@ class StreamTracker(object): visualization=True, draw_threshold=0.5): assert video_stream is not None, \ - "--video_file or --image_dir should be set." + "--video_stream should be set." 
if not os.path.exists(output_dir): os.makedirs(output_dir) result_root = os.path.join(output_dir, 'mot_results') @@ -215,9 +214,10 @@ class StreamTracker(object): img = cv2.imread(os.path.join(save_dir, '00000.jpg')) video_writer = cv2.VideoWriter( output_video_path, + apiPreference=0, fourcc=cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), fps=30, - frameSize=[img.shape[1], img.shape[0]]) + frameSize=(img.shape[1], img.shape[0])) for i in range(len(imgnames)): imgpath = os.path.join(save_dir, '{:05d}.jpg'.format(i)) img = cv2.imread(imgpath) diff --git a/modules/video/multiple_object_tracking/fairmot_dla34/utils.py b/modules/video/multiple_object_tracking/fairmot_dla34/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..4426f217f9f5fb5c7afa6593c2b83ce4b67236f9 --- /dev/null +++ b/modules/video/multiple_object_tracking/fairmot_dla34/utils.py @@ -0,0 +1,39 @@ +import time + + +class Timer(object): + """ + This class used to compute and print the current FPS while evaling. + """ + + def __init__(self): + self.total_time = 0. + self.calls = 0 + self.start_time = 0. + self.diff = 0. + self.average_time = 0. + self.duration = 0. + + def tic(self): + # using time.time instead of time.clock because time time.clock + # does not normalize for multithreading + self.start_time = time.time() + + def toc(self, average=True): + self.diff = time.time() - self.start_time + self.total_time += self.diff + self.calls += 1 + self.average_time = self.total_time / self.calls + if average: + self.duration = self.average_time + else: + self.duration = self.diff + return self.duration + + def clear(self): + self.total_time = 0. + self.calls = 0 + self.start_time = 0. + self.diff = 0. + self.average_time = 0. + self.duration = 0. diff --git a/modules/video/multiple_object_tracking/jde_darknet53/README.md b/modules/video/multiple_object_tracking/jde_darknet53/README.md index 26829586031bceed1d0d100c6ef4045ddf6dc64c..87e0755cce6c439e4f0ebb04c6ef5e3f740d433a 100644 --- a/modules/video/multiple_object_tracking/jde_darknet53/README.md +++ b/modules/video/multiple_object_tracking/jde_darknet53/README.md @@ -31,7 +31,7 @@ - ### 1、环境依赖 - - paddledet >= 2.1.0 + - paddledet >= 2.2.0 - opencv-python @@ -42,6 +42,7 @@ ``` - 如您安装时遇到问题,可参考:[零基础windows安装](../../../../docs/docs_ch/get_start/windows_quickstart.md) | [零基础Linux安装](../../../../docs/docs_ch/get_start/linux_quickstart.md) | [零基础MacOS安装](../../../../docs/docs_ch/get_start/mac_quickstart.md) + - 在windows下安装,由于paddledet package会依赖cython-bbox以及pycocotools, 这两个包需要windows用户提前装好,可参考[cython-bbox安装](https://blog.csdn.net/qq_24739717/article/details/105588729)和[pycocotools安装](https://github.com/PaddlePaddle/PaddleX/blob/release/1.3/docs/install.md#pycocotools安装问题) ## 三、模型API预测 @@ -54,7 +55,7 @@ ``` - 通过命令行方式实现多目标追踪模型的调用,更多请见 [PaddleHub命令行指令](../../../../docs/docs_ch/tutorial/cmd_usage.rst) -- ### 2、代码示例 +- ### 2、预测代码示例 - ```python import paddlehub as hub diff --git a/modules/video/multiple_object_tracking/jde_darknet53/config/_base_/jde_darknet53.yml b/modules/video/multiple_object_tracking/jde_darknet53/config/_base_/jde_darknet53.yml index 73faa52f662e7db24ef40c25c029561225d1a3b8..dcc67ac4276c3e8a3abd81950d970f3643d05551 100644 --- a/modules/video/multiple_object_tracking/jde_darknet53/config/_base_/jde_darknet53.yml +++ b/modules/video/multiple_object_tracking/jde_darknet53/config/_base_/jde_darknet53.yml @@ -5,7 +5,7 @@ find_unused_parameters: True JDE: detector: YOLOv3 reid: JDEEmbeddingHead - tracker: JDETracker + tracker: FrozenJDETracker YOLOv3: 
backbone: DarkNet diff --git a/modules/video/multiple_object_tracking/jde_darknet53/config/jde_darknet53_30e_1088x608.yml b/modules/video/multiple_object_tracking/jde_darknet53/config/jde_darknet53_30e_1088x608.yml index d2ac3aee460aaa378dcef11c3a3fce9aa4c29f05..33fa547afe9f95f5dfe7ea321c3e9be1c3634e1d 100644 --- a/modules/video/multiple_object_tracking/jde_darknet53/config/jde_darknet53_30e_1088x608.yml +++ b/modules/video/multiple_object_tracking/jde_darknet53/config/jde_darknet53_30e_1088x608.yml @@ -9,7 +9,7 @@ _BASE_: [ JDE: detector: YOLOv3 reid: JDEEmbeddingHead - tracker: JDETracker + tracker: FrozenJDETracker YOLOv3: backbone: DarkNet diff --git a/modules/video/multiple_object_tracking/jde_darknet53/modeling/mot/__init__.py b/modules/video/multiple_object_tracking/jde_darknet53/modeling/mot/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..258e4c9010832936f098e6febe777ac556f0668f --- /dev/null +++ b/modules/video/multiple_object_tracking/jde_darknet53/modeling/mot/__init__.py @@ -0,0 +1,25 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from . import matching +from . import tracker +from . import motion +from . import visualization +from . import utils + +from .matching import * +from .tracker import * +from .motion import * +from .visualization import * +from .utils import * diff --git a/modules/video/multiple_object_tracking/jde_darknet53/modeling/mot/matching/__init__.py b/modules/video/multiple_object_tracking/jde_darknet53/modeling/mot/matching/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..54c6680f79f16247c562a9da1024dd3e1de4c57f --- /dev/null +++ b/modules/video/multiple_object_tracking/jde_darknet53/modeling/mot/matching/__init__.py @@ -0,0 +1,19 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from . import jde_matching +from . 
import deepsort_matching + +from .jde_matching import * +from .deepsort_matching import * diff --git a/modules/video/multiple_object_tracking/jde_darknet53/modeling/mot/matching/deepsort_matching.py b/modules/video/multiple_object_tracking/jde_darknet53/modeling/mot/matching/deepsort_matching.py new file mode 100644 index 0000000000000000000000000000000000000000..c55aa8876cc128f512aa4e2e4e48a935a3f8dd77 --- /dev/null +++ b/modules/video/multiple_object_tracking/jde_darknet53/modeling/mot/matching/deepsort_matching.py @@ -0,0 +1,368 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +This code is borrow from https://github.com/nwojke/deep_sort/tree/master/deep_sort +""" + +import numpy as np +from scipy.optimize import linear_sum_assignment +from ..motion import kalman_filter + +INFTY_COST = 1e+5 + +__all__ = [ + 'iou_1toN', + 'iou_cost', + '_nn_euclidean_distance', + '_nn_cosine_distance', + 'NearestNeighborDistanceMetric', + 'min_cost_matching', + 'matching_cascade', + 'gate_cost_matrix', +] + + +def iou_1toN(bbox, candidates): + """ + Computer intersection over union (IoU) by one box to N candidates. + + Args: + bbox (ndarray): A bounding box in format `(top left x, top left y, width, height)`. + candidates (ndarray): A matrix of candidate bounding boxes (one per row) in the + same format as `bbox`. + + Returns: + ious (ndarray): The intersection over union in [0, 1] between the `bbox` + and each candidate. A higher score means a larger fraction of the + `bbox` is occluded by the candidate. + """ + bbox_tl = bbox[:2] + bbox_br = bbox[:2] + bbox[2:] + candidates_tl = candidates[:, :2] + candidates_br = candidates[:, :2] + candidates[:, 2:] + + tl = np.c_[np.maximum(bbox_tl[0], candidates_tl[:, 0])[:, np.newaxis], + np.maximum(bbox_tl[1], candidates_tl[:, 1])[:, np.newaxis]] + br = np.c_[np.minimum(bbox_br[0], candidates_br[:, 0])[:, np.newaxis], + np.minimum(bbox_br[1], candidates_br[:, 1])[:, np.newaxis]] + wh = np.maximum(0., br - tl) + + area_intersection = wh.prod(axis=1) + area_bbox = bbox[2:].prod() + area_candidates = candidates[:, 2:].prod(axis=1) + ious = area_intersection / (area_bbox + area_candidates - area_intersection) + return ious + + +def iou_cost(tracks, detections, track_indices=None, detection_indices=None): + """ + IoU distance metric. + + Args: + tracks (list[Track]): A list of tracks. + detections (list[Detection]): A list of detections. + track_indices (Optional[list[int]]): A list of indices to tracks that + should be matched. Defaults to all `tracks`. + detection_indices (Optional[list[int]]): A list of indices to detections + that should be matched. Defaults to all `detections`. + + Returns: + cost_matrix (ndarray): A cost matrix of shape len(track_indices), + len(detection_indices) where entry (i, j) is + `1 - iou(tracks[track_indices[i]], detections[detection_indices[j]])`. 
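+            Rows for tracks with `time_since_update > 1` are filled with
+            INFTY_COST (1e+5), so such tracks are effectively never matched here.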
+ """ + if track_indices is None: + track_indices = np.arange(len(tracks)) + if detection_indices is None: + detection_indices = np.arange(len(detections)) + + cost_matrix = np.zeros((len(track_indices), len(detection_indices))) + for row, track_idx in enumerate(track_indices): + if tracks[track_idx].time_since_update > 1: + cost_matrix[row, :] = 1e+5 + continue + + bbox = tracks[track_idx].to_tlwh() + candidates = np.asarray([detections[i].tlwh for i in detection_indices]) + cost_matrix[row, :] = 1. - iou_1toN(bbox, candidates) + return cost_matrix + + +def _nn_euclidean_distance(s, q): + """ + Compute pair-wise squared (Euclidean) distance between points in `s` and `q`. + + Args: + s (ndarray): Sample points: an NxM matrix of N samples of dimensionality M. + q (ndarray): Query points: an LxM matrix of L samples of dimensionality M. + + Returns: + distances (ndarray): A vector of length M that contains for each entry in `q` the + smallest Euclidean distance to a sample in `s`. + """ + s, q = np.asarray(s), np.asarray(q) + if len(s) == 0 or len(q) == 0: + return np.zeros((len(s), len(q))) + s2, q2 = np.square(s).sum(axis=1), np.square(q).sum(axis=1) + distances = -2. * np.dot(s, q.T) + s2[:, None] + q2[None, :] + distances = np.clip(distances, 0., float(np.inf)) + + return np.maximum(0.0, distances.min(axis=0)) + + +def _nn_cosine_distance(s, q): + """ + Compute pair-wise cosine distance between points in `s` and `q`. + + Args: + s (ndarray): Sample points: an NxM matrix of N samples of dimensionality M. + q (ndarray): Query points: an LxM matrix of L samples of dimensionality M. + + Returns: + distances (ndarray): A vector of length M that contains for each entry in `q` the + smallest Euclidean distance to a sample in `s`. + """ + s = np.asarray(s) / np.linalg.norm(s, axis=1, keepdims=True) + q = np.asarray(q) / np.linalg.norm(q, axis=1, keepdims=True) + distances = 1. - np.dot(s, q.T) + + return distances.min(axis=0) + + +class NearestNeighborDistanceMetric(object): + """ + A nearest neighbor distance metric that, for each target, returns + the closest distance to any sample that has been observed so far. + + Args: + metric (str): Either "euclidean" or "cosine". + matching_threshold (float): The matching threshold. Samples with larger + distance are considered an invalid match. + budget (Optional[int]): If not None, fix samples per class to at most + this number. Removes the oldest samples when the budget is reached. + + Attributes: + samples (Dict[int -> List[ndarray]]): A dictionary that maps from target + identities to the list of samples that have been observed so far. + """ + + def __init__(self, metric, matching_threshold, budget=None): + if metric == "euclidean": + self._metric = _nn_euclidean_distance + elif metric == "cosine": + self._metric = _nn_cosine_distance + else: + raise ValueError("Invalid metric; must be either 'euclidean' or 'cosine'") + self.matching_threshold = matching_threshold + self.budget = budget + self.samples = {} + + def partial_fit(self, features, targets, active_targets): + """ + Update the distance metric with new data. + + Args: + features (ndarray): An NxM matrix of N features of dimensionality M. + targets (ndarray): An integer array of associated target identities. + active_targets (List[int]): A list of targets that are currently + present in the scene. 
+ """ + for feature, target in zip(features, targets): + self.samples.setdefault(target, []).append(feature) + if self.budget is not None: + self.samples[target] = self.samples[target][-self.budget:] + self.samples = {k: self.samples[k] for k in active_targets} + + def distance(self, features, targets): + """ + Compute distance between features and targets. + + Args: + features (ndarray): An NxM matrix of N features of dimensionality M. + targets (list[int]): A list of targets to match the given `features` against. + + Returns: + cost_matrix (ndarray): a cost matrix of shape len(targets), len(features), + where element (i, j) contains the closest squared distance between + `targets[i]` and `features[j]`. + """ + cost_matrix = np.zeros((len(targets), len(features))) + for i, target in enumerate(targets): + cost_matrix[i, :] = self._metric(self.samples[target], features) + return cost_matrix + + +def min_cost_matching(distance_metric, max_distance, tracks, detections, track_indices=None, detection_indices=None): + """ + Solve linear assignment problem. + + Args: + distance_metric : + Callable[List[Track], List[Detection], List[int], List[int]) -> ndarray + The distance metric is given a list of tracks and detections as + well as a list of N track indices and M detection indices. The + metric should return the NxM dimensional cost matrix, where element + (i, j) is the association cost between the i-th track in the given + track indices and the j-th detection in the given detection_indices. + max_distance (float): Gating threshold. Associations with cost larger + than this value are disregarded. + tracks (list[Track]): A list of predicted tracks at the current time + step. + detections (list[Detection]): A list of detections at the current time + step. + track_indices (list[int]): List of track indices that maps rows in + `cost_matrix` to tracks in `tracks`. + detection_indices (List[int]): List of detection indices that maps + columns in `cost_matrix` to detections in `detections`. + + Returns: + A tuple (List[(int, int)], List[int], List[int]) with the following + three entries: + * A list of matched track and detection indices. + * A list of unmatched track indices. + * A list of unmatched detection indices. + """ + if track_indices is None: + track_indices = np.arange(len(tracks)) + if detection_indices is None: + detection_indices = np.arange(len(detections)) + + if len(detection_indices) == 0 or len(track_indices) == 0: + return [], track_indices, detection_indices # Nothing to match. 
+ + cost_matrix = distance_metric(tracks, detections, track_indices, detection_indices) + + cost_matrix[cost_matrix > max_distance] = max_distance + 1e-5 + indices = linear_sum_assignment(cost_matrix) + + matches, unmatched_tracks, unmatched_detections = [], [], [] + for col, detection_idx in enumerate(detection_indices): + if col not in indices[1]: + unmatched_detections.append(detection_idx) + for row, track_idx in enumerate(track_indices): + if row not in indices[0]: + unmatched_tracks.append(track_idx) + for row, col in zip(indices[0], indices[1]): + track_idx = track_indices[row] + detection_idx = detection_indices[col] + if cost_matrix[row, col] > max_distance: + unmatched_tracks.append(track_idx) + unmatched_detections.append(detection_idx) + else: + matches.append((track_idx, detection_idx)) + return matches, unmatched_tracks, unmatched_detections + + +def matching_cascade(distance_metric, + max_distance, + cascade_depth, + tracks, + detections, + track_indices=None, + detection_indices=None): + """ + Run matching cascade. + + Args: + distance_metric : + Callable[List[Track], List[Detection], List[int], List[int]) -> ndarray + The distance metric is given a list of tracks and detections as + well as a list of N track indices and M detection indices. The + metric should return the NxM dimensional cost matrix, where element + (i, j) is the association cost between the i-th track in the given + track indices and the j-th detection in the given detection_indices. + max_distance (float): Gating threshold. Associations with cost larger + than this value are disregarded. + cascade_depth (int): The cascade depth, should be se to the maximum + track age. + tracks (list[Track]): A list of predicted tracks at the current time + step. + detections (list[Detection]): A list of detections at the current time + step. + track_indices (list[int]): List of track indices that maps rows in + `cost_matrix` to tracks in `tracks`. + detection_indices (List[int]): List of detection indices that maps + columns in `cost_matrix` to detections in `detections`. + + Returns: + A tuple (List[(int, int)], List[int], List[int]) with the following + three entries: + * A list of matched track and detection indices. + * A list of unmatched track indices. + * A list of unmatched detection indices. + """ + if track_indices is None: + track_indices = list(range(len(tracks))) + if detection_indices is None: + detection_indices = list(range(len(detections))) + + unmatched_detections = detection_indices + matches = [] + for level in range(cascade_depth): + if len(unmatched_detections) == 0: # No detections left + break + + track_indices_l = [k for k in track_indices if tracks[k].time_since_update == 1 + level] + if len(track_indices_l) == 0: # Nothing to match at this level + continue + + matches_l, _, unmatched_detections = \ + min_cost_matching( + distance_metric, max_distance, tracks, detections, + track_indices_l, unmatched_detections) + matches += matches_l + unmatched_tracks = list(set(track_indices) - set(k for k, _ in matches)) + return matches, unmatched_tracks, unmatched_detections + + +def gate_cost_matrix(kf, + cost_matrix, + tracks, + detections, + track_indices, + detection_indices, + gated_cost=INFTY_COST, + only_position=False): + """ + Invalidate infeasible entries in cost matrix based on the state + distributions obtained by Kalman filtering. + + Args: + kf (object): The Kalman filter. 
+        cost_matrix (ndarray): The NxM dimensional cost matrix, where N is the
+            number of track indices and M is the number of detection indices,
+            such that entry (i, j) is the association cost between
+            `tracks[track_indices[i]]` and `detections[detection_indices[j]]`.
+        tracks (list[Track]): A list of predicted tracks at the current time
+            step.
+        detections (list[Detection]): A list of detections at the current time
+            step.
+        track_indices (List[int]): List of track indices that maps rows in
+            `cost_matrix` to tracks in `tracks`.
+        detection_indices (List[int]): List of detection indices that maps
+            columns in `cost_matrix` to detections in `detections`.
+        gated_cost (Optional[float]): Entries in the cost matrix corresponding
+            to infeasible associations are set to this value. Defaults to a very
+            large value.
+        only_position (Optional[bool]): If True, only the x, y position of the
+            state distribution is considered during gating. Default False.
+    """
+    gating_dim = 2 if only_position else 4
+    gating_threshold = kalman_filter.chi2inv95[gating_dim]
+    measurements = np.asarray([detections[i].to_xyah() for i in detection_indices])
+    for row, track_idx in enumerate(track_indices):
+        track = tracks[track_idx]
+        gating_distance = kf.gating_distance(track.mean, track.covariance, measurements, only_position)
+        cost_matrix[row, gating_distance > gating_threshold] = gated_cost
+    return cost_matrix
diff --git a/modules/video/multiple_object_tracking/jde_darknet53/modeling/mot/matching/jde_matching.py b/modules/video/multiple_object_tracking/jde_darknet53/modeling/mot/matching/jde_matching.py
new file mode 100644
index 0000000000000000000000000000000000000000..bf2e891c391c98ed8944f88377f62c9722fa5155
--- /dev/null
+++ b/modules/video/multiple_object_tracking/jde_darknet53/modeling/mot/matching/jde_matching.py
@@ -0,0 +1,123 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
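As a quick sanity check of the `(top left x, top left y, width, height)` box convention that deepsort_matching.py uses throughout, the snippet below reproduces the arithmetic of iou_1toN on made-up boxes with plain NumPy (the numbers are illustrative only):

import numpy as np

# One track box and three candidate detections, all as (top left x, top left y, w, h).
bbox = np.array([0., 0., 10., 10.])
candidates = np.array([[0., 0., 10., 10.],    # identical box
                       [5., 5., 10., 10.],    # partial overlap
                       [20., 20., 5., 5.]])   # no overlap

tl = np.maximum(bbox[:2], candidates[:, :2])                                  # intersection top-left
br = np.minimum(bbox[:2] + bbox[2:], candidates[:, :2] + candidates[:, 2:])   # intersection bottom-right
wh = np.maximum(0., br - tl)
inter = wh.prod(axis=1)
ious = inter / (bbox[2:].prod() + candidates[:, 2:].prod(axis=1) - inter)
print(ious)   # -> [1.0, 0.1428..., 0.0]; iou_cost then uses 1 - iou as the assignment cost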
+""" +This code is borrow from https://github.com/Zhongdao/Towards-Realtime-MOT/blob/master/tracker/matching.py +""" + +import lap +import scipy +import numpy as np +from scipy.spatial.distance import cdist +from ..motion import kalman_filter + +from ppdet.utils.logger import setup_logger +logger = setup_logger(__name__) + +__all__ = [ + 'merge_matches', + 'linear_assignment', + 'cython_bbox_ious', + 'iou_distance', + 'embedding_distance', + 'fuse_motion', +] + + +def merge_matches(m1, m2, shape): + O, P, Q = shape + m1 = np.asarray(m1) + m2 = np.asarray(m2) + + M1 = scipy.sparse.coo_matrix((np.ones(len(m1)), (m1[:, 0], m1[:, 1])), shape=(O, P)) + M2 = scipy.sparse.coo_matrix((np.ones(len(m2)), (m2[:, 0], m2[:, 1])), shape=(P, Q)) + + mask = M1 * M2 + match = mask.nonzero() + match = list(zip(match[0], match[1])) + unmatched_O = tuple(set(range(O)) - set([i for i, j in match])) + unmatched_Q = tuple(set(range(Q)) - set([j for i, j in match])) + + return match, unmatched_O, unmatched_Q + + +def linear_assignment(cost_matrix, thresh): + if cost_matrix.size == 0: + return np.empty((0, 2), dtype=int), tuple(range(cost_matrix.shape[0])), tuple(range(cost_matrix.shape[1])) + matches, unmatched_a, unmatched_b = [], [], [] + cost, x, y = lap.lapjv(cost_matrix, extend_cost=True, cost_limit=thresh) + for ix, mx in enumerate(x): + if mx >= 0: + matches.append([ix, mx]) + unmatched_a = np.where(x < 0)[0] + unmatched_b = np.where(y < 0)[0] + matches = np.asarray(matches) + return matches, unmatched_a, unmatched_b + + +def cython_bbox_ious(atlbrs, btlbrs): + ious = np.zeros((len(atlbrs), len(btlbrs)), dtype=np.float) + if ious.size == 0: + return ious + try: + import cython_bbox + except Exception as e: + logger.error('cython_bbox not found, please install cython_bbox.' 'for example: `pip install cython_bbox`.') + raise e + + ious = cython_bbox.bbox_overlaps( + np.ascontiguousarray(atlbrs, dtype=np.float), np.ascontiguousarray(btlbrs, dtype=np.float)) + return ious + + +def iou_distance(atracks, btracks): + """ + Compute cost based on IoU between two list[STrack]. + """ + if (len(atracks) > 0 and isinstance(atracks[0], np.ndarray)) or (len(btracks) > 0 + and isinstance(btracks[0], np.ndarray)): + atlbrs = atracks + btlbrs = btracks + else: + atlbrs = [track.tlbr for track in atracks] + btlbrs = [track.tlbr for track in btracks] + _ious = cython_bbox_ious(atlbrs, btlbrs) + cost_matrix = 1 - _ious + + return cost_matrix + + +def embedding_distance(tracks, detections, metric='euclidean'): + """ + Compute cost based on features between two list[STrack]. 
+ """ + cost_matrix = np.zeros((len(tracks), len(detections)), dtype=np.float) + if cost_matrix.size == 0: + return cost_matrix + det_features = np.asarray([track.curr_feat for track in detections], dtype=np.float) + track_features = np.asarray([track.smooth_feat for track in tracks], dtype=np.float) + cost_matrix = np.maximum(0.0, cdist(track_features, det_features, metric)) # Nomalized features + return cost_matrix + + +def fuse_motion(kf, cost_matrix, tracks, detections, only_position=False, lambda_=0.98): + if cost_matrix.size == 0: + return cost_matrix + gating_dim = 2 if only_position else 4 + gating_threshold = kalman_filter.chi2inv95[gating_dim] + measurements = np.asarray([det.to_xyah() for det in detections]) + for row, track in enumerate(tracks): + gating_distance = kf.gating_distance(track.mean, track.covariance, measurements, only_position, metric='maha') + cost_matrix[row, gating_distance > gating_threshold] = np.inf + cost_matrix[row] = lambda_ * cost_matrix[row] + (1 - lambda_) * gating_distance + return cost_matrix diff --git a/modules/video/multiple_object_tracking/jde_darknet53/modeling/mot/motion/__init__.py b/modules/video/multiple_object_tracking/jde_darknet53/modeling/mot/motion/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e42dd0b019d66d6ea07bec1ad90cf9a8d53d8172 --- /dev/null +++ b/modules/video/multiple_object_tracking/jde_darknet53/modeling/mot/motion/__init__.py @@ -0,0 +1,17 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from . import kalman_filter + +from .kalman_filter import * diff --git a/modules/video/multiple_object_tracking/jde_darknet53/modeling/mot/motion/kalman_filter.py b/modules/video/multiple_object_tracking/jde_darknet53/modeling/mot/motion/kalman_filter.py new file mode 100644 index 0000000000000000000000000000000000000000..7cc182e4c5e76e0688688c883b2a24fa30df9c74 --- /dev/null +++ b/modules/video/multiple_object_tracking/jde_darknet53/modeling/mot/motion/kalman_filter.py @@ -0,0 +1,237 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +This code is borrow from https://github.com/nwojke/deep_sort/blob/master/deep_sort/kalman_filter.py +""" + +import numpy as np +import scipy.linalg + +__all__ = ['KalmanFilter'] +""" +Table for the 0.95 quantile of the chi-square distribution with N degrees of +freedom (contains values for N=1, ..., 9). 
Taken from MATLAB/Octave's chi2inv +function and used as Mahalanobis gating threshold. +""" + +chi2inv95 = {1: 3.8415, 2: 5.9915, 3: 7.8147, 4: 9.4877, 5: 11.070, 6: 12.592, 7: 14.067, 8: 15.507, 9: 16.919} + + +class KalmanFilter(object): + """ + A simple Kalman filter for tracking bounding boxes in image space. + + The 8-dimensional state space + + x, y, a, h, vx, vy, va, vh + + contains the bounding box center position (x, y), aspect ratio a, height h, + and their respective velocities. + + Object motion follows a constant velocity model. The bounding box location + (x, y, a, h) is taken as direct observation of the state space (linear + observation model). + + """ + + def __init__(self): + ndim, dt = 4, 1. + + # Create Kalman filter model matrices. + self._motion_mat = np.eye(2 * ndim, 2 * ndim) + for i in range(ndim): + self._motion_mat[i, ndim + i] = dt + self._update_mat = np.eye(ndim, 2 * ndim) + + # Motion and observation uncertainty are chosen relative to the current + # state estimate. These weights control the amount of uncertainty in + # the model. This is a bit hacky. + self._std_weight_position = 1. / 20 + self._std_weight_velocity = 1. / 160 + + def initiate(self, measurement): + """ + Create track from unassociated measurement. + + Args: + measurement (ndarray): Bounding box coordinates (x, y, a, h) with + center position (x, y), aspect ratio a, and height h. + + Returns: + The mean vector (8 dimensional) and covariance matrix (8x8 + dimensional) of the new track. Unobserved velocities are + initialized to 0 mean. + """ + mean_pos = measurement + mean_vel = np.zeros_like(mean_pos) + mean = np.r_[mean_pos, mean_vel] + + std = [ + 2 * self._std_weight_position * measurement[3], 2 * self._std_weight_position * measurement[3], 1e-2, + 2 * self._std_weight_position * measurement[3], 10 * self._std_weight_velocity * measurement[3], + 10 * self._std_weight_velocity * measurement[3], 1e-5, 10 * self._std_weight_velocity * measurement[3] + ] + covariance = np.diag(np.square(std)) + return mean, covariance + + def predict(self, mean, covariance): + """ + Run Kalman filter prediction step. + + Args: + mean (ndarray): The 8 dimensional mean vector of the object state + at the previous time step. + covariance (ndarray): The 8x8 dimensional covariance matrix of the + object state at the previous time step. + + Returns: + The mean vector and covariance matrix of the predicted state. + Unobserved velocities are initialized to 0 mean. + """ + std_pos = [ + self._std_weight_position * mean[3], self._std_weight_position * mean[3], 1e-2, + self._std_weight_position * mean[3] + ] + std_vel = [ + self._std_weight_velocity * mean[3], self._std_weight_velocity * mean[3], 1e-5, + self._std_weight_velocity * mean[3] + ] + motion_cov = np.diag(np.square(np.r_[std_pos, std_vel])) + + #mean = np.dot(self._motion_mat, mean) + mean = np.dot(mean, self._motion_mat.T) + covariance = np.linalg.multi_dot((self._motion_mat, covariance, self._motion_mat.T)) + motion_cov + + return mean, covariance + + def project(self, mean, covariance): + """ + Project state distribution to measurement space. + + Args + mean (ndarray): The state's mean vector (8 dimensional array). + covariance (ndarray): The state's covariance matrix (8x8 dimensional). + + Returns: + The projected mean and covariance matrix of the given state estimate. 
+ """ + std = [ + self._std_weight_position * mean[3], self._std_weight_position * mean[3], 1e-1, + self._std_weight_position * mean[3] + ] + innovation_cov = np.diag(np.square(std)) + + mean = np.dot(self._update_mat, mean) + covariance = np.linalg.multi_dot((self._update_mat, covariance, self._update_mat.T)) + return mean, covariance + innovation_cov + + def multi_predict(self, mean, covariance): + """ + Run Kalman filter prediction step (Vectorized version). + + Args: + mean (ndarray): The Nx8 dimensional mean matrix of the object states + at the previous time step. + covariance (ndarray): The Nx8x8 dimensional covariance matrics of the + object states at the previous time step. + + Returns: + The mean vector and covariance matrix of the predicted state. + Unobserved velocities are initialized to 0 mean. + """ + std_pos = [ + self._std_weight_position * mean[:, 3], self._std_weight_position * mean[:, 3], + 1e-2 * np.ones_like(mean[:, 3]), self._std_weight_position * mean[:, 3] + ] + std_vel = [ + self._std_weight_velocity * mean[:, 3], self._std_weight_velocity * mean[:, 3], + 1e-5 * np.ones_like(mean[:, 3]), self._std_weight_velocity * mean[:, 3] + ] + sqr = np.square(np.r_[std_pos, std_vel]).T + + motion_cov = [] + for i in range(len(mean)): + motion_cov.append(np.diag(sqr[i])) + motion_cov = np.asarray(motion_cov) + + mean = np.dot(mean, self._motion_mat.T) + left = np.dot(self._motion_mat, covariance).transpose((1, 0, 2)) + covariance = np.dot(left, self._motion_mat.T) + motion_cov + + return mean, covariance + + def update(self, mean, covariance, measurement): + """ + Run Kalman filter correction step. + + Args: + mean (ndarray): The predicted state's mean vector (8 dimensional). + covariance (ndarray): The state's covariance matrix (8x8 dimensional). + measurement (ndarray): The 4 dimensional measurement vector + (x, y, a, h), where (x, y) is the center position, a the aspect + ratio, and h the height of the bounding box. + + Returns: + The measurement-corrected state distribution. + """ + projected_mean, projected_cov = self.project(mean, covariance) + + chol_factor, lower = scipy.linalg.cho_factor(projected_cov, lower=True, check_finite=False) + kalman_gain = scipy.linalg.cho_solve((chol_factor, lower), + np.dot(covariance, self._update_mat.T).T, + check_finite=False).T + innovation = measurement - projected_mean + + new_mean = mean + np.dot(innovation, kalman_gain.T) + new_covariance = covariance - np.linalg.multi_dot((kalman_gain, projected_cov, kalman_gain.T)) + return new_mean, new_covariance + + def gating_distance(self, mean, covariance, measurements, only_position=False, metric='maha'): + """ + Compute gating distance between state distribution and measurements. + A suitable distance threshold can be obtained from `chi2inv95`. If + `only_position` is False, the chi-square distribution has 4 degrees of + freedom, otherwise 2. + + Args: + mean (ndarray): Mean vector over the state distribution (8 + dimensional). + covariance (ndarray): Covariance of the state distribution (8x8 + dimensional). + measurements (ndarray): An Nx4 dimensional matrix of N measurements, + each in format (x, y, a, h) where (x, y) is the bounding box center + position, a the aspect ratio, and h the height. + only_position (Optional[bool]): If True, distance computation is + done with respect to the bounding box center position only. + metric (str): Metric type, 'gaussian' or 'maha'. 
+ + Returns + An array of length N, where the i-th element contains the squared + Mahalanobis distance between (mean, covariance) and `measurements[i]`. + """ + mean, covariance = self.project(mean, covariance) + if only_position: + mean, covariance = mean[:2], covariance[:2, :2] + measurements = measurements[:, :2] + + d = measurements - mean + if metric == 'gaussian': + return np.sum(d * d, axis=1) + elif metric == 'maha': + cholesky_factor = np.linalg.cholesky(covariance) + z = scipy.linalg.solve_triangular(cholesky_factor, d.T, lower=True, check_finite=False, overwrite_b=True) + squared_maha = np.sum(z * z, axis=0) + return squared_maha + else: + raise ValueError('invalid distance metric') diff --git a/modules/video/multiple_object_tracking/jde_darknet53/modeling/mot/tracker/__init__.py b/modules/video/multiple_object_tracking/jde_darknet53/modeling/mot/tracker/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..904822119661be61141715c638388db9d045fee1 --- /dev/null +++ b/modules/video/multiple_object_tracking/jde_darknet53/modeling/mot/tracker/__init__.py @@ -0,0 +1,21 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from . import base_jde_tracker +from . import base_sde_tracker +from . import jde_tracker + +from .base_jde_tracker import * +from .base_sde_tracker import * +from .jde_tracker import * diff --git a/modules/video/multiple_object_tracking/jde_darknet53/modeling/mot/tracker/base_jde_tracker.py b/modules/video/multiple_object_tracking/jde_darknet53/modeling/mot/tracker/base_jde_tracker.py new file mode 100644 index 0000000000000000000000000000000000000000..9505a709ee573acecf4b5dd7e02a06cee9d44284 --- /dev/null +++ b/modules/video/multiple_object_tracking/jde_darknet53/modeling/mot/tracker/base_jde_tracker.py @@ -0,0 +1,257 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
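The KalmanFilter above is driven in an initiate → predict → update loop, with chi2inv95 supplying the gating threshold used together with gating_distance. A minimal sketch of that cycle on invented measurements (the import path is a placeholder; adjust it to wherever this file lives in your checkout):

import numpy as np
from kalman_filter import KalmanFilter, chi2inv95  # placeholder import path

kf = KalmanFilter()
# First detection in (center x, center y, aspect ratio, height) format.
mean, cov = kf.initiate(np.array([100., 200., 0.5, 80.]))
# Each new frame: propagate the state, then correct it with the matched detection.
mean, cov = kf.predict(mean, cov)
mean, cov = kf.update(mean, cov, np.array([103., 198., 0.5, 82.]))

# Gating: detections whose squared Mahalanobis distance to the track exceeds
# chi2inv95[4] are treated as infeasible associations (see gate_cost_matrix above).
candidates = np.array([[103., 198., 0.5, 82.], [400., 50., 0.6, 60.]])
dist = kf.gating_distance(mean, cov, candidates)
feasible = dist <= chi2inv95[4]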
+""" +This code is borrow from https://github.com/Zhongdao/Towards-Realtime-MOT/blob/master/tracker/multitracker.py +""" + +import numpy as np +from collections import deque, OrderedDict +from ..matching import jde_matching as matching +from ppdet.core.workspace import register, serializable + +__all__ = [ + 'TrackState', + 'BaseTrack', + 'STrack', + 'joint_stracks', + 'sub_stracks', + 'remove_duplicate_stracks', +] + + +class TrackState(object): + New = 0 + Tracked = 1 + Lost = 2 + Removed = 3 + + +class BaseTrack(object): + _count = 0 + + track_id = 0 + is_activated = False + state = TrackState.New + + history = OrderedDict() + features = [] + curr_feature = None + score = 0 + start_frame = 0 + frame_id = 0 + time_since_update = 0 + + # multi-camera + location = (np.inf, np.inf) + + @property + def end_frame(self): + return self.frame_id + + @staticmethod + def next_id(): + BaseTrack._count += 1 + return BaseTrack._count + + def activate(self, *args): + raise NotImplementedError + + def predict(self): + raise NotImplementedError + + def update(self, *args, **kwargs): + raise NotImplementedError + + def mark_lost(self): + self.state = TrackState.Lost + + def mark_removed(self): + self.state = TrackState.Removed + + +class STrack(BaseTrack): + def __init__(self, tlwh, score, temp_feat, buffer_size=30): + # wait activate + self._tlwh = np.asarray(tlwh, dtype=np.float) + self.kalman_filter = None + self.mean, self.covariance = None, None + self.is_activated = False + + self.score = score + self.tracklet_len = 0 + + self.smooth_feat = None + self.update_features(temp_feat) + self.features = deque([], maxlen=buffer_size) + self.alpha = 0.9 + + def update_features(self, feat): + feat /= np.linalg.norm(feat) + self.curr_feat = feat + if self.smooth_feat is None: + self.smooth_feat = feat + else: + self.smooth_feat = self.alpha * self.smooth_feat + (1 - self.alpha) * feat + self.features.append(feat) + self.smooth_feat /= np.linalg.norm(self.smooth_feat) + + def predict(self): + mean_state = self.mean.copy() + if self.state != TrackState.Tracked: + mean_state[7] = 0 + self.mean, self.covariance = self.kalman_filter.predict(mean_state, self.covariance) + + @staticmethod + def multi_predict(stracks, kalman_filter): + if len(stracks) > 0: + multi_mean = np.asarray([st.mean.copy() for st in stracks]) + multi_covariance = np.asarray([st.covariance for st in stracks]) + for i, st in enumerate(stracks): + if st.state != TrackState.Tracked: + multi_mean[i][7] = 0 + multi_mean, multi_covariance = kalman_filter.multi_predict(multi_mean, multi_covariance) + for i, (mean, cov) in enumerate(zip(multi_mean, multi_covariance)): + stracks[i].mean = mean + stracks[i].covariance = cov + + def activate(self, kalman_filter, frame_id): + """Start a new tracklet""" + self.kalman_filter = kalman_filter + self.track_id = self.next_id() + self.mean, self.covariance = self.kalman_filter.initiate(self.tlwh_to_xyah(self._tlwh)) + + self.tracklet_len = 0 + self.state = TrackState.Tracked + if frame_id == 1: + self.is_activated = True + self.frame_id = frame_id + self.start_frame = frame_id + + def re_activate(self, new_track, frame_id, new_id=False): + self.mean, self.covariance = self.kalman_filter.update(self.mean, self.covariance, + self.tlwh_to_xyah(new_track.tlwh)) + + self.update_features(new_track.curr_feat) + self.tracklet_len = 0 + self.state = TrackState.Tracked + self.is_activated = True + self.frame_id = frame_id + if new_id: + self.track_id = self.next_id() + + def update(self, new_track, frame_id, 
update_feature=True): + self.frame_id = frame_id + self.tracklet_len += 1 + + new_tlwh = new_track.tlwh + self.mean, self.covariance = self.kalman_filter.update(self.mean, self.covariance, self.tlwh_to_xyah(new_tlwh)) + self.state = TrackState.Tracked + self.is_activated = True + + self.score = new_track.score + if update_feature: + self.update_features(new_track.curr_feat) + + @property + def tlwh(self): + """ + Get current position in bounding box format `(top left x, top left y, + width, height)`. + """ + if self.mean is None: + return self._tlwh.copy() + ret = self.mean[:4].copy() + ret[2] *= ret[3] + ret[:2] -= ret[2:] / 2 + return ret + + @property + def tlbr(self): + """ + Convert bounding box to format `(min x, min y, max x, max y)`, i.e., + `(top left, bottom right)`. + """ + ret = self.tlwh.copy() + ret[2:] += ret[:2] + return ret + + @staticmethod + def tlwh_to_xyah(tlwh): + """ + Convert bounding box to format `(center x, center y, aspect ratio, + height)`, where the aspect ratio is `width / height`. + """ + ret = np.asarray(tlwh).copy() + ret[:2] += ret[2:] / 2 + ret[2] /= ret[3] + return ret + + def to_xyah(self): + return self.tlwh_to_xyah(self.tlwh) + + @staticmethod + def tlbr_to_tlwh(tlbr): + ret = np.asarray(tlbr).copy() + ret[2:] -= ret[:2] + return ret + + @staticmethod + def tlwh_to_tlbr(tlwh): + ret = np.asarray(tlwh).copy() + ret[2:] += ret[:2] + return ret + + def __repr__(self): + return 'OT_{}_({}-{})'.format(self.track_id, self.start_frame, self.end_frame) + + +def joint_stracks(tlista, tlistb): + exists = {} + res = [] + for t in tlista: + exists[t.track_id] = 1 + res.append(t) + for t in tlistb: + tid = t.track_id + if not exists.get(tid, 0): + exists[tid] = 1 + res.append(t) + return res + + +def sub_stracks(tlista, tlistb): + stracks = {} + for t in tlista: + stracks[t.track_id] = t + for t in tlistb: + tid = t.track_id + if stracks.get(tid, 0): + del stracks[tid] + return list(stracks.values()) + + +def remove_duplicate_stracks(stracksa, stracksb): + pdist = matching.iou_distance(stracksa, stracksb) + pairs = np.where(pdist < 0.15) + dupa, dupb = list(), list() + for p, q in zip(*pairs): + timep = stracksa[p].frame_id - stracksa[p].start_frame + timeq = stracksb[q].frame_id - stracksb[q].start_frame + if timep > timeq: + dupb.append(q) + else: + dupa.append(p) + resa = [t for i, t in enumerate(stracksa) if not i in dupa] + resb = [t for i, t in enumerate(stracksb) if not i in dupb] + return resa, resb diff --git a/modules/video/multiple_object_tracking/jde_darknet53/modeling/mot/tracker/base_sde_tracker.py b/modules/video/multiple_object_tracking/jde_darknet53/modeling/mot/tracker/base_sde_tracker.py new file mode 100644 index 0000000000000000000000000000000000000000..2e811e536a42ff781f60872b448b251de0301f61 --- /dev/null +++ b/modules/video/multiple_object_tracking/jde_darknet53/modeling/mot/tracker/base_sde_tracker.py @@ -0,0 +1,133 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
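STrack above converts between three box encodings: tlwh, tlbr, and the xyah measurement format consumed by the Kalman filter. A tiny numeric illustration of those conversions, using arbitrary values and plain NumPy:

import numpy as np

tlwh = np.array([10., 20., 40., 80.])   # (top left x, top left y, width, height)

tlbr = tlwh.copy()
tlbr[2:] += tlbr[:2]                    # -> [10, 20, 50, 100]  (min x, min y, max x, max y)

xyah = tlwh.copy()
xyah[:2] += xyah[2:] / 2                # center: (30, 60)
xyah[2] /= xyah[3]                      # aspect ratio w/h: 0.5
# -> [30, 60, 0.5, 80]  (center x, center y, aspect ratio, height)
print(tlbr, xyah)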
+""" +This code is borrow from https://github.com/nwojke/deep_sort/blob/master/deep_sort/track.py +""" + +from ppdet.core.workspace import register, serializable + +__all__ = ['TrackState', 'Track'] + + +class TrackState(object): + """ + Enumeration type for the single target track state. Newly created tracks are + classified as `tentative` until enough evidence has been collected. Then, + the track state is changed to `confirmed`. Tracks that are no longer alive + are classified as `deleted` to mark them for removal from the set of active + tracks. + """ + Tentative = 1 + Confirmed = 2 + Deleted = 3 + + +class Track(object): + """ + A single target track with state space `(x, y, a, h)` and associated + velocities, where `(x, y)` is the center of the bounding box, `a` is the + aspect ratio and `h` is the height. + + Args: + mean (ndarray): Mean vector of the initial state distribution. + covariance (ndarray): Covariance matrix of the initial state distribution. + track_id (int): A unique track identifier. + n_init (int): Number of consecutive detections before the track is confirmed. + The track state is set to `Deleted` if a miss occurs within the first + `n_init` frames. + max_age (int): The maximum number of consecutive misses before the track + state is set to `Deleted`. + feature (Optional[ndarray]): Feature vector of the detection this track + originates from. If not None, this feature is added to the `features` cache. + + Attributes: + hits (int): Total number of measurement updates. + age (int): Total number of frames since first occurance. + time_since_update (int): Total number of frames since last measurement + update. + state (TrackState): The current track state. + features (List[ndarray]): A cache of features. On each measurement update, + the associated feature vector is added to this list. + """ + + def __init__(self, mean, covariance, track_id, n_init, max_age, feature=None): + self.mean = mean + self.covariance = covariance + self.track_id = track_id + self.hits = 1 + self.age = 1 + self.time_since_update = 0 + + self.state = TrackState.Tentative + self.features = [] + if feature is not None: + self.features.append(feature) + + self._n_init = n_init + self._max_age = max_age + + def to_tlwh(self): + """Get position in format `(top left x, top left y, width, height)`.""" + ret = self.mean[:4].copy() + ret[2] *= ret[3] + ret[:2] -= ret[2:] / 2 + return ret + + def to_tlbr(self): + """Get position in bounding box format `(min x, miny, max x, max y)`.""" + ret = self.to_tlwh() + ret[2:] = ret[:2] + ret[2:] + return ret + + def predict(self, kalman_filter): + """ + Propagate the state distribution to the current time step using a Kalman + filter prediction step. + """ + self.mean, self.covariance = kalman_filter.predict(self.mean, self.covariance) + self.age += 1 + self.time_since_update += 1 + + def update(self, kalman_filter, detection): + """ + Perform Kalman filter measurement update step and update the associated + detection feature cache. + """ + self.mean, self.covariance = kalman_filter.update(self.mean, self.covariance, detection.to_xyah()) + self.features.append(detection.feature) + + self.hits += 1 + self.time_since_update = 0 + if self.state == TrackState.Tentative and self.hits >= self._n_init: + self.state = TrackState.Confirmed + + def mark_missed(self): + """Mark this track as missed (no association at the current time step). 
+ """ + if self.state == TrackState.Tentative: + self.state = TrackState.Deleted + elif self.time_since_update > self._max_age: + self.state = TrackState.Deleted + + def is_tentative(self): + """Returns True if this track is tentative (unconfirmed).""" + return self.state == TrackState.Tentative + + def is_confirmed(self): + """Returns True if this track is confirmed.""" + return self.state == TrackState.Confirmed + + def is_deleted(self): + """Returns True if this track is dead and should be deleted.""" + return self.state == TrackState.Deleted diff --git a/modules/video/multiple_object_tracking/jde_darknet53/modeling/mot/tracker/jde_tracker.py b/modules/video/multiple_object_tracking/jde_darknet53/modeling/mot/tracker/jde_tracker.py new file mode 100644 index 0000000000000000000000000000000000000000..2e1cafb345b7687e563fc6d9c2c1769cb39d690c --- /dev/null +++ b/modules/video/multiple_object_tracking/jde_darknet53/modeling/mot/tracker/jde_tracker.py @@ -0,0 +1,248 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +This code is borrow from https://github.com/Zhongdao/Towards-Realtime-MOT/blob/master/tracker/multitracker.py +""" + +import paddle + +from ..matching import jde_matching as matching +from .base_jde_tracker import TrackState, BaseTrack, STrack +from .base_jde_tracker import joint_stracks, sub_stracks, remove_duplicate_stracks + +from ppdet.core.workspace import register, serializable +from ppdet.utils.logger import setup_logger +logger = setup_logger(__name__) + +__all__ = ['FrozenJDETracker'] + + +@register +@serializable +class FrozenJDETracker(object): + __inject__ = ['motion'] + """ + JDE tracker + + Args: + det_thresh (float): threshold of detection score + track_buffer (int): buffer for tracker + min_box_area (int): min box area to filter out low quality boxes + vertical_ratio (float): w/h, the vertical ratio of the bbox to filter + bad results, set 1.6 default for pedestrian tracking. If set -1 + means no need to filter bboxes. + tracked_thresh (float): linear assignment threshold of tracked + stracks and detections + r_tracked_thresh (float): linear assignment threshold of + tracked stracks and unmatched detections + unconfirmed_thresh (float): linear assignment threshold of + unconfirmed stracks and unmatched detections + motion (object): KalmanFilter instance + conf_thres (float): confidence threshold for tracking + metric_type (str): either "euclidean" or "cosine", the distance metric + used for measurement to track association. 
+ """ + + def __init__(self, + det_thresh=0.3, + track_buffer=30, + min_box_area=200, + vertical_ratio=1.6, + tracked_thresh=0.7, + r_tracked_thresh=0.5, + unconfirmed_thresh=0.7, + motion='KalmanFilter', + conf_thres=0, + metric_type='euclidean'): + self.det_thresh = det_thresh + self.track_buffer = track_buffer + self.min_box_area = min_box_area + self.vertical_ratio = vertical_ratio + + self.tracked_thresh = tracked_thresh + self.r_tracked_thresh = r_tracked_thresh + self.unconfirmed_thresh = unconfirmed_thresh + self.motion = motion + self.conf_thres = conf_thres + self.metric_type = metric_type + + self.frame_id = 0 + self.tracked_stracks = [] + self.lost_stracks = [] + self.removed_stracks = [] + + self.max_time_lost = 0 + # max_time_lost will be calculated: int(frame_rate / 30.0 * track_buffer) + + def update(self, pred_dets, pred_embs): + """ + Processes the image frame and finds bounding box(detections). + Associates the detection with corresponding tracklets and also handles + lost, removed, refound and active tracklets. + + Args: + pred_dets (Tensor): Detection results of the image, shape is [N, 5]. + pred_embs (Tensor): Embedding results of the image, shape is [N, 512]. + + Return: + output_stracks (list): The list contains information regarding the + online_tracklets for the recieved image tensor. + """ + self.frame_id += 1 + activated_starcks = [] + # for storing active tracks, for the current frame + refind_stracks = [] + # Lost Tracks whose detections are obtained in the current frame + lost_stracks = [] + # The tracks which are not obtained in the current frame but are not + # removed. (Lost for some time lesser than the threshold for removing) + removed_stracks = [] + + remain_inds = paddle.nonzero(pred_dets[:, 4] > self.conf_thres) + if remain_inds.shape[0] == 0: + pred_dets = paddle.zeros([0, 1]) + pred_embs = paddle.zeros([0, 1]) + else: + pred_dets = paddle.gather(pred_dets, remain_inds) + pred_embs = paddle.gather(pred_embs, remain_inds) + + # Filter out the image with box_num = 0. 
pred_dets = [[0.0, 0.0, 0.0 ,0.0]] + empty_pred = True if len(pred_dets) == 1 and paddle.sum(pred_dets) == 0.0 else False + """ Step 1: Network forward, get detections & embeddings""" + if len(pred_dets) > 0 and not empty_pred: + pred_dets = pred_dets.numpy() + pred_embs = pred_embs.numpy() + detections = [ + STrack(STrack.tlbr_to_tlwh(tlbrs[:4]), tlbrs[4], f, 30) for (tlbrs, f) in zip(pred_dets, pred_embs) + ] + else: + detections = [] + ''' Add newly detected tracklets to tracked_stracks''' + unconfirmed = [] + tracked_stracks = [] # type: list[STrack] + for track in self.tracked_stracks: + if not track.is_activated: + # previous tracks which are not active in the current frame are added in unconfirmed list + unconfirmed.append(track) + else: + # Active tracks are added to the local list 'tracked_stracks' + tracked_stracks.append(track) + """ Step 2: First association, with embedding""" + # Combining currently tracked_stracks and lost_stracks + strack_pool = joint_stracks(tracked_stracks, self.lost_stracks) + # Predict the current location with KF + STrack.multi_predict(strack_pool, self.motion) + + dists = matching.embedding_distance(strack_pool, detections, metric=self.metric_type) + dists = matching.fuse_motion(self.motion, dists, strack_pool, detections) + # The dists is the list of distances of the detection with the tracks in strack_pool + matches, u_track, u_detection = matching.linear_assignment(dists, thresh=self.tracked_thresh) + # The matches is the array for corresponding matches of the detection with the corresponding strack_pool + + for itracked, idet in matches: + # itracked is the id of the track and idet is the detection + track = strack_pool[itracked] + det = detections[idet] + if track.state == TrackState.Tracked: + # If the track is active, add the detection to the track + track.update(detections[idet], self.frame_id) + activated_starcks.append(track) + else: + # We have obtained a detection from a track which is not active, + # hence put the track in refind_stracks list + track.re_activate(det, self.frame_id, new_id=False) + refind_stracks.append(track) + + # None of the steps below happen if there are no undetected tracks. + """ Step 3: Second association, with IOU""" + detections = [detections[i] for i in u_detection] + # detections is now a list of the unmatched detections + r_tracked_stracks = [] + # This is container for stracks which were tracked till the previous + # frame but no detection was found for it in the current frame. + + for i in u_track: + if strack_pool[i].state == TrackState.Tracked: + r_tracked_stracks.append(strack_pool[i]) + dists = matching.iou_distance(r_tracked_stracks, detections) + matches, u_track, u_detection = matching.linear_assignment(dists, thresh=self.r_tracked_thresh) + # matches is the list of detections which matched with corresponding + # tracks by IOU distance method. 
+ + for itracked, idet in matches: + track = r_tracked_stracks[itracked] + det = detections[idet] + if track.state == TrackState.Tracked: + track.update(det, self.frame_id) + activated_starcks.append(track) + else: + track.re_activate(det, self.frame_id, new_id=False) + refind_stracks.append(track) + # Same process done for some unmatched detections, but now considering IOU_distance as measure + + for it in u_track: + track = r_tracked_stracks[it] + if not track.state == TrackState.Lost: + track.mark_lost() + lost_stracks.append(track) + # If no detections are obtained for tracks (u_track), the tracks are added to lost_tracks list and are marked lost + '''Deal with unconfirmed tracks, usually tracks with only one beginning frame''' + detections = [detections[i] for i in u_detection] + dists = matching.iou_distance(unconfirmed, detections) + matches, u_unconfirmed, u_detection = matching.linear_assignment(dists, thresh=self.unconfirmed_thresh) + for itracked, idet in matches: + unconfirmed[itracked].update(detections[idet], self.frame_id) + activated_starcks.append(unconfirmed[itracked]) + + # The tracks which are yet not matched + for it in u_unconfirmed: + track = unconfirmed[it] + track.mark_removed() + removed_stracks.append(track) + + # after all these confirmation steps, if a new detection is found, it is initialized for a new track + """ Step 4: Init new stracks""" + for inew in u_detection: + track = detections[inew] + if track.score < self.det_thresh: + continue + track.activate(self.motion, self.frame_id) + activated_starcks.append(track) + """ Step 5: Update state""" + # If the tracks are lost for more frames than the threshold number, the tracks are removed. + for track in self.lost_stracks: + if self.frame_id - track.end_frame > self.max_time_lost: + track.mark_removed() + removed_stracks.append(track) + + # Update the self.tracked_stracks and self.lost_stracks using the updates in this step. + self.tracked_stracks = [t for t in self.tracked_stracks if t.state == TrackState.Tracked] + self.tracked_stracks = joint_stracks(self.tracked_stracks, activated_starcks) + self.tracked_stracks = joint_stracks(self.tracked_stracks, refind_stracks) + + self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks) + self.lost_stracks.extend(lost_stracks) + self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks) + self.removed_stracks.extend(removed_stracks) + self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(self.tracked_stracks, self.lost_stracks) + # get scores of lost tracks + output_stracks = [track for track in self.tracked_stracks if track.is_activated] + + logger.debug('===========Frame {}=========='.format(self.frame_id)) + logger.debug('Activated: {}'.format([track.track_id for track in activated_starcks])) + logger.debug('Refind: {}'.format([track.track_id for track in refind_stracks])) + logger.debug('Lost: {}'.format([track.track_id for track in lost_stracks])) + logger.debug('Removed: {}'.format([track.track_id for track in removed_stracks])) + + return output_stracks diff --git a/modules/video/multiple_object_tracking/jde_darknet53/modeling/mot/utils.py b/modules/video/multiple_object_tracking/jde_darknet53/modeling/mot/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..12c61686a1715a965407822dcf19fd1081f292d7 --- /dev/null +++ b/modules/video/multiple_object_tracking/jde_darknet53/modeling/mot/utils.py @@ -0,0 +1,176 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import cv2 +import time +import paddle +import numpy as np + +__all__ = [ + 'Timer', + 'Detection', + 'load_det_results', + 'preprocess_reid', + 'get_crops', + 'clip_box', + 'scale_coords', +] + + +class Timer(object): + """ + This class used to compute and print the current FPS while evaling. + """ + + def __init__(self): + self.total_time = 0. + self.calls = 0 + self.start_time = 0. + self.diff = 0. + self.average_time = 0. + self.duration = 0. + + def tic(self): + # using time.time instead of time.clock because time time.clock + # does not normalize for multithreading + self.start_time = time.time() + + def toc(self, average=True): + self.diff = time.time() - self.start_time + self.total_time += self.diff + self.calls += 1 + self.average_time = self.total_time / self.calls + if average: + self.duration = self.average_time + else: + self.duration = self.diff + return self.duration + + def clear(self): + self.total_time = 0. + self.calls = 0 + self.start_time = 0. + self.diff = 0. + self.average_time = 0. + self.duration = 0. + + +class Detection(object): + """ + This class represents a bounding box detection in a single image. + + Args: + tlwh (ndarray): Bounding box in format `(top left x, top left y, + width, height)`. + confidence (ndarray): Detector confidence score. + feature (Tensor): A feature vector that describes the object + contained in this image. + """ + + def __init__(self, tlwh, confidence, feature): + self.tlwh = np.asarray(tlwh, dtype=np.float32) + self.confidence = np.asarray(confidence, dtype=np.float32) + self.feature = feature + + def to_tlbr(self): + """ + Convert bounding box to format `(min x, min y, max x, max y)`, i.e., + `(top left, bottom right)`. + """ + ret = self.tlwh.copy() + ret[2:] += ret[:2] + return ret + + def to_xyah(self): + """ + Convert bounding box to format `(center x, center y, aspect ratio, + height)`, where the aspect ratio is `width / height`. 
+ """ + ret = self.tlwh.copy() + ret[:2] += ret[2:] / 2 + ret[2] /= ret[3] + return ret + + +def load_det_results(det_file, num_frames): + assert os.path.exists(det_file) and os.path.isfile(det_file), \ + 'Error: det_file: {} not exist or not a file.'.format(det_file) + labels = np.loadtxt(det_file, dtype='float32', delimiter=',') + results_list = [] + for frame_i in range(0, num_frames): + results = {'bbox': [], 'score': []} + lables_with_frame = labels[labels[:, 0] == frame_i + 1] + for l in lables_with_frame: + results['bbox'].append(l[1:5]) + results['score'].append(l[5]) + results_list.append(results) + return results_list + + +def scale_coords(coords, input_shape, im_shape, scale_factor): + im_shape = im_shape.numpy()[0] + ratio = scale_factor[0][0] + pad_w = (input_shape[1] - int(im_shape[1])) / 2 + pad_h = (input_shape[0] - int(im_shape[0])) / 2 + coords = paddle.cast(coords, 'float32') + coords[:, 0::2] -= pad_w + coords[:, 1::2] -= pad_h + coords[:, 0:4] /= ratio + coords[:, :4] = paddle.clip(coords[:, :4], min=0, max=coords[:, :4].max()) + return coords.round() + + +def clip_box(xyxy, input_shape, im_shape, scale_factor): + im_shape = im_shape.numpy()[0] + ratio = scale_factor.numpy()[0][0] + img0_shape = [int(im_shape[0] / ratio), int(im_shape[1] / ratio)] + + xyxy[:, 0::2] = paddle.clip(xyxy[:, 0::2], min=0, max=img0_shape[1]) + xyxy[:, 1::2] = paddle.clip(xyxy[:, 1::2], min=0, max=img0_shape[0]) + return xyxy + + +def get_crops(xyxy, ori_img, pred_scores, w, h): + crops = [] + keep_scores = [] + xyxy = xyxy.numpy().astype(np.int64) + ori_img = ori_img.numpy() + ori_img = np.squeeze(ori_img, axis=0).transpose(1, 0, 2) + pred_scores = pred_scores.numpy() + for i, bbox in enumerate(xyxy): + if bbox[2] <= bbox[0] or bbox[3] <= bbox[1]: + continue + crop = ori_img[bbox[0]:bbox[2], bbox[1]:bbox[3], :] + crops.append(crop) + keep_scores.append(pred_scores[i]) + if len(crops) == 0: + return [], [] + crops = preprocess_reid(crops, w, h) + return crops, keep_scores + + +def preprocess_reid(imgs, w=64, h=192, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]): + im_batch = [] + for img in imgs: + img = cv2.resize(img, (w, h)) + img = img[:, :, ::-1].astype('float32').transpose((2, 0, 1)) / 255 + img_mean = np.array(mean).reshape((3, 1, 1)) + img_std = np.array(std).reshape((3, 1, 1)) + img -= img_mean + img /= img_std + img = np.expand_dims(img, axis=0) + im_batch.append(img) + im_batch = np.concatenate(im_batch, 0) + return im_batch diff --git a/modules/video/multiple_object_tracking/jde_darknet53/modeling/mot/visualization.py b/modules/video/multiple_object_tracking/jde_darknet53/modeling/mot/visualization.py new file mode 100644 index 0000000000000000000000000000000000000000..cd9c5b15e15f677b7955dd4eba40798e985315a1 --- /dev/null +++ b/modules/video/multiple_object_tracking/jde_darknet53/modeling/mot/visualization.py @@ -0,0 +1,117 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import cv2 +import numpy as np + + +def tlwhs_to_tlbrs(tlwhs): + tlbrs = np.copy(tlwhs) + if len(tlbrs) == 0: + return tlbrs + tlbrs[:, 2] += tlwhs[:, 0] + tlbrs[:, 3] += tlwhs[:, 1] + return tlbrs + + +def get_color(idx): + idx = idx * 3 + color = ((37 * idx) % 255, (17 * idx) % 255, (29 * idx) % 255) + return color + + +def resize_image(image, max_size=800): + if max(image.shape[:2]) > max_size: + scale = float(max_size) / max(image.shape[:2]) + image = cv2.resize(image, None, fx=scale, fy=scale) + return image + + +def plot_tracking(image, tlwhs, obj_ids, scores=None, frame_id=0, fps=0., ids2=None): + im = np.ascontiguousarray(np.copy(image)) + im_h, im_w = im.shape[:2] + + top_view = np.zeros([im_w, im_w, 3], dtype=np.uint8) + 255 + + text_scale = max(1, image.shape[1] / 1600.) + text_thickness = 2 + line_thickness = max(1, int(image.shape[1] / 500.)) + + radius = max(5, int(im_w / 140.)) + cv2.putText( + im, + 'frame: %d fps: %.2f num: %d' % (frame_id, fps, len(tlwhs)), (0, int(15 * text_scale)), + cv2.FONT_HERSHEY_PLAIN, + text_scale, (0, 0, 255), + thickness=2) + + for i, tlwh in enumerate(tlwhs): + x1, y1, w, h = tlwh + intbox = tuple(map(int, (x1, y1, x1 + w, y1 + h))) + obj_id = int(obj_ids[i]) + id_text = '{}'.format(int(obj_id)) + if ids2 is not None: + id_text = id_text + ', {}'.format(int(ids2[i])) + _line_thickness = 1 if obj_id <= 0 else line_thickness + color = get_color(abs(obj_id)) + cv2.rectangle(im, intbox[0:2], intbox[2:4], color=color, thickness=line_thickness) + cv2.putText( + im, + id_text, (intbox[0], intbox[1] + 10), + cv2.FONT_HERSHEY_PLAIN, + text_scale, (0, 0, 255), + thickness=text_thickness) + + if scores is not None: + text = '{:.2f}'.format(float(scores[i])) + cv2.putText( + im, + text, (intbox[0], intbox[1] - 10), + cv2.FONT_HERSHEY_PLAIN, + text_scale, (0, 255, 255), + thickness=text_thickness) + return im + + +def plot_trajectory(image, tlwhs, track_ids): + image = image.copy() + for one_tlwhs, track_id in zip(tlwhs, track_ids): + color = get_color(int(track_id)) + for tlwh in one_tlwhs: + x1, y1, w, h = tuple(map(int, tlwh)) + cv2.circle(image, (int(x1 + 0.5 * w), int(y1 + h)), 2, color, thickness=2) + return image + + +def plot_detections(image, tlbrs, scores=None, color=(255, 0, 0), ids=None): + im = np.copy(image) + text_scale = max(1, image.shape[1] / 800.) 
+ thickness = 2 if text_scale > 1.3 else 1 + for i, det in enumerate(tlbrs): + x1, y1, x2, y2 = np.asarray(det[:4], dtype=np.int) + if len(det) >= 7: + label = 'det' if det[5] > 0 else 'trk' + if ids is not None: + text = '{}# {:.2f}: {:d}'.format(label, det[6], ids[i]) + cv2.putText( + im, text, (x1, y1 + 30), cv2.FONT_HERSHEY_PLAIN, text_scale, (0, 255, 255), thickness=thickness) + else: + text = '{}# {:.2f}'.format(label, det[6]) + + if scores is not None: + text = '{:.2f}'.format(scores[i]) + cv2.putText(im, text, (x1, y1 + 30), cv2.FONT_HERSHEY_PLAIN, text_scale, (0, 255, 255), thickness=thickness) + + cv2.rectangle(im, (x1, y1), (x2, y2), color, 2) + return im diff --git a/modules/video/multiple_object_tracking/jde_darknet53/module.py b/modules/video/multiple_object_tracking/jde_darknet53/module.py index 98b0c287596f004689e7c43a8ee17411c0fc9bf7..8ef1e6c481a225b332d2d9f193c68bd9acf17775 100644 --- a/modules/video/multiple_object_tracking/jde_darknet53/module.py +++ b/modules/video/multiple_object_tracking/jde_darknet53/module.py @@ -31,12 +31,13 @@ from .tracker import StreamTracker logger = setup_logger('Predict') -@moduleinfo(name="jde_darknet53", - type="CV/multiple_object_tracking", - author="paddlepaddle", - author_email="", - summary="JDE is a joint detection and appearance embedding model for multiple object tracking.", - version="1.0.0") +@moduleinfo( + name="jde_darknet53", + type="CV/multiple_object_tracking", + author="paddlepaddle", + author_email="", + summary="JDE is a joint detection and appearance embedding model for multiple object tracking.", + version="1.0.0") class JDETracker_1088x608: def __init__(self): self.pretrained_model = os.path.join(self.directory, "jde_darknet53_30e_1088x608") @@ -70,12 +71,13 @@ class JDETracker_1088x608: tracker.load_weights_jde(self.pretrained_model) signal.signal(signal.SIGINT, self.signalhandler) # inference - tracker.videostream_predict(video_stream=video_stream, - output_dir=output_dir, - data_type='mot', - model_type='JDE', - visualization=visualization, - draw_threshold=draw_threshold) + tracker.videostream_predict( + video_stream=video_stream, + output_dir=output_dir, + data_type='mot', + model_type='JDE', + visualization=visualization, + draw_threshold=draw_threshold) def stream_mode(self, output_dir='mot_result', visualization=True, draw_threshold=0.5, use_gpu=False): ''' @@ -106,11 +108,12 @@ class JDETracker_1088x608: return self def __enter__(self): - self.tracker_generator = self.tracker.imagestream_predict(self.output_dir, - data_type='mot', - model_type='JDE', - visualization=self.visualization, - draw_threshold=self.draw_threshold) + self.tracker_generator = self.tracker.imagestream_predict( + self.output_dir, + data_type='mot', + model_type='JDE', + visualization=self.visualization, + draw_threshold=self.draw_threshold) next(self.tracker_generator) def __exit__(self, exc_type, exc_value, traceback): @@ -129,10 +132,12 @@ class JDETracker_1088x608: logger.info('No output images to save for video') return img = cv2.imread(os.path.join(save_dir, '00000.jpg')) - video_writer = cv2.VideoWriter(output_video_path, - fourcc=cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), - fps=30, - frameSize=[img.shape[1], img.shape[0]]) + video_writer = cv2.VideoWriter( + output_video_path, + apiPreference=0, + fourcc=cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), + fps=30, + frameSize=(img.shape[1], img.shape[0])) for i in range(len(imgnames)): imgpath = os.path.join(save_dir, '{:05d}.jpg'.format(i)) img = cv2.imread(imgpath) @@ -169,10 +174,11 @@ 
class JDETracker_1088x608: """ Run as a command. """ - self.parser = argparse.ArgumentParser(description="Run the {} module.".format(self.name), - prog='hub run {}'.format(self.name), - usage='%(prog)s', - add_help=True) + self.parser = argparse.ArgumentParser( + description="Run the {} module.".format(self.name), + prog='hub run {}'.format(self.name), + usage='%(prog)s', + add_help=True) self.arg_input_group = self.parser.add_argument_group(title="Input options", description="Input data. Required") self.arg_config_group = self.parser.add_argument_group( @@ -204,10 +210,12 @@ class JDETracker_1088x608: logger.info('No output images to save for video') return img = cv2.imread(os.path.join(save_dir, '00000.jpg')) - video_writer = cv2.VideoWriter(output_video_path, - fourcc=cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), - fps=30, - frameSize=[img.shape[1], img.shape[0]]) + video_writer = cv2.VideoWriter( + output_video_path, + apiPreference=0, + fourcc=cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), + fps=30, + frameSize=(img.shape[1], img.shape[0])) for i in range(len(imgnames)): imgpath = os.path.join(save_dir, '{:05d}.jpg'.format(i)) img = cv2.imread(imgpath) @@ -223,22 +231,16 @@ class JDETracker_1088x608: """ self.arg_config_group.add_argument('--use_gpu', action='store_true', help="use GPU or not") - self.arg_config_group.add_argument('--output_dir', - type=str, - default='mot_result', - help='Directory name for output tracking results.') - self.arg_config_group.add_argument('--visualization', - action='store_true', - help="whether to save output as images.") - self.arg_config_group.add_argument("--draw_threshold", - type=float, - default=0.5, - help="Threshold to reserve the result for visualization.") + self.arg_config_group.add_argument( + '--output_dir', type=str, default='mot_result', help='Directory name for output tracking results.') + self.arg_config_group.add_argument( + '--visualization', action='store_true', help="whether to save output as images.") + self.arg_config_group.add_argument( + "--draw_threshold", type=float, default=0.5, help="Threshold to reserve the result for visualization.") def add_module_input_arg(self): """ Add the command input options. 
""" - self.arg_input_group.add_argument('--video_stream', - type=str, - help="path to video stream, can be a video file or stream device number.") + self.arg_input_group.add_argument( + '--video_stream', type=str, help="path to video stream, can be a video file or stream device number.") diff --git a/modules/video/multiple_object_tracking/jde_darknet53/requirements.txt b/modules/video/multiple_object_tracking/jde_darknet53/requirements.txt index 01118b11cddcf87ff9e97436ec5b7e79c00588b8..8e327df2acddea95260bb92551cfc9afee06b642 100644 --- a/modules/video/multiple_object_tracking/jde_darknet53/requirements.txt +++ b/modules/video/multiple_object_tracking/jde_darknet53/requirements.txt @@ -1,2 +1,4 @@ -paddledet >= 2.1.0 +cython +paddledet >= 2.2.0 opencv-python +imageio diff --git a/modules/video/multiple_object_tracking/jde_darknet53/tracker.py b/modules/video/multiple_object_tracking/jde_darknet53/tracker.py index 5a984359edc18cf7e128e12e754fe201bf55fccc..1e4ab7d0b3a996775407eb1334c6183db26129d7 100644 --- a/modules/video/multiple_object_tracking/jde_darknet53/tracker.py +++ b/modules/video/multiple_object_tracking/jde_darknet53/tracker.py @@ -16,18 +16,19 @@ import cv2 import glob import paddle import numpy as np +import collections -from ppdet.core.workspace import create from ppdet.utils.checkpoint import load_weight, load_pretrain_weight -from ppdet.modeling.mot.utils import Detection, get_crops, scale_coords, clip_box -from ppdet.modeling.mot.utils import Timer, load_det_results -from ppdet.modeling.mot import visualization as mot_vis from ppdet.metrics import Metric, MOTMetric, KITTIMOTMetric import ppdet.utils.stats as stats from ppdet.engine.callbacks import Callback, ComposeCallback +from ppdet.core.workspace import create from ppdet.utils.logger import setup_logger from .dataset import MOTVideoStream, MOTImageStream +from .modeling.mot.utils import Detection, get_crops, scale_coords, clip_box +from .modeling.mot import visualization as mot_vis +from .utils import Timer logger = setup_logger(__name__) @@ -70,7 +71,6 @@ class StreamTracker(object): timer.tic() pred_dets, pred_embs = self.model(data) online_targets = self.model.tracker.update(pred_dets, pred_embs) - online_tlwhs, online_ids = [], [] online_scores = [] for t in online_targets: @@ -109,7 +109,6 @@ class StreamTracker(object): with paddle.no_grad(): pred_dets, pred_embs = self.model(data) online_targets = self.model.tracker.update(pred_dets, pred_embs) - online_tlwhs, online_ids = [], [] online_scores = [] for t in online_targets: @@ -160,13 +159,12 @@ class StreamTracker(object): yield results = [] while True: - with paddle.no_grad(): - try: - results, nf = next(generator) - yield results - except StopIteration as e: - self.write_mot_results(result_filename, results, data_type) - return + try: + results, nf = next(generator) + yield results + except StopIteration as e: + self.write_mot_results(result_filename, results, data_type) + return def videostream_predict(self, video_stream, @@ -176,7 +174,7 @@ class StreamTracker(object): visualization=True, draw_threshold=0.5): assert video_stream is not None, \ - "--video_file or --image_dir should be set." + "--video_stream should be set." 
if not os.path.exists(output_dir):
             os.makedirs(output_dir)
         result_root = os.path.join(output_dir, 'mot_results')
@@ -214,7 +212,12 @@ class StreamTracker(object):
                 logger.info('No output images to save for video')
                 return
             img = cv2.imread(os.path.join(save_dir, '00000.jpg'))
-            video_writer = cv2.VideoWriter(output_video_path, fourcc=cv2.VideoWriter_fourcc('M','J','P','G'), fps=30, frameSize=[img.shape[1],img.shape[0]])
+            video_writer = cv2.VideoWriter(
+                output_video_path,
+                apiPreference=0,
+                fourcc=cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'),
+                fps=30,
+                frameSize=(img.shape[1], img.shape[0]))
             for i in range(len(imgnames)):
                 imgpath = os.path.join(save_dir, '{:05d}.jpg'.format(i))
                 img = cv2.imread(imgpath)
diff --git a/modules/video/multiple_object_tracking/jde_darknet53/utils.py b/modules/video/multiple_object_tracking/jde_darknet53/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..4426f217f9f5fb5c7afa6593c2b83ce4b67236f9
--- /dev/null
+++ b/modules/video/multiple_object_tracking/jde_darknet53/utils.py
@@ -0,0 +1,39 @@
+import time
+
+
+class Timer(object):
+    """
+    This class is used to compute and print the current FPS while evaluating.
+    """
+
+    def __init__(self):
+        self.total_time = 0.
+        self.calls = 0
+        self.start_time = 0.
+        self.diff = 0.
+        self.average_time = 0.
+        self.duration = 0.
+
+    def tic(self):
+        # using time.time instead of time.clock because time.clock
+        # does not normalize for multithreading
+        self.start_time = time.time()
+
+    def toc(self, average=True):
+        self.diff = time.time() - self.start_time
+        self.total_time += self.diff
+        self.calls += 1
+        self.average_time = self.total_time / self.calls
+        if average:
+            self.duration = self.average_time
+        else:
+            self.duration = self.diff
+        return self.duration
+
+    def clear(self):
+        self.total_time = 0.
+        self.calls = 0
+        self.start_time = 0.
+        self.diff = 0.
+        self.average_time = 0.
+        self.duration = 0.
diff --git a/paddlehub/__init__.py b/paddlehub/__init__.py
index 06eab2c665fc58aa03ecd7ffa0c2d735d1167a4b..220459733796cfda7de290d7bcf01bd44a7e7503 100644
--- a/paddlehub/__init__.py
+++ b/paddlehub/__init__.py
@@ -13,7 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-__version__ = '2.1.0' +__version__ = '2.2.0' import paddle from packaging.version import Version @@ -33,11 +33,13 @@ from paddlehub.finetune import Trainer from paddlehub.utils import log, parser, utils from paddlehub.utils import download as _download from paddlehub.utils.paddlex import download, ResourceNotFoundError +from paddlehub.utils.platform import is_windows from paddlehub.server import server_check from paddlehub.server.server_source import ServerConnectionError from paddlehub.module import Module from paddlehub.text.bert_tokenizer import BertTokenizer, ErnieTinyTokenizer from paddlehub.text.tokenizer import CustomTokenizer +from paddlehub.text.utils import is_chinese_char # In order to maintain the compatibility of the old version, we put the relevant # compatible code in the paddlehub.compat package, and mapped some modules referenced @@ -66,3 +68,49 @@ common = EasyDict(paddle_helper=paddle_utils) dataset = EasyDict(Couplet=couplet.Couplet) finetune = EasyDict(strategy=EasyDict(ULMFiTStrategy=ULMFiTStrategy)) logger = EasyDict(logger=log.logger) + + +# Alias for paddle.hub.* +def load(*args, **kwargs): + if _paddle_version < Version('2.1.0') and _paddle_version != Version('0.0.0'): + raise RuntimeError( + '`hub.load` is only available in PaddlePaddle 2.1 and above, please upgrade the PaddlePaddle version.') + + from paddle.hub import load as phload + from paddlehub.server.server import CacheUpdater + + CacheUpdater("paddle.hub.load").start() + return phload(*args, **kwargs) + + +def list(*args, **kwargs): + if _paddle_version < Version('2.1.0') and _paddle_version != Version('0.0.0'): + raise RuntimeError( + '`hub.list` is only available in PaddlePaddle 2.1 and above, please upgrade the PaddlePaddle version.') + + from paddle.hub import list as phlist + from paddlehub.server.server import CacheUpdater + + CacheUpdater("paddle.hub.list").start() + return phlist(*args, **kwargs) + + +def help(*args, **kwargs): + if _paddle_version < Version('2.1.0') and _paddle_version != Version('0.0.0'): + raise RuntimeError( + '`hub.help` is only available in PaddlePaddle 2.1 and above, please upgrade the PaddlePaddle version.') + + from paddle.hub import help as phhelp + from paddlehub.server.server import CacheUpdater + + CacheUpdater("paddle.hub.help").start() + return phhelp(*args, **kwargs) + + +if is_windows(): + for char in env.HUB_HOME: + if is_chinese_char(char): + log.logger.warning( + 'The home directory contains Chinese characters which may cause unknown exceptions in the execution \ + of some modules. 
Please set another path through the set HUB_HOME command.') + break diff --git a/paddlehub/env.py b/paddlehub/env.py index 223bc2ffec1d3286d360a0d750bf15b857843345..0c8ff78546800a634a7996ff50806f9f60273ea3 100644 --- a/paddlehub/env.py +++ b/paddlehub/env.py @@ -52,8 +52,7 @@ def _get_hub_home(): def _get_sub_home(directory): home = os.path.join(_get_hub_home(), directory) - if not os.path.exists(home): - os.makedirs(home) + os.makedirs(home, exist_ok=True) return home diff --git a/paddlehub/finetune/trainer.py b/paddlehub/finetune/trainer.py index 041361d60704de14e53f14cec3c60d64d12f9d29..c2b01eb86f2e121c43d5fe387bfdf346f34703bf 100644 --- a/paddlehub/finetune/trainer.py +++ b/paddlehub/finetune/trainer.py @@ -18,8 +18,8 @@ import time from collections import defaultdict from typing import Any, Callable, Generic, List -import numpy as np import paddle +import numpy as np from visualdl import LogWriter from paddlehub.utils.log import logger @@ -82,6 +82,7 @@ class Trainer(object): if self.nranks > 1: paddle.distributed.init_parallel_env() self.model = paddle.DataParallel(self.model) + self.compare_metrics = self._compare_metrics if not compare_metrics else compare_metrics self._load_checkpoint() @@ -178,8 +179,14 @@ class Trainer(object): collate_fn(callable): function to generate mini-batch data by merging the sample list. None for only stack each fields of sample in axis 0(same as :attr::`np.stack(..., axis=0)`). Default None ''' - if eval_dataset is not None and not hasattr(self.model, 'validation_step'): - raise NotImplementedError('The specified finetuning model does not support evaluation.') + if eval_dataset is not None: + if isinstance(self.model, paddle.DataParallel): + model = self.model._layers + else: + model = self.model + + if not hasattr(model, 'validation_step'): + raise NotImplementedError('The specified finetuning model does not support evaluation.') batch_sampler = paddle.io.DistributedBatchSampler( train_dataset, batch_size=batch_size, shuffle=True, drop_last=False) @@ -294,24 +301,26 @@ class Trainer(object): collate_fn=collate_fn) self.model.eval() + avg_loss = num_samples = 0 sum_metrics = defaultdict(int) avg_metrics = defaultdict(int) with logger.processing('Evaluation on validation dataset'): - for batch_idx, batch in enumerate(loader): - result = self.validation_step(batch, batch_idx) + with paddle.no_grad(): + for batch_idx, batch in enumerate(loader): + result = self.validation_step(batch, batch_idx) - loss = result.get('loss', None) - metrics = result.get('metrics', {}) - bs = batch[0].shape[0] - num_samples += bs + loss = result.get('loss', None) + metrics = result.get('metrics', {}) + bs = batch[0].shape[0] + num_samples += bs - if loss: - avg_loss += loss.numpy()[0] * bs + if loss: + avg_loss += loss.numpy()[0] * bs - for metric, value in metrics.items(): - sum_metrics[metric] += value * bs + for metric, value in metrics.items(): + sum_metrics[metric] += value * bs # print avg metrics and loss print_msg = '[Evaluation result]' diff --git a/paddlehub/module/cv_module.py b/paddlehub/module/cv_module.py index 62e2a30350f23d4d5bb8099f1edaa18481c09231..c4a25a3c582961db83bc9b6e943e234d3f070b20 100644 --- a/paddlehub/module/cv_module.py +++ b/paddlehub/module/cv_module.py @@ -92,33 +92,34 @@ class ImageClassifierModule(RunModule, ImageServing): results(list[dict]) : The prediction result of each input image ''' self.eval() - res = [] - total_num = len(images) - loop_num = int(np.ceil(total_num / batch_size)) - - for iter_id in range(loop_num): - batch_data = [] - 
handle_id = iter_id * batch_size - for image_id in range(batch_size): - try: - image = self.transforms(images[handle_id + image_id]) - batch_data.append(image) - except: - pass - batch_image = np.array(batch_data, dtype='float32') - preds, feature = self(paddle.to_tensor(batch_image)) - preds = F.softmax(preds, axis=1).numpy() - pred_idxs = np.argsort(preds)[:, ::-1][:, :top_k] - - for i, pred in enumerate(pred_idxs): - res_dict = {} - for k in pred: - class_name = self.labels[int(k)] - res_dict[class_name] = preds[i][k] - - res.append(res_dict) - - return res + with paddle.no_grad(): + res = [] + total_num = len(images) + loop_num = int(np.ceil(total_num / batch_size)) + + for iter_id in range(loop_num): + batch_data = [] + handle_id = iter_id * batch_size + for image_id in range(batch_size): + try: + image = self.transforms(images[handle_id + image_id]) + batch_data.append(image) + except: + pass + batch_image = np.array(batch_data, dtype='float32') + preds, feature = self(paddle.to_tensor(batch_image)) + preds = F.softmax(preds, axis=1).numpy() + pred_idxs = np.argsort(preds)[:, ::-1][:, :top_k] + + for i, pred in enumerate(pred_idxs): + res_dict = {} + for k in pred: + class_name = self.labels[int(k)] + res_dict[class_name] = preds[i][k] + + res.append(res_dict) + + return res @serving def serving_method(self, images: list, top_k: int, **kwargs): @@ -223,54 +224,55 @@ class ImageColorizeModule(RunModule, ImageServing): res(list[dict]) : The prediction result of each input image ''' self.eval() - lab2rgb = T.LAB2RGB() - res = [] - total_num = len(images) - loop_num = int(np.ceil(total_num / batch_size)) - for iter_id in range(loop_num): - batch_data = [] - handle_id = iter_id * batch_size - for image_id in range(batch_size): - try: - image = self.transforms(images[handle_id + image_id]) - batch_data.append(image) - except: - pass - batch_data = np.array(batch_data) - im = self.preprocess(batch_data) - out_class, out_reg = self(im['A'], im['hint_B'], im['mask_B']) - - visual_ret = OrderedDict() - for i in range(im['A'].shape[0]): - gray = lab2rgb(np.concatenate((im['A'].numpy(), np.zeros(im['B'].shape)), axis=1))[i] - gray = np.clip(np.transpose(gray, (1, 2, 0)), 0, 1) * 255 - visual_ret['gray'] = gray.astype(np.uint8) - hint = lab2rgb(np.concatenate((im['A'].numpy(), im['hint_B'].numpy()), axis=1))[i] - hint = np.clip(np.transpose(hint, (1, 2, 0)), 0, 1) * 255 - visual_ret['hint'] = hint.astype(np.uint8) - real = lab2rgb(np.concatenate((im['A'].numpy(), im['B'].numpy()), axis=1))[i] - real = np.clip(np.transpose(real, (1, 2, 0)), 0, 1) * 255 - visual_ret['real'] = real.astype(np.uint8) - fake = lab2rgb(np.concatenate((im['A'].numpy(), out_reg.numpy()), axis=1))[i] - fake = np.clip(np.transpose(fake, (1, 2, 0)), 0, 1) * 255 - visual_ret['fake_reg'] = fake.astype(np.uint8) - - if visualization: - if isinstance(images[handle_id + i], str): - org_img = cv2.imread(images[handle_id + i]).astype('float32') - else: - org_img = images[handle_id + i] - h, w, c = org_img.shape - fake_name = "fake_" + str(time.time()) + ".png" - if not os.path.exists(save_path): - os.mkdir(save_path) - fake_path = os.path.join(save_path, fake_name) - visual_gray = Image.fromarray(visual_ret['fake_reg']) - visual_gray = visual_gray.resize((w, h), Image.BILINEAR) - visual_gray.save(fake_path) - - res.append(visual_ret) - return res + with paddle.no_grad(): + lab2rgb = T.LAB2RGB() + res = [] + total_num = len(images) + loop_num = int(np.ceil(total_num / batch_size)) + for iter_id in range(loop_num): + batch_data = [] 
+ handle_id = iter_id * batch_size + for image_id in range(batch_size): + try: + image = self.transforms(images[handle_id + image_id]) + batch_data.append(image) + except: + pass + batch_data = np.array(batch_data) + im = self.preprocess(batch_data) + out_class, out_reg = self(im['A'], im['hint_B'], im['mask_B']) + + visual_ret = OrderedDict() + for i in range(im['A'].shape[0]): + gray = lab2rgb(np.concatenate((im['A'].numpy(), np.zeros(im['B'].shape)), axis=1))[i] + gray = np.clip(np.transpose(gray, (1, 2, 0)), 0, 1) * 255 + visual_ret['gray'] = gray.astype(np.uint8) + hint = lab2rgb(np.concatenate((im['A'].numpy(), im['hint_B'].numpy()), axis=1))[i] + hint = np.clip(np.transpose(hint, (1, 2, 0)), 0, 1) * 255 + visual_ret['hint'] = hint.astype(np.uint8) + real = lab2rgb(np.concatenate((im['A'].numpy(), im['B'].numpy()), axis=1))[i] + real = np.clip(np.transpose(real, (1, 2, 0)), 0, 1) * 255 + visual_ret['real'] = real.astype(np.uint8) + fake = lab2rgb(np.concatenate((im['A'].numpy(), out_reg.numpy()), axis=1))[i] + fake = np.clip(np.transpose(fake, (1, 2, 0)), 0, 1) * 255 + visual_ret['fake_reg'] = fake.astype(np.uint8) + + if visualization: + if isinstance(images[handle_id + i], str): + org_img = cv2.imread(images[handle_id + i]).astype('float32') + else: + org_img = images[handle_id + i] + h, w, c = org_img.shape + fake_name = "fake_" + str(time.time()) + ".png" + if not os.path.exists(save_path): + os.mkdir(save_path) + fake_path = os.path.join(save_path, fake_name) + visual_gray = Image.fromarray(visual_ret['fake_reg']) + visual_gray = visual_gray.resize((w, h), Image.BILINEAR) + visual_gray.save(fake_path) + + res.append(visual_ret) + return res @serving def serving_method(self, images: list, **kwargs): @@ -393,60 +395,61 @@ class Yolov3Module(RunModule, ImageServing): labels(np.ndarray): Predict labels. 
''' self.eval() - boxes = [] - scores = [] - self.downsample = 32 - im = self.transform(imgpath) - h, w, c = utils.img_shape(imgpath) - im_shape = paddle.to_tensor(np.array([[h, w]]).astype('int32')) - label_names = utils.get_label_infos(filelist) - img_data = paddle.to_tensor(np.array([im]).astype('float32')) - - outputs = self(img_data) - - for i, out in enumerate(outputs): - anchor_mask = self.anchor_masks[i] - mask_anchors = [] - for m in anchor_mask: - mask_anchors.append((self.anchors[2 * m])) - mask_anchors.append(self.anchors[2 * m + 1]) - - box, score = F.yolo_box( - x=out, - img_size=im_shape, - anchors=mask_anchors, - class_num=self.class_num, - conf_thresh=self.valid_thresh, - downsample_ratio=self.downsample, - name="yolo_box" + str(i)) - - boxes.append(box) - scores.append(paddle.transpose(score, perm=[0, 2, 1])) - self.downsample //= 2 - - yolo_boxes = paddle.concat(boxes, axis=1) - yolo_scores = paddle.concat(scores, axis=2) - - pred = F.multiclass_nms( - bboxes=yolo_boxes, - scores=yolo_scores, - score_threshold=self.valid_thresh, - nms_top_k=self.nms_topk, - keep_top_k=self.nms_posk, - nms_threshold=self.nms_thresh, - background_label=-1) - - bboxes = pred.numpy() - labels = bboxes[:, 0].astype('int32') - scores = bboxes[:, 1].astype('float32') - boxes = bboxes[:, 2:].astype('float32') - - if visualization: - if not os.path.exists(save_path): - os.mkdir(save_path) - utils.draw_boxes_on_image(imgpath, boxes, scores, labels, label_names, 0.5, save_path) - - return boxes, scores, labels + with paddle.no_grad(): + boxes = [] + scores = [] + self.downsample = 32 + im = self.transform(imgpath) + h, w, c = utils.img_shape(imgpath) + im_shape = paddle.to_tensor(np.array([[h, w]]).astype('int32')) + label_names = utils.get_label_infos(filelist) + img_data = paddle.to_tensor(np.array([im]).astype('float32')) + + outputs = self(img_data) + + for i, out in enumerate(outputs): + anchor_mask = self.anchor_masks[i] + mask_anchors = [] + for m in anchor_mask: + mask_anchors.append((self.anchors[2 * m])) + mask_anchors.append(self.anchors[2 * m + 1]) + + box, score = F.yolo_box( + x=out, + img_size=im_shape, + anchors=mask_anchors, + class_num=self.class_num, + conf_thresh=self.valid_thresh, + downsample_ratio=self.downsample, + name="yolo_box" + str(i)) + + boxes.append(box) + scores.append(paddle.transpose(score, perm=[0, 2, 1])) + self.downsample //= 2 + + yolo_boxes = paddle.concat(boxes, axis=1) + yolo_scores = paddle.concat(scores, axis=2) + + pred = F.multiclass_nms( + bboxes=yolo_boxes, + scores=yolo_scores, + score_threshold=self.valid_thresh, + nms_top_k=self.nms_topk, + keep_top_k=self.nms_posk, + nms_threshold=self.nms_thresh, + background_label=-1) + + bboxes = pred.numpy() + labels = bboxes[:, 0].astype('int32') + scores = bboxes[:, 1].astype('float32') + boxes = bboxes[:, 2:].astype('float32') + + if visualization: + if not os.path.exists(save_path): + os.mkdir(save_path) + utils.draw_boxes_on_image(imgpath, boxes, scores, labels, label_names, 0.5, save_path) + + return boxes, scores, labels class StyleTransferModule(RunModule, ImageServing): @@ -521,37 +524,38 @@ class StyleTransferModule(RunModule, ImageServing): output(list[np.ndarray]) : The style transformed images with bgr mode. 
''' self.eval() - style = paddle.to_tensor(self.transform(style).astype('float32')) - style = style.unsqueeze(0) - - res = [] - total_num = len(origin) - loop_num = int(np.ceil(total_num / batch_size)) - for iter_id in range(loop_num): - batch_data = [] - handle_id = iter_id * batch_size - for image_id in range(batch_size): - try: - image = self.transform(origin[handle_id + image_id]) - batch_data.append(image.astype('float32')) - except: - pass - - batch_image = np.array(batch_data) - content = paddle.to_tensor(batch_image) - - self.setTarget(style) - output = self(content) - for num in range(batch_size): - out = paddle.clip(output[num].transpose((1, 2, 0)), 0, 255).numpy().astype(np.uint8) - res.append(out) - if visualization: - style_name = "style_" + str(time.time()) + ".png" - if not os.path.exists(save_path): - os.mkdir(save_path) - path = os.path.join(save_path, style_name) - cv2.imwrite(path, out) - return res + with paddle.no_grad(): + style = paddle.to_tensor(self.transform(style).astype('float32')) + style = style.unsqueeze(0) + + res = [] + total_num = len(origin) + loop_num = int(np.ceil(total_num / batch_size)) + for iter_id in range(loop_num): + batch_data = [] + handle_id = iter_id * batch_size + for image_id in range(batch_size): + try: + image = self.transform(origin[handle_id + image_id]) + batch_data.append(image.astype('float32')) + except: + pass + + batch_image = np.array(batch_data) + content = paddle.to_tensor(batch_image) + + self.setTarget(style) + output = self(content) + for num in range(batch_size): + out = paddle.clip(output[num].transpose((1, 2, 0)), 0, 255).numpy().astype(np.uint8) + res.append(out) + if visualization: + style_name = "style_" + str(time.time()) + ".png" + if not os.path.exists(save_path): + os.mkdir(save_path) + path = os.path.join(save_path, style_name) + cv2.imwrite(path, out) + return res @serving def serving_method(self, images: list, **kwargs): @@ -655,47 +659,48 @@ class ImageSegmentationModule(ImageServing, RunModule): output(list[np.ndarray]) : The segmentation mask. 
''' self.eval() - result = [] - - total_num = len(images) - loop_num = int(np.ceil(total_num / batch_size)) - for iter_id in range(loop_num): - batch_data = [] - handle_id = iter_id * batch_size - for image_id in range(batch_size): - try: - image, _ = self.transform(images[handle_id + image_id]) - batch_data.append(image) - except: - pass - batch_image = np.array(batch_data).astype('float32') - pred = self(paddle.to_tensor(batch_image)) - pred = paddle.argmax(pred[0], axis=1, keepdim=True, dtype='int32') - - for num in range(pred.shape[0]): - if isinstance(images[handle_id + num], str): - image = cv2.imread(images[handle_id + num]) - else: - image = images[handle_id + num] - h, w, c = image.shape - pred_final = utils.reverse_transform(pred[num:num + 1], (h, w), self.transforms.transforms) - pred_final = paddle.squeeze(pred_final) - pred_final = pred_final.numpy().astype('uint8') - - if visualization: - added_image = utils.visualize(images[handle_id + num], pred_final, weight=0.6) - pred_mask = utils.get_pseudo_color_map(pred_final) - pred_image_path = os.path.join(save_path, 'image', str(time.time()) + ".png") - pred_mask_path = os.path.join(save_path, 'mask', str(time.time()) + ".png") - if not os.path.exists(os.path.dirname(pred_image_path)): - os.makedirs(os.path.dirname(pred_image_path)) - if not os.path.exists(os.path.dirname(pred_mask_path)): - os.makedirs(os.path.dirname(pred_mask_path)) - cv2.imwrite(pred_image_path, added_image) - pred_mask.save(pred_mask_path) - - result.append(pred_final) - return result + with paddle.no_grad(): + result = [] + + total_num = len(images) + loop_num = int(np.ceil(total_num / batch_size)) + for iter_id in range(loop_num): + batch_data = [] + handle_id = iter_id * batch_size + for image_id in range(batch_size): + try: + image, _ = self.transform(images[handle_id + image_id]) + batch_data.append(image) + except: + pass + batch_image = np.array(batch_data).astype('float32') + pred = self(paddle.to_tensor(batch_image)) + pred = paddle.argmax(pred[0], axis=1, keepdim=True, dtype='int32') + + for num in range(pred.shape[0]): + if isinstance(images[handle_id + num], str): + image = cv2.imread(images[handle_id + num]) + else: + image = images[handle_id + num] + h, w, c = image.shape + pred_final = utils.reverse_transform(pred[num:num + 1], (h, w), self.transforms.transforms) + pred_final = paddle.squeeze(pred_final) + pred_final = pred_final.numpy().astype('uint8') + + if visualization: + added_image = utils.visualize(images[handle_id + num], pred_final, weight=0.6) + pred_mask = utils.get_pseudo_color_map(pred_final) + pred_image_path = os.path.join(save_path, 'image', str(time.time()) + ".png") + pred_mask_path = os.path.join(save_path, 'mask', str(time.time()) + ".png") + if not os.path.exists(os.path.dirname(pred_image_path)): + os.makedirs(os.path.dirname(pred_image_path)) + if not os.path.exists(os.path.dirname(pred_mask_path)): + os.makedirs(os.path.dirname(pred_mask_path)) + cv2.imwrite(pred_image_path, added_image) + pred_mask.save(pred_mask_path) + + result.append(pred_final) + return result @serving def serving_method(self, images: List[str], **kwargs): diff --git a/paddlehub/server/git_source.py b/paddlehub/server/git_source.py index 4496d081fef2bc53c574f79641d741a8402c608c..dbd46e19559febf25cf7fa32ab8dbd50d90316db 100644 --- a/paddlehub/server/git_source.py +++ b/paddlehub/server/git_source.py @@ -21,12 +21,6 @@ import sys from collections import OrderedDict from typing import List -# For some environments where git is not installed, we need 
to set this environment -# variable to avoid errors. -os.environ['GIT_PYTHON_REFRESH'] = 'quiet' -import git -from git import Repo - from paddlehub.module.module import RunModule from paddlehub.env import SOURCES_HOME from paddlehub.utils import log, utils @@ -42,6 +36,11 @@ class GitSource(object): ''' def __init__(self, url: str, path: str = None): + # For some environments where git is not installed, we need to set this environment + # variable to avoid errors. + os.environ['GIT_PYTHON_REFRESH'] = 'quiet' + from git import Repo + self.url = url self.path = os.path.join(SOURCES_HOME, utils.md5(url)) @@ -152,6 +151,8 @@ class GitSource(object): Args: url(str) : Url to check ''' + import git + try: git.cmd.Git().ls_remote(url) return True diff --git a/paddlehub/utils/pypi.py b/paddlehub/utils/pypi.py index 8f0f3c68c4a254138b0d647b81d484803735ede6..6a6d76535f25b13e46448e72eef7852c9d9f9654 100644 --- a/paddlehub/utils/pypi.py +++ b/paddlehub/utils/pypi.py @@ -15,10 +15,11 @@ import os import subprocess +import sys from typing import IO from paddlehub.utils.utils import Version -from paddlehub.utils.io import discard_oe, typein +from paddlehub.utils.io import discard_oe def get_installed_packages() -> dict: @@ -40,13 +41,14 @@ def check(package: str, version: str = '') -> bool: return pdict[package].match(version) -def install(package: str, version: str = '', upgrade: bool = False, ostream: IO = None, estream: IO = None) -> bool: +def install(package: str, version: str = '', upgrade: bool = False, ostream: IO = sys.stdout, + estream: IO = sys.stderr) -> bool: '''Install the python package.''' package = package.replace(' ', '') if version: package = '{}=={}'.format(package, version) - cmd = 'pip install "{}"'.format(package) + cmd = '{} -m pip install "{}"'.format(sys.executable, package) if upgrade: cmd += ' --upgrade' @@ -59,9 +61,9 @@ def install(package: str, version: str = '', upgrade: bool = False, ostream: IO return result == 0 -def install_from_file(file: str, ostream: IO = None, estream: IO = None) -> bool: +def install_from_file(file: str, ostream: IO = sys.stdout, estream: IO = sys.stderr) -> bool: '''Install the python package.''' - cmd = 'pip install -r {}'.format(file) + cmd = '{} -m pip install -r {}'.format(sys.executable, file) result, content = subprocess.getstatusoutput(cmd) if result: @@ -71,14 +73,13 @@ def install_from_file(file: str, ostream: IO = None, estream: IO = None) -> bool return result == 0 -def uninstall(package: str, ostream: IO = None, estream: IO = None) -> bool: +def uninstall(package: str, ostream: IO = sys.stdout, estream: IO = sys.stderr) -> bool: '''Uninstall the python package.''' - with typein('y'): - # type in 'y' to confirm the uninstall operation - cmd = 'pip uninstall {}'.format(package) - result, content = subprocess.getstatusoutput(cmd) - if result: - estream.write(content) - else: - ostream.write(content) + # type in 'y' to confirm the uninstall operation + cmd = '{} -m pip uninstall {} -y'.format(sys.executable, package) + result, content = subprocess.getstatusoutput(cmd) + if result: + estream.write(content) + else: + ostream.write(content) return result == 0 diff --git a/paddlehub/vision/utils.py b/paddlehub/vision/utils.py index 826b6a281246c1ed6c7cc4b7f42f35bd224ac204..7d1e216d4bbd1de573d52b705c91482e8ca3471e 100644 --- a/paddlehub/vision/utils.py +++ b/paddlehub/vision/utils.py @@ -19,7 +19,6 @@ import cv2 import paddle import PIL import numpy as np -import matplotlib as plt import paddle.nn.functional as F from scipy.sparse import 
csr_matrix
 
@@ -107,6 +106,10 @@ def draw_boxes_on_image(image_path: str,
                         score_thresh: float = 0.5,
                         save_path: str = 'result'):
     """Draw boxes on images."""
+    # On Windows, importing paddlenlp and matplotlib at the same time will cause Python
+    # to fail to catch C++ exceptions. Delay the matplotlib import to avoid this problem.
+    import matplotlib as plt
+
     image = np.array(PIL.Image.open(image_path))
     plt.figure()
     _, ax = plt.subplots(1)
diff --git a/requirements.txt b/requirements.txt
index 31f4aca0ea20a9364a68c2d7e4e43817422b95a8..f95cfe689940dbd35f750b4267f2804d8a6fe53b 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,13 +3,12 @@ colorlog
 easydict
 filelock
 flask >= 1.1.0
-gitpython
 numpy
 matplotlib
 opencv-python
 packaging
 paddle2onnx >= 0.5.1
-paddlenlp >= 2.0.0rc5
+paddlenlp >= 2.0.0
 Pillow
 pyyaml
 pyzmq