diff --git a/.github/ISSUE_TEMPLATE/----.md b/.github/ISSUE_TEMPLATE/----.md new file mode 100644 index 0000000000000000000000000000000000000000..376fda11b8c9575f4323d48e1cd6ffe9ec222fbd --- /dev/null +++ b/.github/ISSUE_TEMPLATE/----.md @@ -0,0 +1,20 @@ +--- +name: 使用咨询 +about: 使用方法咨询 +title: "[HOW TO]" +labels: question +assignees: '' + +--- + +欢迎您使用PaddleClas并反馈相关问题,非常感谢您对PaddleClas的贡献! +提出issue时,辛苦您提供以下信息,方便我们快速定位问题并及时有效地解决您的问题。如果您的问题比较复杂,想与我们直接交流,可以首页扫码加入微信群。 + +### 使用场景 + +### 数据情况 + +### 问题描述 + +### 预期效果 + diff --git a/.github/ISSUE_TEMPLATE/---clas-issue-.md b/.github/ISSUE_TEMPLATE/---clas-issue-.md index 01bd38f9b243a6b1ab357219b50f0065db708529..fb687fa6acc34a46934453a3d72c1af64d848314 100644 --- a/.github/ISSUE_TEMPLATE/---clas-issue-.md +++ b/.github/ISSUE_TEMPLATE/---clas-issue-.md @@ -1,18 +1,36 @@ --- name: 问题反馈 about: PaddleClas问题反馈 -title: '' -labels: '' +title: "[BUG]" +labels: bug assignees: '' --- 欢迎您使用PaddleClas并反馈相关问题,非常感谢您对PaddleClas的贡献! 提出issue时,辛苦您提供以下信息,方便我们快速定位问题并及时有效地解决您的问题: - 1. PaddleClas版本以及PaddlePaddle版本:请您提供您使用的版本号或分支信息,如PaddleClas release/2.2和PaddlePaddle 2.1.0 - 2. 涉及的其他产品使用的版本号:如您在使用PaddleClas的同时还在使用其他产品,如PaddleServing、PaddleInference等,请您提供其版本号 - 3. 训练环境信息: - a. 具体操作系统,如Linux/Windows/MacOS - b. Python版本号,如Python3.6/7/8 - c. CUDA/cuDNN版本, 如CUDA10.2/cuDNN 7.6.5等 - 4. 完整的代码(相比于repo中代码,有改动的地方)、详细的错误信息及相关log + +### 必要信息 +**如果您不能提供以下必要信息,可能会影响问题解决的速度** +#### 1. PaddleClas版本以及PaddlePaddle版本 + +#### 2. 最小可复现问题的方法 +##### 2.1 原始代码问题 + +##### 2.2 二次开发问题 + +#### 3. 
报错信息和log + +### 补充信息 + +#### 1.训练环境信息: +##### 1.1 操作系统 +##### 1.2 Python版本号 +##### 1.3 CUDA/cuDNN版本 +##### 1.4 涉及的其他产品的版本号 + +#### 2.修复建议 + diff --git a/.github/ISSUE_TEMPLATE/custom.md b/.github/ISSUE_TEMPLATE/custom.md new file mode 100644 index 0000000000000000000000000000000000000000..786ee5561fcf3eb525ebcbe81de9b01b1ca2b68b --- /dev/null +++ b/.github/ISSUE_TEMPLATE/custom.md @@ -0,0 +1,18 @@ +--- +name: 功能需求 +about: 新功能需求 +title: "[FEATURE]" +labels: question +assignees: '' + +--- + +欢迎您使用PaddleClas并反馈相关问题,非常感谢您对PaddleClas的贡献! +提出issue时,辛苦您提供以下信息,方便我们快速定位问题并及时有效地解决您的问题。如果您的问题比较复杂,想与我们直接交流,可以首页扫码加入微信群。 + +### 使用场景 + +### 预期效果 + +### 效果参考 + diff --git a/README.md b/README.md index a80b5f53c19fb642795c3dfee16a40d4ed9d9df5..44885f554afdc7e00188fae2987e7fbbb4278fcc 120000 --- a/README.md +++ b/README.md @@ -1 +1 @@ -README_ch.md +README_ch.md \ No newline at end of file diff --git a/README_ch.md b/README_ch.md index 0619f91fe8266557d75a97c9305d1773af31ff85..74b71bbeec06c9a207291a19961077ef141841a0 100644 --- a/README_ch.md +++ b/README_ch.md @@ -8,7 +8,7 @@ **近期更新** -- 2022.1.27 全面升级文档;新增[PaddleServing C++ pipeline部署方式](./deploy/paddleserving/readme.md)和[18M图像识别安卓部署Demo](./deploy/lite_shitu/README.md)。 +- 2022.1.27 全面升级文档;新增[PaddleServing C++ pipeline部署方式](./deploy/paddleserving)和[18M图像识别安卓部署Demo](./deploy/lite_shitu)。 - 2021.11.1 发布[PP-ShiTu技术报告](https://arxiv.org/pdf/2111.00775.pdf),新增饮料识别demo - 2021.10.23 发布轻量级图像识别系统PP-ShiTu,CPU上0.2s即可完成在10w+库的图像识别。 [点击这里](./docs/zh_CN/quick_start/quick_start_recognition.md)立即体验 @@ -38,7 +38,7 @@ Res2Net200_vd预训练模型Top-1精度高达85.1%。 * 您可以扫描下面的微信群二维码, 加入PaddleClas 微信交流群。获得更高效的问题答疑,与各行各业开发者充分交流,期待您的加入。
- +
## 快速体验 diff --git a/README_en.md b/README_en.md index e86ed3fe1d8e39d8900b5f5da42711966295d61e..57b809d6be08d102d5f678d53a630023b21f758d 100644 --- a/README_en.md +++ b/README_en.md @@ -41,7 +41,7 @@ Four sample solutions are provided, including product recognition, vehicle recog * You can also scan the QR code below to join the PaddleClas WeChat group to get more efficient answers to your questions and to communicate with developers from all walks of life. We look forward to hearing from you.
- +
## Quick Start diff --git a/deploy/configs/inference_drink.yaml b/deploy/configs/inference_drink.yaml index 61d116f9f407ca032e59132ec3971622c9df9a6e..d044965f446634dcc151fd496a9d7b403b869d68 100644 --- a/deploy/configs/inference_drink.yaml +++ b/deploy/configs/inference_drink.yaml @@ -51,8 +51,8 @@ RecPostProcess: null # indexing engine config IndexProcess: index_method: "HNSW32" # supported: HNSW32, IVF, Flat - index_dir: "./drink_dataset_v1.0/gallery" - image_root: "./drink_dataset_v1.0/index" + image_root: "./drink_dataset_v1.0/gallery" + index_dir: "./drink_dataset_v1.0/index" data_file: "./drink_dataset_v1.0/gallery/drink_label.txt" index_operation: "new" # suported: "append", "remove", "new" delimiter: " " diff --git a/deploy/utils/predictor.py b/deploy/utils/predictor.py index d44ae03df57def97bed6ab91205c327c3d318a2d..7fd1d6dccb61b86f1fece2e3a909c7005f93ca8a 100644 --- a/deploy/utils/predictor.py +++ b/deploy/utils/predictor.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+import platform import os import argparse import base64 @@ -50,8 +51,10 @@ class Predictor(object): else: config.disable_gpu() if args.enable_mkldnn: - # cache 10 different shapes for mkldnn to avoid memory leak - config.set_mkldnn_cache_capacity(10) + # there is no set_mkldnn_cache_capacity() on macOS + if platform.system() != "Darwin": + # cache 10 different shapes for mkldnn to avoid memory leak + config.set_mkldnn_cache_capacity(10) config.enable_mkldnn() config.set_cpu_math_library_num_threads(args.cpu_num_threads) diff --git a/docs/en/advanced_tutorials/index.rst b/docs/en/advanced_tutorials/index.rst new file mode 100644 index 0000000000000000000000000000000000000000..e741733e7added372c21e3e40896f5bd39d21727 --- /dev/null +++ b/docs/en/advanced_tutorials/index.rst @@ -0,0 +1,12 @@ +advanced_tutorials +================================ + +.. toctree:: + :maxdepth: 2 + + DataAugmentation_en.md + distillation/index + multilabel/index + model_prune_quantization_en.md + code_overview_en.md + how_to_contribute_en.md diff --git a/docs/en/advanced_tutorials/multilabel/index.rst b/docs/en/advanced_tutorials/multilabel/index.rst index 1e8acfdfb3c0d93101f78fdee05ce364041c1882..07e0a54a23fc30871e1b4265002760a9f41c18a1 100644 --- a/docs/en/advanced_tutorials/multilabel/index.rst +++ b/docs/en/advanced_tutorials/multilabel/index.rst @@ -4,4 +4,4 @@ Multilabel Classification .. 
toctree:: :maxdepth: 3 - multilabel.md \ No newline at end of file + multilabel_en.md \ No newline at end of file diff --git a/docs/en/algorithm_introduction/DataAugmentation_en.md b/docs/en/algorithm_introduction/DataAugmentation_en.md index baa9ca5e1fe5e18635b4b2571f6ec6eddc4a5004..9ada02ffd10b3e5c3355738220c8e1713aa412e2 100644 --- a/docs/en/algorithm_introduction/DataAugmentation_en.md +++ b/docs/en/algorithm_introduction/DataAugmentation_en.md @@ -23,7 +23,7 @@ Data augmentation is a commonly used regularization method in image classification task, which is often used in scenarios with insufficient data or large model. In this chapter, we mainly introduce 8 image augmentation methods besides standard augmentation methods. Users can apply these methods in their own tasks for better model performance. Under the same conditions, these augmentation methods' performance on ImageNet1k dataset is shown as follows. -![](../../../images/image_aug/main_image_aug.png) +![](../../images/image_aug/main_image_aug.png) @@ -50,7 +50,7 @@ Compared with the above standard image augmentation methods, the researchers hav Visualization results of some images after augmentation are shown as follows. -![](../../../images/image_aug/image_aug_samples_s_en.jpg) +![](../../images/image_aug/image_aug_samples_s_en.jpg) The following table shows more detailed information of the transformations. @@ -72,7 +72,7 @@ The following table shows more detailed information of the transformations. PaddleClas integrates all the above data augmentation strategies. More details including principles and usage of the strategies are introduced in the following chapters. For better visualization, we use the following figure to show the changes after the transformations. And `RandCrop` is replaced with` Resize` for simplification. 
-![](../../../images/image_aug/test_baseline.jpeg) +![](../../images/image_aug/test_baseline.jpeg) ### 2.1 Image Transformation @@ -91,7 +91,7 @@ Unlike conventional artificially designed image augmentation methods, AutoAugmen The images after `AutoAugment` are as follows. -![][test_autoaugment] +![](../../images/image_aug/test_autoaugment.jpeg) #### 2.1.2 RandAugment @@ -107,7 +107,7 @@ In `RandAugment`, the author proposes a random augmentation method. Instead of u The images after `RandAugment` are as follows. -![][test_randaugment] +![](../../images/image_aug/test_randaugment.jpeg) #### 2.1.3 TimmAutoAugment @@ -137,7 +137,7 @@ Cutout is a kind of dropout, but occludes input image rather than feature map. I The images after `Cutout` are as follows. -![][test_cutout] +![](../../images/image_aug/test_cutout.jpeg) #### 2.2.2 RandomErasing @@ -150,7 +150,7 @@ RandomErasing is similar to the Cutout. It is also to solve the problem of poor The images after `RandomErasing` are as follows. -![][test_randomerassing] +![](../../images/image_aug/test_randomerassing.jpeg) #### 2.2.3 HideAndSeek @@ -162,11 +162,11 @@ Github repo: [https://github.com/kkanshul/Hide-and-Seek](https://github.com/kkan Images are divided into some patches for `HideAndSeek` and masks are generated with certain probability for each patch. The meaning of the masks in different areas is shown in the figure below. -![][hide_and_seek_mask_expanation] +![](../../images/image_aug/hide-and-seek-visual.png) The images after `HideAndSeek` are as follows. -![][test_hideandseek] +![](../../images/image_aug/test_hideandseek.jpeg) #### 2.2.4 GridMask @@ -180,7 +180,7 @@ The author points out that the previous method based on image cropping has two p 1. Excessive deletion of the area may cause most or all of the target subject to be deleted, or cause the context information loss, resulting in the images after enhancement becoming noisy data. 2. Reserving too much area has little effect on the object and context. 
-![][gridmask-0] +![](../../images/image_aug/gridmask-0.png) Therefore, it is the core problem to be solved how to if you avoid over-deletion or over-retention becomes the core problem to be solved. @@ -195,7 +195,7 @@ It shows that the second method is better. The images after `GridMask` are as follows. -![][test_gridmask] +![](../../images/image_aug/test_gridmask.jpeg) ### 2.3 Image mix @@ -215,7 +215,7 @@ Mixup is the first solution for image aliasing, it is easy to realize and perfor The images after `Mixup` are as follows. -![][test_mixup] +![](../../images/image_aug/test_mixup.png) #### 2.3.2 Cutmix @@ -229,7 +229,7 @@ Cutmix randomly cuts out an `ROI` from one image, and then covered onto the corr The images after `Cutmix` are as follows. -![][test_cutmix] +![](../../images/image_aug/test_cutmix.png) For the practical part of data augmentation, please refer to [Data Augmentation Practice](../advanced_tutorials/DataAugmentation_en.md). diff --git a/docs/en/algorithm_introduction/ImageNet_models_en.md b/docs/en/algorithm_introduction/ImageNet_models_en.md index 54b3b5211f1004d73cfca620a80af6256e97fe89..c9d0a7270ad74d9586bc087dea87a69502f64fa1 100644 --- a/docs/en/algorithm_introduction/ImageNet_models_en.md +++ b/docs/en/algorithm_introduction/ImageNet_models_en.md @@ -28,6 +28,7 @@ - [21. RedNet series](#21) - [22. TNT series](#22) - [23. Other models](#23) +- [Reference](#reference) @@ -42,21 +43,15 @@ Based on the ImageNet-1k classification dataset, the 37 classification network s Curves of accuracy to the inference time of common server-side models are shown as follows. -
- -
+![](../../images/models/V100_benchmark/v100.fp32.bs1.main_fps_top1_s.png) Curves of accuracy to the inference time of common mobile-side models are shown as follows. -
- -
+![](../../images/models/mobile_arm_top1.png) Curves of accuracy to the inference time of some VisionTransformer models are shown as follows. -
- -
+![](../../images/models/V100_benchmark/v100.fp32.bs1.visiontransformer.png) @@ -69,7 +64,7 @@ Accuracy and inference time of the prtrained models based on SSLD distillation a | Model | Top-1 Acc | Reference
Top-1 Acc | Acc gain | time(ms)
bs=1 | time(ms)
bs=4 | time(ms)
bs=8 | FLOPs(G) | Params(M) | Pretrained Model Download Address | Inference Model Download Address | |---------------------|-----------|-----------|---------------|----------------|-----------|----------|-----------|-----------------------------------|-----------------------------------|-----------------------------------| -| ResNet34_vd_ssld | 0.797 | 0.760 | 0.037 | 2.00 | 3.28 | 5.84 | 3.93 | 21.84 | [Download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/ResNet34_vd_ssld_pretrained.pdparams)   | [Download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/ResNet34_vd_ssld.tar)   | +| ResNet34_vd_ssld | 0.797 | 0.760 | 0.037 | 2.00 | 3.28 | 5.84 | 3.93 | 21.84 | [Download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/ResNet34_vd_ssld_pretrained.pdparams)   | [Download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/ResNet34_vd_ssld_infer.tar)   | | ResNet50_vd_ssld | 0.830 | 0.792 | 0.039 | 2.60 | 4.86 | 7.63 | 4.35 | 25.63 | [Download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/ResNet50_vd_ssld_pretrained.pdparams) | [Download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/ResNet50_vd_ssld_infer.tar) | | ResNet101_vd_ssld | 0.837 | 0.802 | 0.035 | 4.43 | 8.25 | 12.60 | 8.08 | 44.67 | [Download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/ResNet101_vd_ssld_pretrained.pdparams) | [Download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/ResNet101_vd_ssld_infer.tar) | | Res2Net50_vd_26w_4s_ssld | 0.831 | 0.798 | 0.033 | 3.59 | 6.35 | 9.50 | 4.28 | 25.76 | [Download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Res2Net50_vd_26w_4s_ssld_pretrained.pdparams) | [Download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/Res2Net50_vd_26w_4s_ssld_infer.tar) | @@ -107,7 +102,7 @@ Accuracy 
and inference time of the prtrained models based on SSLD distillation a -## 3. PP-LCNet series +## 3. PP-LCNet series [[28](#ref28)] The accuracy and speed indicators of the PP-LCNet series models are shown in the following table. For more information about this series of models, please refer to: [PP-LCNet series model documents](../models/PP-LCNet_en.md)。 @@ -124,7 +119,7 @@ The accuracy and speed indicators of the PP-LCNet series models are shown in the -## 4. ResNet series +## 4. ResNet series [[1](#ref1)] The accuracy and speed indicators of ResNet and ResNet_vd series models are shown in the following table. For more information about this series of models, please refer to: [ResNet and ResNet_vd series model documents](../models/ResNet_and_vd_en.md)。 @@ -148,7 +143,7 @@ The accuracy and speed indicators of ResNet and ResNet_vd series models are show -## 5. Mobile series +## 5. Mobile series [[3](#ref3)][[4](#ref4)][[5](#ref5)][[6](#ref6)][[23](#ref23)] The accuracy and speed indicators of the mobile series models are shown in the following table. For more information about this series, please refer to: [Mobile series model documents](../models/Mobile_en.md)。 @@ -197,7 +192,7 @@ The accuracy and speed indicators of the mobile series models are shown in the f -## 6. SEResNeXt and Res2Net series +## 6. SEResNeXt and Res2Net series [[7](#ref7)][[8](#ref8)][[9](#ref9)] The accuracy and speed indicators of the SEResNeXt and Res2Net series models are shown in the following table. For more information about the models of this series, please refer to: [SEResNeXt and Res2Net series model documents](../models/SEResNext_and_Res2Net_en.md). @@ -232,7 +227,7 @@ The accuracy and speed indicators of the SEResNeXt and Res2Net series models are -## 7. DPN and DenseNet series +## 7. DPN and DenseNet series [[14](#ref14)][[15](#ref15)] The accuracy and speed indicators of the DPN and DenseNet series models are shown in the following table. 
For more information about the models of this series, please refer to: [DPN and DenseNet series model documents](../models/DPN_DenseNet_en.md). @@ -250,11 +245,9 @@ The accuracy and speed indicators of the DPN and DenseNet series models are show | DPN107 | 0.8089 | 0.9532 | 19.46 | 35.62 | 50.22 | 18.38 | 87.13 | [Download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DPN107_pretrained.pdparams) | [Download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/DPN107_infer.tar) | | DPN131 | 0.8070 | 0.9514 | 19.64 | 34.60 | 47.42 | 16.09 | 79.48 | [Download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DPN131_pretrained.pdparams) | [Download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/DPN131_infer.tar) | - - -## 8. HRNet series +## 8. HRNet series [[13](#ref13)] The accuracy and speed indicators of the HRNet series models are shown in the following table. For more information about the models of this series, please refer to: [HRNet series model documents](../models/HRNet_en.md). @@ -274,7 +267,7 @@ The accuracy and speed indicators of the HRNet series models are shown in the fo -## 9. Inception series +## 9. Inception series [[10](#ref10)][[11](#ref11)][[12](#ref12)][[26](#ref26)] The accuracy and speed indicators of the Inception series models are shown in the following table. For more information about this series of models, please refer to: [Inception series model documents](../models/Inception_en.md). @@ -291,7 +284,7 @@ The accuracy and speed indicators of the Inception series models are shown in th -## 10. EfficientNet and ResNeXt101_wsl series +## 10. EfficientNet and ResNeXt101_wsl series [[16](#ref16)][[17](#ref17)] The accuracy and speed indicators of the EfficientNet and ResNeXt101_wsl series models are shown in the following table. 
For more information about this series of models, please refer to: [EfficientNet and ResNeXt101_wsl series model documents](../models/EfficientNet_and_ResNeXt101_wsl_en.md). @@ -314,7 +307,7 @@ The accuracy and speed indicators of the EfficientNet and ResNeXt101_wsl series -## 11. ResNeSt and RegNet series +## 11. ResNeSt and RegNet series [[24](#ref24)][[25](#ref25)] The accuracy and speed indicators of the ResNeSt and RegNet series models are shown in the following table. For more information about the models of this series, please refer to: [ResNeSt and RegNet series model documents](../models/ResNeSt_RegNet_en.md). @@ -326,11 +319,10 @@ The accuracy and speed indicators of the ResNeSt and RegNet series models are sh -## 12. ViT and DeiT series +## 12. ViT and DeiT series [[31](#ref31)][[32](#ref32)] The accuracy and speed indicators of ViT (Vision Transformer) and DeiT (Data-efficient Image Transformers) series models are shown in the following table. For more information about this series of models, please refer to: [ViT_and_DeiT series model documents](../models/ViT_and_DeiT_en.md). - | Model | Top-1 Acc | Top-5 Acc | time(ms)
bs=1 | time(ms)
bs=4 | time(ms)
bs=8 | FLOPs(G) | Params(M) | Pretrained Model Download Address | Inference Model Download Address | |------------------------|-----------|-----------|------------------|------------------|----------|------------------------|------------------------|------------------------|------------------------| | ViT_small_
patch16_224 | 0.7769 | 0.9342 | 3.71 | 9.05 | 16.72 | 9.41 | 48.60 | [Download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ViT_small_patch16_224_pretrained.pdparams) | [Download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/ViT_small_patch16_224_infer.tar) | @@ -341,8 +333,6 @@ The accuracy and speed indicators of ViT (Vision Transformer) and DeiT (Data-eff |ViT_large_
patch16_384| 0.8513 | 0.9736 | 39.51 | 152.46 | 304.06 | 174.70 | 304.12 | [Download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ViT_large_patch16_384_pretrained.pdparams) | [Download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/ViT_large_patch16_384_infer.tar) | |ViT_large_
patch32_384| 0.8153 | 0.9608 | 11.44 | 36.09 | 70.63 | 44.24 | 306.48 | [Download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ViT_large_patch32_384_pretrained.pdparams) | [Download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/ViT_large_patch32_384_infer.tar) | - - | Model | Top-1 Acc | Top-5 Acc | time(ms)
bs=1 | time(ms)
bs=4 | time(ms)
bs=8 | FLOPs(G) | Params(M) | Pretrained Model Download Address | Inference Model Download Address | |------------------------|-----------|-----------|------------------|------------------|----------|------------------------|------------------------|------------------------|------------------------| | DeiT_tiny_
patch16_224 | 0.718 | 0.910 | 3.61 | 3.94 | 6.10 | 1.07 | 5.68 | [Download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_tiny_patch16_224_pretrained.pdparams) | [Download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/DeiT_tiny_patch16_224_infer.tar) | @@ -356,11 +346,10 @@ The accuracy and speed indicators of ViT (Vision Transformer) and DeiT (Data-eff -## 13. RepVGG series +## 13. RepVGG series [[36](#ref36)] The accuracy and speed indicators of RepVGG series models are shown in the following table. For more introduction, please refer to: [RepVGG series model documents](../models/RepVGG_en.md). - | Model | Top-1 Acc | Top-5 Acc | time(ms)
bs=1 | time(ms)
bs=4 | time(ms)
bs=8 | FLOPs(G) | Params(M) | Pretrained Model Download Address | Inference Model Download Address | |------------------------|-----------|-----------|------------------|------------------|----------|-----------|------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------| | RepVGG_A0 | 0.7131 | 0.9016 | | | | 1.36 | 8.31 | [Download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_A0_pretrained.pdparams) | [Download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/RepVGG_A0_infer.tar) | @@ -376,7 +365,7 @@ The accuracy and speed indicators of RepVGG series models are shown in the follo -## 14. MixNet series +## 14. MixNet series [[29](#ref29)] The accuracy and speed indicators of the MixNet series models are shown in the following table. For more introduction, please refer to: [MixNet series model documents](../models/MixNet_en.md). @@ -388,7 +377,7 @@ The accuracy and speed indicators of the MixNet series models are shown in the f -## 15. ReXNet series +## 15. ReXNet series [[30](#ref30)] The accuracy and speed indicators of ReXNet series models are shown in the following table. For more introduction, please refer to: [ReXNet series model documents](../models/ReXNet_en.md). @@ -402,7 +391,7 @@ The accuracy and speed indicators of ReXNet series models are shown in the follo -## 16. SwinTransformer series +## 16. SwinTransformer series [[27](#ref27)] The accuracy and speed indicators of SwinTransformer series models are shown in the following table. For more introduction, please refer to: [SwinTransformer series model documents](../models/SwinTransformer_en.md). 
@@ -414,20 +403,20 @@ The accuracy and speed indicators of SwinTransformer series models are shown in | SwinTransformer_base_patch4_window12_384 | 0.8439 | 0.9693 | 19.52 | 64.56 | 123.30 | 44.45 | 87.70 | [Download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SwinTransformer_base_patch4_window12_384_pretrained.pdparams) | [Download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/SwinTransformer_base_patch4_window12_384_infer.tar) | | SwinTransformer_base_patch4_window7_224[1] | 0.8487 | 0.9746 | 13.53 | 23.46 | 39.13 | 15.13 | 87.70 | [Download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SwinTransformer_base_patch4_window7_224_22kto1k_pretrained.pdparams) | [Download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/SwinTransformer_base_patch4_window7_224_infer.tar) | | SwinTransformer_base_patch4_window12_384[1] | 0.8642 | 0.9807 | 19.65 | 64.72 | 123.42 | 44.45 | 87.70 | [Download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SwinTransformer_base_patch4_window12_384_22kto1k_pretrained.pdparams) | [Download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/SwinTransformer_base_patch4_window12_384_infer.tar) | -| SwinTransformer_large_patch4_window7_224[1] | 0.8596 | 0.9783 | 15.74 | 38.57 | 71.49 | 34.02 | 196.43 | [Download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SwinTransformer_large_patch4_window7_224_22kto1k_pretrained.pdparams) | [Download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/SwinTransformer_large_patch4_window7_224_infer.tar) | -| SwinTransformer_large_patch4_window12_384[1] | 0.8719 | 0.9823 | 32.61 | 116.59 | 223.23 | 99.97 | 196.43 | [Download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SwinTransformer_large_patch4_window12_384_22kto1k_pretrained.pdparams) | [Download 
link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/SwinTransformer_large_patch4_window12_384_infer.tar) | +| SwinTransformer_large_patch4_window7_224[1] | 0.8596 | 0.9783 | 15.74 | 38.57 | 71.49 | 34.02 | 196.43 | [Download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SwinTransformer_large_patch4_window7_224_22kto1k_pretrained.pdparams) | [Download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/SwinTransformer_large_patch4_window7_224_22kto1k_infer.tar) | +| SwinTransformer_large_patch4_window12_384[1] | 0.8719 | 0.9823 | 32.61 | 116.59 | 223.23 | 99.97 | 196.43 | [Download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SwinTransformer_large_patch4_window12_384_22kto1k_pretrained.pdparams) | [Download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/SwinTransformer_large_patch4_window12_384_22kto1k_infer.tar) | [1]:It is pre-trained based on the ImageNet22k dataset, and then transferred and learned from the ImageNet1k dataset. -## 17. LeViT series +## 17. LeViT series [[33](#ref33)] The accuracy and speed indicators of LeViT series models are shown in the following table. For more introduction, please refer to: [LeViT series model documents](../models/LeViT_en.md). | Model | Top-1 Acc | Top-5 Acc | time(ms)
bs=1 | time(ms)
bs=4 | time(ms)
bs=8 | FLOPs(M) | Params(M) | Pretrained Model Download Address | Inference Model Download Address | | ---------- | --------- | --------- | ---------------- | ---------------- | -------- | --------- | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | -| LeViT_128S | 0.7598 | 0.9269 | | | | 281 | 7.42 | [Download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/LeViT_128S_pretrained.pdparams) | [Download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/eViT_128S_infer.tar) | +| LeViT_128S | 0.7598 | 0.9269 | | | | 281 | 7.42 | [Download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/LeViT_128S_pretrained.pdparams) | [Download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/LeViT_128S_infer.tar) | | LeViT_128 | 0.7810 | 0.9371 | | | | 365 | 8.87 | [Download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/LeViT_128_pretrained.pdparams) | [Download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/LeViT_128_infer.tar) | | LeViT_192 | 0.7934 | 0.9446 | | | | 597 | 10.61 | [Download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/LeViT_192_pretrained.pdparams) | [Download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/LeViT_192_infer.tar) | | LeViT_256 | 0.8085 | 0.9497 | | | | 1049 | 18.45 | [Download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/LeViT_256_pretrained.pdparams) | [Download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/LeViT_256_infer.tar) | @@ -437,7 +426,7 @@ The accuracy and speed indicators of LeViT series models are shown in the follow -## 18. Twins series +## 18. Twins series [[34](#ref34)] The accuracy and speed indicators of Twins series models are shown in the following table. 
For more introduction, please refer to: [Twins series model documents](../models/Twins_en.md). @@ -454,7 +443,7 @@ The accuracy and speed indicators of Twins series models are shown in the follow -## 19. HarDNet series +## 19. HarDNet series [[37](#ref37)] The accuracy and speed indicators of HarDNet series models are shown in the following table. For more introduction, please refer to: [HarDNet series model documents](../models/HarDNet_en.md). @@ -467,7 +456,7 @@ The accuracy and speed indicators of HarDNet series models are shown in the foll -## 20. DLA series +## 20. DLA series [[38](#ref38)] The accuracy and speed indicators of DLA series models are shown in the following table. For more introduction, please refer to: [DLA series model documents](../models/DLA_en.md). @@ -485,7 +474,7 @@ The accuracy and speed indicators of DLA series models are shown in the followin -## 21. RedNet series +## 21. RedNet series [[39](#ref39)] The accuracy and speed indicators of RedNet series models are shown in the following table. For more introduction, please refer to: [RedNet series model documents](../models/RedNet_en.md). @@ -499,7 +488,7 @@ The accuracy and speed indicators of RedNet series models are shown in the follo -## 22. TNT series +## 22. TNT series [[35](#ref35)] The accuracy and speed indicators of TNT series models are shown in the following table. For more introduction, please refer to: [TNT series model documents](../models/TNT_en.md). @@ -513,7 +502,7 @@ The accuracy and speed indicators of TNT series models are shown in the followin ## 23. Other models -The accuracy and speed indicators of AlexNet, SqueezeNet series, VGG series, DarkNet53 and other models are shown in the following table. For more information, please refer to: [Other model documents](../models/Others_en.md). 
+The accuracy and speed indicators of AlexNet [[18](#ref18)], SqueezeNet series [[19](#ref19)], VGG series [[20](#ref20)], DarkNet53 [[21](#ref21)] and other models are shown in the following table. For more information, please refer to: [Other model documents](../models/Others_en.md). | Model | Top-1 Acc | Top-5 Acc | time(ms)
bs=1 | time(ms)
bs=4 | time(ms)
bs=8 | FLOPs(G) | Params(M) | Pretrained Model Download Address | Inference Model Download Address | |------------------------|-----------|-----------|------------------|------------------|----------|-----------|------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------| @@ -525,3 +514,86 @@ The accuracy and speed indicators of AlexNet, SqueezeNet series, VGG series, Dar | VGG16 | 0.720 | 0.907 | 2.48 | 6.79 | 12.33 | 15.470 | 138.35 | [Download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/VGG16_pretrained.pdparams) | [Download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/VGG16_infer.tar) | | VGG19 | 0.726 | 0.909 | 2.93 | 8.28 | 15.21 | 19.63 | 143.66 | [Download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/VGG19_pretrained.pdparams) | [Download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/VGG19_infer.tar) | | DarkNet53 | 0.780 | 0.941 | 2.79 | 6.42 | 10.89 | 9.31 | 41.65 | [Download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DarkNet53_pretrained.pdparams) | [Download link](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/DarkNet53_infer.tar) | + + + +## Reference + +[1] He K, Zhang X, Ren S, et al. Deep residual learning for image recognition[C]//Proceedings of the IEEE conference on computer vision and pattern recognition. 2016: 770-778. + +[2] He T, Zhang Z, Zhang H, et al. Bag of tricks for image classification with convolutional neural networks[C]//Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 2019: 558-567. + +[3] Howard A, Sandler M, Chu G, et al. 
Searching for mobilenetv3[C]//Proceedings of the IEEE International Conference on Computer Vision. 2019: 1314-1324. + +[4] Sandler M, Howard A, Zhu M, et al. Mobilenetv2: Inverted residuals and linear bottlenecks[C]//Proceedings of the IEEE conference on computer vision and pattern recognition. 2018: 4510-4520. + +[5] Howard A G, Zhu M, Chen B, et al. Mobilenets: Efficient convolutional neural networks for mobile vision applications[J]. arXiv preprint arXiv:1704.04861, 2017. + +[6] Ma N, Zhang X, Zheng H T, et al. Shufflenet v2: Practical guidelines for efficient cnn architecture design[C]//Proceedings of the European Conference on Computer Vision (ECCV). 2018: 116-131. + +[7] Xie S, Girshick R, Dollár P, et al. Aggregated residual transformations for deep neural networks[C]//Proceedings of the IEEE conference on computer vision and pattern recognition. 2017: 1492-1500. + +[8] Hu J, Shen L, Sun G. Squeeze-and-excitation networks[C]//Proceedings of the IEEE conference on computer vision and pattern recognition. 2018: 7132-7141. + +[9] Gao S, Cheng M M, Zhao K, et al. Res2net: A new multi-scale backbone architecture[J]. IEEE transactions on pattern analysis and machine intelligence, 2019. + +[10] Szegedy C, Liu W, Jia Y, et al. Going deeper with convolutions[C]//Proceedings of the IEEE conference on computer vision and pattern recognition. 2015: 1-9. + +[11] Szegedy C, Ioffe S, Vanhoucke V, et al. Inception-v4, inception-resnet and the impact of residual connections on learning[C]//Thirty-first AAAI conference on artificial intelligence. 2017. + +[12] Chollet F. Xception: Deep learning with depthwise separable convolutions[C]//Proceedings of the IEEE conference on computer vision and pattern recognition. 2017: 1251-1258. + +[13] Wang J, Sun K, Cheng T, et al. Deep high-resolution representation learning for visual recognition[J]. arXiv preprint arXiv:1908.07919, 2019. + +[14] Chen Y, Li J, Xiao H, et al. 
Dual path networks[C]//Advances in neural information processing systems. 2017: 4467-4475. + +[15] Huang G, Liu Z, Van Der Maaten L, et al. Densely connected convolutional networks[C]//Proceedings of the IEEE conference on computer vision and pattern recognition. 2017: 4700-4708. + +[16] Tan M, Le Q V. Efficientnet: Rethinking model scaling for convolutional neural networks[J]. arXiv preprint arXiv:1905.11946, 2019. + +[17] Mahajan D, Girshick R, Ramanathan V, et al. Exploring the limits of weakly supervised pretraining[C]//Proceedings of the European Conference on Computer Vision (ECCV). 2018: 181-196. + +[18] Krizhevsky A, Sutskever I, Hinton G E. Imagenet classification with deep convolutional neural networks[C]//Advances in neural information processing systems. 2012: 1097-1105. + +[19] Iandola F N, Han S, Moskewicz M W, et al. SqueezeNet: AlexNet-level accuracy with 50x fewer parameters and< 0.5 MB model size[J]. arXiv preprint arXiv:1602.07360, 2016. + +[20] Simonyan K, Zisserman A. Very deep convolutional networks for large-scale image recognition[J]. arXiv preprint arXiv:1409.1556, 2014. + +[21] Redmon J, Divvala S, Girshick R, et al. You only look once: Unified, real-time object detection[C]//Proceedings of the IEEE conference on computer vision and pattern recognition. 2016: 779-788. + +[22] Ding X, Guo Y, Ding G, et al. Acnet: Strengthening the kernel skeletons for powerful cnn via asymmetric convolution blocks[C]//Proceedings of the IEEE International Conference on Computer Vision. 2019: 1911-1920. + +[23] Han K, Wang Y, Tian Q, et al. GhostNet: More features from cheap operations[C]//Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition. 2020: 1580-1589. + +[24] Zhang H, Wu C, Zhang Z, et al. Resnest: Split-attention networks[J]. arXiv preprint arXiv:2004.08955, 2020. + +[25] Radosavovic I, Kosaraju R P, Girshick R, et al. 
Designing network design spaces[C]//Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition. 2020: 10428-10436. + +[26] C.Szegedy, V.Vanhoucke, S.Ioffe, J.Shlens, and Z.Wojna. Rethinking the inception architecture for computer vision. arXiv preprint arXiv:1512.00567, 2015. + +[27] Ze Liu, Yutong Lin, Yue Cao, Han Hu, Yixuan Wei, Zheng Zhang, Stephen Lin and Baining Guo. Swin Transformer: Hierarchical Vision Transformer using Shifted Windows. + +[28]Cheng Cui, Tingquan Gao, Shengyu Wei, Yuning Du, Ruoyu Guo, Shuilong Dong, Bin Lu, Ying Zhou, Xueying Lv, Qiwen Liu, Xiaoguang Hu, Dianhai Yu, Yanjun Ma. PP-LCNet: A Lightweight CPU Convolutional Neural Network. + +[29]Mingxing Tan, Quoc V. Le. MixConv: Mixed Depthwise Convolutional Kernels. + +[30]Dongyoon Han, Sangdoo Yun, Byeongho Heo, YoungJoon Yoo. Rethinking Channel Dimensions for Efficient Model Design. + +[31]Alexey Dosovitskiy, Lucas Beyer, Alexander Kolesnikov, Dirk Weissenborn, Xiaohua Zhai, Thomas Unterthiner, Mostafa Dehghani, Matthias Minderer, Georg Heigold, Sylvain Gelly, Jakob Uszkoreit, Neil Houlsby. AN IMAGE IS WORTH 16X16 WORDS: +TRANSFORMERS FOR IMAGE RECOGNITION AT SCALE. + +[32]Hugo Touvron, Matthieu Cord, Matthijs Douze, Francisco Massa, Alexandre Sablayrolles, Herve Jegou. Training data-efficient image transformers & distillation through attention. + +[33]Benjamin Graham, Alaaeldin El-Nouby, Hugo Touvron, Pierre Stock, Armand Joulin, Herve Jegou, Matthijs Douze. LeViT: a Vision Transformer in ConvNet’s Clothing for Faster Inference. + +[34]Xiangxiang Chu, Zhi Tian, Yuqing Wang, Bo Zhang, Haibing Ren, Xiaolin Wei, Huaxia Xia, Chunhua Shen. Twins: Revisiting the Design of Spatial Attention in Vision Transformers. + +[35]Kai Han, An Xiao, Enhua Wu, Jianyuan Guo, Chunjing Xu, Yunhe Wang. Transformer in Transformer. + +[36]Xiaohan Ding, Xiangyu Zhang, Ningning Ma, Jungong Han, Guiguang Ding, Jian Sun. RepVGG: Making VGG-style ConvNets Great Again. 
+ +[37]Ping Chao, Chao-Yang Kao, Yu-Shan Ruan, Chien-Hsiang Huang, Youn-Long Lin. HarDNet: A Low Memory Traffic Network. + +[38]Fisher Yu, Dequan Wang, Evan Shelhamer, Trevor Darrell. Deep Layer Aggregation. + +[39]Duo Li, Jie Hu, Changhu Wang, Xiangtai Li, Qi She, Lei Zhu, Tong Zhang, Qifeng Chen. Involution: Inverting the Inherence of Convolution for Visual Recognition. diff --git a/docs/en/algorithm_introduction/image_classification_en.md b/docs/en/algorithm_introduction/image_classification_en.md index 3b28b04d61b7d69267529da94b5f15202092a9c9..fa2319c105e50f467a7c9bb078d8d09516890892 100644 --- a/docs/en/algorithm_introduction/image_classification_en.md +++ b/docs/en/algorithm_introduction/image_classification_en.md @@ -1,3 +1,4 @@ +# Image Classification Task Introduction ## Catalogue - [1. Dataset Introduction](#1) diff --git a/docs/en/algorithm_introduction/index.rst b/docs/en/algorithm_introduction/index.rst new file mode 100644 index 0000000000000000000000000000000000000000..e0fad797377ba99fb182578d9ae630b29339fbde --- /dev/null +++ b/docs/en/algorithm_introduction/index.rst @@ -0,0 +1,12 @@ +algorithm_introduction +================================ + +.. toctree:: + :maxdepth: 2 + + image_classification_en.md + metric_learning_en.md + knowledge_distillation_en.md + model_prune_quantization_en.md + ImageNet_models_en.md + DataAugmentation_en.md diff --git a/docs/en/conf.py b/docs/en/conf.py index 1b5a0c12462c502baf2e68018c63bc7b118bc968..fef10eec6b0e4efcd07849aff767872a12434c25 100644 --- a/docs/en/conf.py +++ b/docs/en/conf.py @@ -10,70 +10,56 @@ # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. 
# -import os -import recommonmark +# import os +# import sys +# sys.path.insert(0, os.path.abspath('.')) +import sphinx_rtd_theme +from recommonmark.parser import CommonMarkParser +# -- Project information ----------------------------------------------------- -exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] +project = 'PaddleClas-en' +copyright = '2022, PaddleClas' +author = 'PaddleClas' -# -- Project information ----------------------------------------------------- +# The full version, including alpha/beta/rc tags +release = '2.3' -project = 'PaddleClas' -copyright = '2020, paddlepaddle' -author = 'paddlepaddle' # -- General configuration --------------------------------------------------- # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. - +source_parsers = { + '.md': CommonMarkParser, +} +source_suffix = ['.rst', '.md'] extensions = [ - 'sphinx.ext.autodoc', - 'sphinx.ext.napoleon', - 'sphinx.ext.coverage', - 'sphinx.ext.viewcode', - 'sphinx.ext.mathjax', - 'sphinx.ext.githubpages', - 'sphinx.ext.napoleon', - 'recommonmark', - 'sphinx_markdown_tables', -] - + 'recommonmark', + 'sphinx_markdown_tables' + ] # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] -# md file can also be parased -source_suffix = ['.rst', '.md'] +# The root document. +root_doc = 'doc_en' -# The master toctree document. -master_doc = 'index' +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This pattern also affects html_static_path and html_extra_path. +exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] -# The language for content autogenerated by Sphinx. Refer to documentation -# for a list of supported languages. -# -# This is also used if you do content translation via gettext catalogs. 
-# Usually you set "language" from the command line for these cases. -language = 'en' # -- Options for HTML output ------------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. - -# on_rtd is whether we are on readthedocs.org, this line of code grabbed from docs.readthedocs.org -on_rtd = os.environ.get('READTHEDOCS', None) == 'True' - -if not on_rtd: # only import and set the theme if we're building docs locally - import sphinx_rtd_theme - html_theme = 'sphinx_rtd_theme' - html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] - -# otherwise, readthedocs.org uses their theme by default, so no need to specify it +# +# 更改文档配色 +html_theme = "sphinx_rtd_theme" +html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". - html_static_path = ['_static'] - -html_logo = '../images/logo.png' diff --git a/docs/en/data_preparation/index.rst b/docs/en/data_preparation/index.rst new file mode 100644 index 0000000000000000000000000000000000000000..e668126ecc8b1506d2008f0bfe7c8abf2678f357 --- /dev/null +++ b/docs/en/data_preparation/index.rst @@ -0,0 +1,8 @@ +data_preparation +================================ + +.. toctree:: + :maxdepth: 2 + + recognition_dataset_en.md + classification_dataset_en.md diff --git a/docs/en/doc_en.rst b/docs/en/doc_en.rst new file mode 100644 index 0000000000000000000000000000000000000000..aa25829f5c811d8419dbcb18558584c2cb5a29fe --- /dev/null +++ b/docs/en/doc_en.rst @@ -0,0 +1,23 @@ +Welcome to PaddleClas! +================================ + +.. 
toctree:: + :maxdepth: 1 + + introduction/index + installation/index + quick_start/index + image_recognition_pipeline/index + data_preparation/index + models_training/index + inference_deployment/index + models/index + algorithm_introduction/index + advanced_tutorials/index + others/index + faq_series/index + + + + + diff --git a/docs/en/extension/VisualDL_en.md b/docs/en/extension/VisualDL_en.md deleted file mode 100644 index 9ffd03e92f19a617d7412894073957490dd7b799..0000000000000000000000000000000000000000 --- a/docs/en/extension/VisualDL_en.md +++ /dev/null @@ -1,44 +0,0 @@ -# Use VisualDL to visualize the training - -## Preface -VisualDL, a visualization analysis tool of PaddlePaddle, provides a variety of charts to show the trends of parameters, and visualizes model structures, data samples, histograms of tensors, PR curves , ROC curves and high-dimensional data distributions. It enables users to understand the training process and the model structure more clearly and intuitively so as to optimize models efficiently. For more information, please refer to [VisualDL](https://github.com/PaddlePaddle/VisualDL/). - -## Use VisualDL in PaddleClas -Now PaddleClas support use VisualDL to visualize the changes of learning rate, loss, accuracy in training. - -### Set config and start training -You only need to set the field `Global.use_visualdl` to `True` in train config: - -```yaml -# config.yaml -Global: -... - use_visualdl: True -... -``` - -PaddleClas will save the VisualDL logs to subdirectory `vdl/` under the output directory specified by `Global.output_dir`. And then you just need to start training normally: - -```shell -python3 tools/train.py -c config.yaml -``` - -### Start VisualDL -After starting the training program, you can start the VisualDL service in a new terminal session: - -```shell - visualdl --logdir ./output/vdl/ -``` - -In the above command, `--logdir` specify the directory of the VisualDL logs produced in training. 
VisualDL will traverse and iterate to find the subdirectories of the specified directory to visualize all the experimental results. You can also use the following parameters to set the IP and port number of the VisualDL service: - -* `--host`:ip, default is 127.0.0.1 -* `--port`:port, default is 8040 - -More information about the command,please refer to [VisualDL](https://github.com/PaddlePaddle/VisualDL/blob/develop/README.md#2-launch-panel). - -Then you can enter the address `127.0.0.1:8840` and view the training process in the browser: - -
- -
diff --git a/docs/en/extension/index.rst b/docs/en/extension/index.rst deleted file mode 100644 index 4d72ea47936b927705d80077d3762ff75d1a13ae..0000000000000000000000000000000000000000 --- a/docs/en/extension/index.rst +++ /dev/null @@ -1,12 +0,0 @@ -extension -================================ - -.. toctree:: - :maxdepth: 1 - - paddle_inference_en.md - paddle_mobile_inference_en.md - paddle_quantization_en.md - multi_machine_training_en.md - paddle_hub_en.md - paddle_serving_en.md diff --git a/docs/en/extension/multi_machine_training_en.md b/docs/en/extension/multi_machine_training_en.md deleted file mode 100644 index d4fb997842717a7342f4acc457229aa07d290365..0000000000000000000000000000000000000000 --- a/docs/en/extension/multi_machine_training_en.md +++ /dev/null @@ -1,11 +0,0 @@ -# Distributed Training - -Distributed deep neural networks training is highly efficient in PaddlePaddle. -And it is one of the PaddlePaddle's core advantage technologies. -On image classification tasks, distributed training can achieve almost linear acceleration ratio. -[Fleet](https://github.com/PaddlePaddle/Fleet) is High-Level API for distributed training in PaddlePaddle. -By using Fleet, a user can shift from local machine paddlepaddle code to distributed code easily. -In order to support both single-machine training and multi-machine training, -[PaddleClas](https://github.com/PaddlePaddle/PaddleClas) uses the Fleet API interface. -For more information about distributed training, -please refer to [Fleet API documentation](https://github.com/PaddlePaddle/Fleet/blob/develop/README.md). diff --git a/docs/en/extension/paddle_hub_en.md b/docs/en/extension/paddle_hub_en.md deleted file mode 100644 index d9d833a01cf26df92a040881827d7d855d75ef9a..0000000000000000000000000000000000000000 --- a/docs/en/extension/paddle_hub_en.md +++ /dev/null @@ -1,6 +0,0 @@ -# Paddle Hub - -[PaddleHub](https://github.com/PaddlePaddle/PaddleHub) is a pre-trained model application tool for PaddlePaddle. 
-Developers can conveniently use the high-quality pre-trained model combined with Fine-tune API to quickly complete the whole process from model migration to deployment. -All the pre-trained models of [PaddleClas](https://github.com/PaddlePaddle/PaddleClas) have been collected by PaddleHub. -For further details, please refer to [PaddleHub website](https://www.paddlepaddle.org.cn/hub). diff --git a/docs/en/extension/paddle_mobile_inference_en.md b/docs/en/extension/paddle_mobile_inference_en.md deleted file mode 100644 index 86c8e040d139e30795f6d71a98a0f5b1c277b851..0000000000000000000000000000000000000000 --- a/docs/en/extension/paddle_mobile_inference_en.md +++ /dev/null @@ -1,114 +0,0 @@ -# Paddle-Lite - -## Introduction - -[Paddle-Lite](https://github.com/PaddlePaddle/Paddle-Lite) is a set of lightweight inference engine which is fully functional, easy to use and then performs well. Lightweighting is reflected in the use of fewer bits to represent the weight and activation of the neural network, which can greatly reduce the size of the model, solve the problem of limited storage space of the mobile device, and the inference speed is better than other frameworks on the whole. - -In [PaddleClas](https://github.com/PaddlePaddle/PaddleClas), we uses Paddle-Lite to [evaluate the performance on the mobile device](../models/Mobile_en.md), in this section we uses the `MobileNetV1` model trained on the `ImageNet1k` dataset as an example to introduce how to use `Paddle-Lite` to evaluate the model speed on the mobile terminal (evaluated on SD855) - -## Evaluation Steps - -### Export the Inference Model - -* First you should transform the saved model during training to the special model which can be used to inference, the special model can be exported by `tools/export_model.py`, the specific way of transform is as follows. 
- -```shell -python tools/export_model.py -m MobileNetV1 -p pretrained/MobileNetV1_pretrained/ -o inference/MobileNetV1 -``` - -Finally the `model` and `parmas` can be saved in `inference/MobileNetV1`. - - -### Download Benchmark Binary File - -* Use the adb (Android Debug Bridge) tool to connect the Android phone and the PC, then develop and debug. After installing adb and ensuring that the PC and the phone are successfully connected, use the following command to view the ARM version of the phone and select the pre-compiled library based on ARM version. - -```shell -adb shell getprop ro.product.cpu.abi -``` - -* Download Benchmark_bin File - -```shell -wget -c https://paddle-inference-dist.bj.bcebos.com/PaddleLite/benchmark_0/benchmark_bin_v8 -``` - -If the ARM version is v7, the v7 benchmark_bin file should be downloaded, the command is as follow. - -```shell -wget -c https://paddle-inference-dist.bj.bcebos.com/PaddleLite/benchmark_0/benchmark_bin_v7 -``` - -### Inference benchmark - -After the PC and mobile phone are successfully connected, use the following command to start the model evaluation. - -``` -sh deploy/lite/benchmark/benchmark.sh ./benchmark_bin_v8 ./inference result_armv8.txt true -``` - -Where `./benchmark_bin_v8` is the path of the benchmark binary file, `./inference` is the path of all the models that need to be evaluated, `result_armv8.txt` is the result file, and the final parameter `true` means that the model will be optimized before evaluation. Eventually, the evaluation result file of `result_armv8.txt` will be saved in the current folder. The specific performances are as follows. 
- -``` -PaddleLite Benchmark -Threads=1 Warmup=10 Repeats=30 -MobileNetV1 min = 30.89100 max = 30.73600 average = 30.79750 - -Threads=2 Warmup=10 Repeats=30 -MobileNetV1 min = 18.26600 max = 18.14000 average = 18.21637 - -Threads=4 Warmup=10 Repeats=30 -MobileNetV1 min = 10.03200 max = 9.94300 average = 9.97627 -``` - -Here is the model inference speed under different number of threads, the unit is FPS, taking model on one threads as an example, the average speed of MobileNetV1 on SD855 is `30.79750FPS`. - -### Model Optimization and Speed Evaluation - -* In II.III section, we mention that the model will be optimized before evaluation, here you can first optimize the model, and then directly load the optimized model for speed evaluation - -* Paddle-Lite -In Paddle-Lite, we provides multiple strategies to automatically optimize the original training model, which contain Quantify, Subgraph fusion, Hybrid scheduling, Kernel optimization and so on. In order to make the optimization more convenient and easy to use, we provide opt tools to automatically complete the optimization steps and output a lightweight, optimal and executable model in Paddle-Lite, which can be downloaded on [Paddle-Lite Model Optimization Page](https://paddle-lite.readthedocs.io/zh/latest/user_guides/model_optimize_tool.html). Here we take `MacOS` as our development environment, download[opt_mac](https://paddlelite-data.bj.bcebos.com/model_optimize_tool/opt_mac) model optimization tools and use the following commands to optimize the model. 
- - -```shell -model_file="../MobileNetV1/model" -param_file="../MobileNetV1/params" -opt_models_dir="./opt_models" -mkdir ${opt_models_dir} -./opt_mac --model_file=${model_file} \ - --param_file=${param_file} \ - --valid_targets=arm \ - --optimize_out_type=naive_buffer \ - --prefer_int8_kernel=false \ - --optimize_out=${opt_models_dir}/MobileNetV1 -``` - -Where the `model_file` and `param_file` are exported model file and the file address respectively, after transforming successfully, the `MobileNetV1.nb` will be saved in `opt_models` - - - -Use the benchmark_bin file to load the optimized model for evaluation. The commands are as follows. - -```shell -bash benchmark.sh ./benchmark_bin_v8 ./opt_models result_armv8.txt -``` - -Finally the result is saved in `result_armv8.txt` and shown as follow. - -``` -PaddleLite Benchmark -Threads=1 Warmup=10 Repeats=30 -MobileNetV1_lite min = 30.89500 max = 30.78500 average = 30.84173 - -Threads=2 Warmup=10 Repeats=30 -MobileNetV1_lite min = 18.25300 max = 18.11000 average = 18.18017 - -Threads=4 Warmup=10 Repeats=30 -MobileNetV1_lite min = 10.00600 max = 9.90000 average = 9.96177 -``` - - -Taking the model on one threads as an example, the average speed of MobileNetV1 on SD855 is `30.84173FPS`. - -More specific parameter explanation and Paddle-Lite usage can refer to [Paddle-Lite docs](https://paddle-lite.readthedocs.io/zh/latest/)。 diff --git a/docs/en/extension/paddle_quantization_en.md b/docs/en/extension/paddle_quantization_en.md deleted file mode 100644 index e84e3a820e585e974d7a011a0a1a33a89298f90b..0000000000000000000000000000000000000000 --- a/docs/en/extension/paddle_quantization_en.md +++ /dev/null @@ -1,12 +0,0 @@ -# Model Quantifization - -Int8 quantization is one of the key features in [PaddleSlim](https://github.com/PaddlePaddle/PaddleSlim). 
-It supports two kinds of training aware, **Dynamic strategy** and **Static strategy**, -layer-wise and channel-wise quantization, -and using PaddleLite to deploy models generated by PaddleSlim. - -By using this toolkit, [PaddleClas](https://github.com/PaddlePaddle/PaddleClas) quantized the mobilenet_v3_large_x1_0 model whose accuracy is 78.9% after distilled. -After quantized, the prediction speed is accelerated from 19.308ms to 14.395ms on SD855. -The storage size is reduced from 21M to 10M. -The top1 recognition accuracy rate is 75.9%. -For specific training methods, please refer to [PaddleSlim quant aware](../../../deploy/slim/README_en.md)。 diff --git a/docs/en/extension/paddle_serving_en.md b/docs/en/extension/paddle_serving_en.md deleted file mode 100644 index 3ad259526a411c46f3e25207ebf77aa45198d67e..0000000000000000000000000000000000000000 --- a/docs/en/extension/paddle_serving_en.md +++ /dev/null @@ -1,64 +0,0 @@ -# Model Service Deployment - -## Overview -[Paddle Serving](https://github.com/PaddlePaddle/Serving) aims to help deep-learning researchers to easily deploy online inference services, supporting one-click deployment of industry, high concurrency and efficient communication between client and server and supporting multiple programming languages to develop clients. - -Taking HTTP inference service deployment as an example to introduce how to use PaddleServing to deploy model services in PaddleClas. - -## Serving Install - -It is recommends to use docker to install and deploy the Serving environment in the Serving official website, first, you need to pull the docker environment and create Serving-based docker. 
- -```shell -nvidia-docker pull hub.baidubce.com/paddlepaddle/serving:0.2.0-gpu -nvidia-docker run -p 9292:9292 --name test -dit hub.baidubce.com/paddlepaddle/serving:0.2.0-gpu -nvidia-docker exec -it test bash -``` - -In docker, you need to install some packages about Serving - -```shell -pip install paddlepaddle-gpu -pip install paddle-serving-client -pip install paddle-serving-server-gpu -``` - -* If the installation speed is too slow, you can add `-i https://pypi.tuna.tsinghua.edu.cn/simple` following pip to speed up the process. - -* If you want to deploy CPU service, you can install the cpu version of Serving, the command is as follow. - -```shell -pip install paddle-serving-server -``` - -### Export Model - -Exporting the Serving model using `tools/export_serving_model.py`, taking ResNet50_vd as an example, the command is as follow. - -```shell -python tools/export_serving_model.py -m ResNet50_vd -p ./pretrained/ResNet50_vd_pretrained/ -o serving -``` - -finally, the client configures, model parameters and structure file will be saved in `ppcls_client_conf` and `ppcls_model`. - - -### Service Deployment and Request - -* Using the following commands to start the Serving. - -```shell -python tools/serving/image_service_gpu.py serving/ppcls_model workdir 9292 -``` - -`serving/ppcls_model` is the address of the Serving model just saved, `workdir` is the work directory, and `9292` is the port of the service. - - -* Using the following script to send an identification request to the Serving and return the result. - -``` -python tools/serving/image_http_client.py 9292 ./docs/images/logo.png -``` - -`9292` is the port for sending the request, which is consistent with the Serving starting port, and `./docs/images/logo.png` is the test image, the final top1 label and probability are returned. 
- -* For more Serving deployment, such RPC inference service, you can refer to the Serving official website: [https://github.com/PaddlePaddle/Serving/tree/develop/python/examples/imagenet](https://github.com/PaddlePaddle/Serving/tree/develop/python/examples/imagenet) diff --git a/docs/en/extension/train_with_DALI_en.md b/docs/en/extension/train_with_DALI_en.md deleted file mode 100644 index a67a76166b0d890d69f5e2a8cd14c68b146c785b..0000000000000000000000000000000000000000 --- a/docs/en/extension/train_with_DALI_en.md +++ /dev/null @@ -1,62 +0,0 @@ -# Train with DALI - -## Preface -[The NVIDIA Data Loading Library](https://docs.nvidia.com/deeplearning/dali/user-guide/docs/index.html) (DALI) is a library for data loading and pre-processing to accelerate deep learning applications. It can build Dataloader of Paddle. - -Since the Deep learning relies on a large amount of data in the training stage, these data need to be loaded and preprocessed. These operations are usually executed on the CPU, which limits the further improvement of the training speed, especially when the batch_size is large, which become the bottleneck of speed. DALI can use GPU to accelerate these operations, thereby further improve the training speed. - -## Installing DALI -DALI only support Linux x64 and version of CUDA is 10.2 or later. - -* For CUDA 10: - - pip install --extra-index-url https://developer.download.nvidia.com/compute/redist nvidia-dali-cuda100 - -* For CUDA 11.0: - - pip install --extra-index-url https://developer.download.nvidia.com/compute/redist nvidia-dali-cuda110 - -For more information about installing DALI, please refer to [DALI](https://docs.nvidia.com/deeplearning/dali/user-guide/docs/installation.html). - -## Using DALI -Paddleclas supports training with DALI in static graph. Since DALI only supports GPU training, `CUDA_VISIBLE_DEVICES` needs to be set, and DALI needs to occupy GPU memory, so it needs to reserve GPU memory for Dali. 
To train with DALI, just set the fields in the training config `use_dali = True`, or start the training by the following command: - -```shell -# set the GPUs that can be seen -export CUDA_VISIBLE_DEVICES="0" - -# set the GPU memory used for neural network training, generally 0.8 or 0.7, and the remaining GPU memory is reserved for DALI -export FLAGS_fraction_of_gpu_memory_to_use=0.80 - -python tools/static/train.py -c configs/ResNet/ResNet50.yaml -o use_dali=True -``` - -And you can train with muti-GPUs: - -```shell -# set the GPUs that can be seen -export CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7" - -# set the GPU memory used for neural network training, generally 0.8 or 0.7, and the remaining GPU memory is reserved for DALI -export FLAGS_fraction_of_gpu_memory_to_use=0.80 - -python -m paddle.distributed.launch \ - --gpus="0,1,2,3,4,5,6,7" \ - tools/static/train.py \ - -c ./configs/ResNet/ResNet50.yaml \ - -o use_dali=True -``` - -## Train with FP16 - -On the basis of the above, using FP16 half-precision can further improve the training speed, you can refer to the following command. - -```shell -export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 -export FLAGS_fraction_of_gpu_memory_to_use=0.8 - -python -m paddle.distributed.launch \ - --gpus="0,1,2,3,4,5,6,7" \ - tools/static/train.py \ - -c configs/ResNet/ResNet50_fp16.yaml -``` diff --git a/docs/en/faq_series/index.rst b/docs/en/faq_series/index.rst new file mode 100644 index 0000000000000000000000000000000000000000..69a9f2d2a6fdad7a30c49fb000b93caf28ffa3a1 --- /dev/null +++ b/docs/en/faq_series/index.rst @@ -0,0 +1,10 @@ +faq_series +================================ + +.. 
toctree:: + :maxdepth: 2 + + faq_2021_s2_en.md + faq_2021_s1_en.md + faq_2020_s1_en.md + faq_selected_30_en.md diff --git a/docs/en/image_recognition_pipeline/feature_extraction_en.md b/docs/en/image_recognition_pipeline/feature_extraction_en.md index 4ff01f51557b9f839f656323c33366eb88243821..f86562a37416c406497cb3723d50dc02332e4e51 100644 --- a/docs/en/image_recognition_pipeline/feature_extraction_en.md +++ b/docs/en/image_recognition_pipeline/feature_extraction_en.md @@ -58,12 +58,12 @@ The results are shown in the table below: - Address of the pre-training model: [General recognition pre-training model](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/pretrain/general_PPLCNet_x2_5_pretrained_v1.0.pdparams) -# 4.Customized Feature Extraction +## 4.Customized Feature Extraction Customized feature extraction refers to retraining the feature extraction model based on one's own task. It consists of four main steps: 1) data preparation, 2) model training, 3) model evaluation, and 4) model inference. -## 4.1 Data Preparation +### 4.1 Data Preparation To start with, customize your dataset based on the task (See [Format description](../data_preparation/recognition_dataset_en.md#1) for the dataset format). Before initiating the model training, modify the data-related content in the configuration files, including the address of the dataset and the class number. The corresponding locations in configuration files are shown below: @@ -99,7 +99,7 @@ Train: ``` -## 4.2 Model Training +### 4.2 Model Training - Single machine single card training @@ -130,7 +130,7 @@ python -m paddle.distributed.launch \ ``` -## 4.3 Model Evaluation +### 4.3 Model Evaluation - Single Card Evaluation @@ -154,21 +154,21 @@ python -m paddle.distributed.launch \ **Recommendation:** It is suggested to employ multi-card evaluation, which can quickly obtain the feature set of the overall dataset using multi-card parallel computing, accelerating the evaluation process. 
-## 4.4 Model Inference +### 4.4 Model Inference Two steps are included in the inference: 1)exporting the inference model; 2)obtaining the feature vector. -### 4.4.1 Export Inference Model +#### 4.4.1 Export Inference Model ``` -python tools/export_model \ +python tools/export_model.py \ -c ppcls/configs/GeneralRecognition/GeneralRecognition_PPLCNet_x2_5.yaml \ -o Global.pretrained_model="output/RecModel/best_model" ``` The generated inference models are under the directory `inference`, which comprises three files, namely, `inference.pdmodel`、`inference.pdiparams`、`inference.pdiparams.info`. Among them, `inference.pdmodel` serves to store the structure of inference model while `inference.pdiparams` and `inference.pdiparams.info` are mobilized to store model-related parameters. -### 4.4.2 Obtain Feature Vector +#### 4.4.2 Obtain Feature Vector ``` cd deploy diff --git a/docs/en/image_recognition_pipeline/index.rst b/docs/en/image_recognition_pipeline/index.rst new file mode 100644 index 0000000000000000000000000000000000000000..95aa67d4caca1a0c9bb6ff629fde764dc41d379d --- /dev/null +++ b/docs/en/image_recognition_pipeline/index.rst @@ -0,0 +1,9 @@ +image_recognition_pipeline +================================ + +.. toctree:: + :maxdepth: 2 + + mainbody_detection_en.md + feature_extraction_en.md + vector_search_en.md diff --git a/docs/en/index.rst b/docs/en/index.rst index b8a2e9096c0a119fcf667d74f622bdad23409067..0bb50d3dc0e14a02cf37af99606ce5817245a24b 100644 --- a/docs/en/index.rst +++ b/docs/en/index.rst @@ -1,17 +1,18 @@ -Welcome to PaddleClas! +欢迎使用PaddleClas图像分类库! ================================ .. 
toctree:: - :maxdepth: 1 - :numbered: - :caption: Contents: - - tutorials/index + :maxdepth: 2 + + models_training/index + introduction/index + image_recognition_pipeline/index + others/index + faq_series/index + data_preparation/index + installation/index models/index advanced_tutorials/index - application/index - extension/index - competition_support_en.md - update_history_en.md - faq_en.md - + algorithm_introduction/index + inference_deployment/index + quick_start/index diff --git a/docs/en/inference_deployment/cpp_deploy_en.md b/docs/en/inference_deployment/cpp_deploy_en.md index 1b65ebf626b9954df18fe9dde1138fc609af9ebe..3f92b662b848bf206ab86e9864bf6d730596bbd7 100644 --- a/docs/en/inference_deployment/cpp_deploy_en.md +++ b/docs/en/inference_deployment/cpp_deploy_en.md @@ -293,8 +293,6 @@ sh tools/run.sh * The prediction results will be shown on the screen, which is as follows. -
- -
+![](../../images/inference_deployment/cpp_infer_result.png) * In the above results,`class id` represents the id corresponding to the category with the highest confidence, and `score` represents the probability that the image belongs to that category. diff --git a/docs/en/inference_deployment/index.rst b/docs/en/inference_deployment/index.rst new file mode 100644 index 0000000000000000000000000000000000000000..1e3c344e46c5328bdd32d7c987d7e44d63a9478f --- /dev/null +++ b/docs/en/inference_deployment/index.rst @@ -0,0 +1,19 @@ +inference_deployment +================================ + +.. toctree:: + :maxdepth: 2 + + export_model_en.md + python_deploy_en.md + cpp_deploy_en.md + paddle_serving_deploy_en.md + paddle_hub_serving_deploy_en.md + paddle_lite_deploy_en.md + whl_deploy_en.md + + + + + + diff --git a/docs/en/inference_deployment/paddle_lite_deploy_en.md b/docs/en/inference_deployment/paddle_lite_deploy_en.md index b584aafdc30c812644354de333e40f558c687b3b..12d45dd512a022850f87a922d28e19f5affd56f4 100644 --- a/docs/en/inference_deployment/paddle_lite_deploy_en.md +++ b/docs/en/inference_deployment/paddle_lite_deploy_en.md @@ -258,9 +258,7 @@ export LD_LIBRARY_PATH=/data/local/tmp/debug:$LD_LIBRARY_PATH The result is as follows: -
- -
+![](../../images/inference_deployment/lite_demo_result.png) ## 3. FAQ diff --git a/docs/en/inference_deployment/whl_deploy_en.md b/docs/en/inference_deployment/whl_deploy_en.md index e97cbfd7b717dd7f8e71cf1466669c9714b05ab2..224d41a7c1f2de9886fd830a36b8910dae0f97b6 100644 --- a/docs/en/inference_deployment/whl_deploy_en.md +++ b/docs/en/inference_deployment/whl_deploy_en.md @@ -39,9 +39,7 @@ pip3 install dist/* ## 2. Quick Start * Using the `ResNet50` model provided by PaddleClas, the following image(`'docs/images/inference_deployment/whl_demo.jpg'`) as an example. -
- -
+![](../../images/inference_deployment/whl_demo.jpg) * Python ```python diff --git a/docs/en/installation/index.rst b/docs/en/installation/index.rst new file mode 100644 index 0000000000000000000000000000000000000000..39d432ae7098709006ef8326d818f67623bf2fac --- /dev/null +++ b/docs/en/installation/index.rst @@ -0,0 +1,8 @@ +installation +================================ + +.. toctree:: + :maxdepth: 2 + + install_paddle_en.md + install_paddleclas_en.md diff --git a/docs/en/installation/install_paddle_en.md b/docs/en/installation/install_paddle_en.md index 3922b7325da66e53c6dd1b1b31f0dc6966d2252f..c282f3ed602e7988b4cd7533e1a32218a77aa7fe 100644 --- a/docs/en/installation/install_paddle_en.md +++ b/docs/en/installation/install_paddle_en.md @@ -1,4 +1,4 @@ -# Installation PaddlePaddle +# Install PaddlePaddle --- @@ -9,7 +9,7 @@ - [3. Install PaddlePaddle using pip](#3) - [4. Verify installation](#4) -At present, **PaddleClas** requires **PaddlePaddle** version **>=2.0**. Docker is recomended to run Paddleclas, for more detailed information about docker and nvidia-docker, you can refer to the [tutorial](https://docs.docker.com/get-started/). If you do not want to use docker, you can skip section [2. (Recommended) Prepare a docker environment](#2), and go into section [3. Install PaddlePaddle using pip](#3). +At present, **PaddleClas** requires **PaddlePaddle** version `>=2.0`. Docker is recommended to run PaddleClas. For more detailed information about docker and nvidia-docker, you can refer to the [tutorial](https://docs.docker.com/get-started/). If you do not want to use docker, you can skip section [2. (Recommended) Prepare a docker environment](#2), and go into section [3. Install PaddlePaddle using pip](#3). @@ -96,5 +96,5 @@ python -c "import paddle; print(paddle.__version__)" Note: * Make sure the compiled source code is later than PaddlePaddle2.0. 
-* Indicate **WITH_DISTRIBUTE=ON** when compiling, Please refer to [Instruction](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/install/Tables.html#id3) for more details. +* Indicate `WITH_DISTRIBUTE=ON` when compiling. Please refer to [Instruction](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/install/Tables.html#id3) for more details. * When running in docker, in order to ensure that the container has enough shared memory for dataloader acceleration of Paddle, please set the parameter `--shm-size=8g` at creating a docker container, if conditions permit, you can set it to a larger value. diff --git a/docs/en/introduction/index.rst b/docs/en/introduction/index.rst new file mode 100644 index 0000000000000000000000000000000000000000..e22a647e856ecec44d261a363e8721a0e3caed68 --- /dev/null +++ b/docs/en/introduction/index.rst @@ -0,0 +1,8 @@ +introduction +================================ + +.. toctree:: + :maxdepth: 2 + + function_intro_en.md + more_demo/index diff --git a/docs/en/introduction/more_demo/index.rst b/docs/en/introduction/more_demo/index.rst new file mode 100644 index 0000000000000000000000000000000000000000..f09bccf2863aff9cc51925fca8329c3caa1a201a --- /dev/null +++ b/docs/en/introduction/more_demo/index.rst @@ -0,0 +1,11 @@ +more_demo +================================ + +.. toctree:: + :maxdepth: 1 + + product.md + logo.md + cartoon.md + more_demo.md + vehicle.md diff --git a/docs/en/models/PP-LCNet_en.md b/docs/en/models/PP-LCNet_en.md index 0fae0bf8cf5e33d42447270e8c5a9afc6c4432a9..1dd35e09d5d387a5cace9fb4131c304abd017f2a 100644 --- a/docs/en/models/PP-LCNet_en.md +++ b/docs/en/models/PP-LCNet_en.md @@ -27,13 +27,13 @@ In the field of computer vision, the quality of backbone network determines the ## 2. Introduction Recent years witnessed the emergence of many lightweight backbone networks. 
In past two years, in particular, there were abundant networks searched by NAS that either enjoy advantages on FLOPs or Params, or have an edge in terms of inference speed on ARM devices. However, few of them dedicated to specified optimization of Intel CPU, resulting their imperfect inference speed on the intel CPU side. Based on this, we specially design the backbone network PP-LCNet for Intel CPU devices with its acceleration library MKLDNN. Compared with other lightweight SOTA models, this backbone network can further improve the performance of the model without increasing the inference time, significantly outperforming the existing SOTA models. A comparison chart with other models is shown below. -
+![](../../images/PP-LCNet/PP-LCNet-Acc.png) ## 3. Method The overall structure of the network is shown in the figure below. -
+![](../../images/PP-LCNet/PP-LCNet.png) Build on extensive experiments, we found that many seemingly less time-consuming operations will increase the latency on Intel CPU-based devices, especially when the MKLDNN acceleration library is enabled. Therefore, we finally chose a block with the leanest possible structure and the fastest possible speed to form our BaseNet (similar to MobileNetV1). Based on BaseNet, we summarized four strategies that can improve the accuracy of the model without increasing the latency, and we combined these four strategies to form PP-LCNet. Each of these four strategies is introduced as below: diff --git a/docs/en/models/index.rst b/docs/en/models/index.rst index 73b2a1c104896ede5712c44bea2c54eaa02cb51f..4642eb1de4cd93daf58fffd9e08e476f2b0968d3 100644 --- a/docs/en/models/index.rst +++ b/docs/en/models/index.rst @@ -2,15 +2,29 @@ models ================================ .. toctree:: - :maxdepth: 1 - - models_intro_en.md - Tricks_en.md - ResNet_and_vd_en.md - Mobile_en.md - SEResNext_and_Res2Net_en.md - Inception_en.md - HRNet_en.md + :maxdepth: 2 + DPN_DenseNet_en.md + models_intro_en.md + RepVGG_en.md EfficientNet_and_ResNeXt101_wsl_en.md + ViT_and_DeiT_en.md + SwinTransformer_en.md Others_en.md + SEResNext_and_Res2Net_en.md + ESNet_en.md + HRNet_en.md + ReXNet_en.md + Inception_en.md + TNT_en.md + RedNet_en.md + DLA_en.md + ResNeSt_RegNet_en.md + PP-LCNet_en.md + HarDNet_en.md + ResNet_and_vd_en.md + LeViT_en.md + Mobile_en.md + MixNet_en.md + Twins_en.md + PVTV2_en.md diff --git a/docs/en/models/models_intro_en.md b/docs/en/models/models_intro_en.md deleted file mode 100644 index feb67f0eb9bdd697324c7ea12de4ee7103199952..0000000000000000000000000000000000000000 --- a/docs/en/models/models_intro_en.md +++ /dev/null @@ -1,307 +0,0 @@ -# Model Library Overview - -## Overview - -Based on the ImageNet1k classification dataset, the 29 classification network structures supported by PaddleClas and the corresponding 134 image classification 
pretrained models are shown below. Training trick, a brief introduction to each series of network structures, and performance evaluation will be shown in the corresponding chapters. - -## Evaluation environment -* Arm evaluation environment is based on Snapdragon 855 (SD855). -* The GPU evaluation environment is based on V100 and TensorRT, and the evaluation script is as follows. - -```shell -#!/usr/bin/env bash - -export PYTHONPATH=$PWD:$PYTHONPATH - -python tools/infer/predict.py \ - --model_file='pretrained/infer/model' \ - --params_file='pretrained/infer/params' \ - --enable_benchmark=True \ - --model_name=ResNet50_vd \ - --use_tensorrt=True \ - --use_fp16=False \ - --batch_size=1 -``` - -
- -
- -
- -
- -
- -
- -> If you think this document is helpful to you, welcome to give a star to our project:[https://github.com/PaddlePaddle/PaddleClas](https://github.com/PaddlePaddle/PaddleClas) - - -## Pretrained model list and download address -- ResNet and ResNet_vd series - - ResNet series[[1](#ref1)]([paper link](http://openaccess.thecvf.com/content_cvpr_2016/html/He_Deep_Residual_Learning_CVPR_2016_paper.html)) - - [ResNet18](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNet18_pretrained.pdparams) - - [ResNet34](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNet34_pretrained.pdparams) - - [ResNet50](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNet50_pretrained.pdparams) - - [ResNet101](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNet101_pretrained.pdparams) - - [ResNet152](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNet152_pretrained.pdparams) - - ResNet_vc、ResNet_vd series[[2](#ref2)]([paper link](https://arxiv.org/abs/1812.01187)) - - [ResNet50_vc](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNet50_vc_pretrained.pdparams) - - [ResNet18_vd](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNet18_vd_pretrained.pdparams) - - [ResNet34_vd](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNet34_vd_pretrained.pdparams) - - [ResNet34_vd_ssld](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNet34_vd_ssld_pretrained.pdparams) - - [ResNet50_vd](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNet50_vd_pretrained.pdparams) - - [ResNet50_vd_v2](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNet50_vd_v2_pretrained.pdparams) - - [ResNet101_vd](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNet101_vd_pretrained.pdparams) - - [ResNet152_vd](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNet152_vd_pretrained.pdparams) - - 
[ResNet200_vd](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNet200_vd_pretrained.pdparams) - - [ResNet50_vd_ssld](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNet50_vd_ssld_pretrained.pdparams) - - [ResNet50_vd_ssld_v2](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNet50_vd_ssld_v2_pretrained.pdparams) - - [Fix_ResNet50_vd_ssld_v2](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Fix_ResNet50_vd_ssld_v2_pretrained.pdparams) - - [ResNet101_vd_ssld](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNet101_vd_ssld_pretrained.pdparams) - - -- Mobile and Embedded Vision Applications Network series - - MobileNetV3 series[[3](#ref3)]([paper link](https://arxiv.org/abs/1905.02244)) - - [MobileNetV3_large_x0_35](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_large_x0_35_pretrained.pdparams) - - [MobileNetV3_large_x0_5](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_large_x0_5_pretrained.pdparams) - - [MobileNetV3_large_x0_75](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_large_x0_75_pretrained.pdparams) - - [MobileNetV3_large_x1_0](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_large_x1_0_pretrained.pdparams) - - [MobileNetV3_large_x1_25](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_large_x1_25_pretrained.pdparams) - - [MobileNetV3_small_x0_35](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_small_x0_35_pretrained.pdparams) - - [MobileNetV3_small_x0_5](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_small_x0_5_pretrained.pdparams) - - [MobileNetV3_small_x0_75](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_small_x0_75_pretrained.pdparams) - - [MobileNetV3_small_x1_0](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_small_x1_0_pretrained.pdparams) - - 
[MobileNetV3_small_x1_25](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_small_x1_25_pretrained.pdparams) - - [MobileNetV3_large_x1_0_ssld](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_large_x1_0_ssld_pretrained.pdparams) - - [MobileNetV3_large_x1_0_ssld_int8]()(coming soon) - - [MobileNetV3_small_x1_0_ssld](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_small_x1_0_ssld_pretrained.pdparams) - - MobileNetV2 series[[4](#ref4)]([paper link](https://arxiv.org/abs/1801.04381)) - - [MobileNetV2_x0_25](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_x0_25_pretrained.pdparams) - - [MobileNetV2_x0_5](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_x0_5_pretrained.pdparams) - - [MobileNetV2_x0_75](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_x0_75_pretrained.pdparams) - - [MobileNetV2](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_pretrained.pdparams) - - [MobileNetV2_x1_5](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_x1_5_pretrained.pdparams) - - [MobileNetV2_x2_0](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_x2_0_pretrained.pdparams) - - [MobileNetV2_ssld](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_ssld_pretrained.pdparams) - - MobileNetV1 series[[5](#ref5)]([paper link](https://arxiv.org/abs/1704.04861)) - - [MobileNetV1_x0_25](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV1_x0_25_pretrained.pdparams) - - [MobileNetV1_x0_5](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV1_x0_5_pretrained.pdparams) - - [MobileNetV1_x0_75](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV1_x0_75_pretrained.pdparams) - - [MobileNetV1](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV1_pretrained.pdparams) - - 
[MobileNetV1_ssld](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV1_ssld_pretrained.pdparams) - - ShuffleNetV2 series[[6](#ref6)]([paper link](https://arxiv.org/abs/1807.11164)) - - [ShuffleNetV2_x0_25](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ShuffleNetV2_x0_25_pretrained.pdparams) - - [ShuffleNetV2_x0_33](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ShuffleNetV2_x0_33_pretrained.pdparams) - - [ShuffleNetV2_x0_5](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ShuffleNetV2_x0_5_pretrained.pdparams) - - [ShuffleNetV2](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ShuffleNetV2_x1_0_pretrained.pdparams) - - [ShuffleNetV2_x1_5](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ShuffleNetV2_x1_5_pretrained.pdparams) - - [ShuffleNetV2_x2_0](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ShuffleNetV2_x2_0_pretrained.pdparams) - - [ShuffleNetV2_swish](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ShuffleNetV2_swish_pretrained.pdparams) - - GhostNet series[[23](#ref23)]([paper link](https://arxiv.org/pdf/1911.11907.pdf)) - - [GhostNet_x0_5](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/GhostNet_x0_5_pretrained.pdparams) - - [GhostNet_x1_0](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/GhostNet_x1_0_pretrained.pdparams) - - [GhostNet_x1_3](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/GhostNet_x1_3_pretrained.pdparams) - - [GhostNet_x1_3_ssld](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/GhostNet_x1_3_ssld_pretrained.pdparams) - - -- SEResNeXt and Res2Net series - - ResNeXt series[[7](#ref7)]([paper link](https://arxiv.org/abs/1611.05431)) - - [ResNeXt50_32x4d](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt50_32x4d_pretrained.pdparams) - - [ResNeXt50_64x4d](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt50_64x4d_pretrained.pdparams) - - 
[ResNeXt101_32x4d](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_32x4d_pretrained.pdparams) - - [ResNeXt101_64x4d](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_64x4d_pretrained.pdparams) - - [ResNeXt152_32x4d](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt152_32x4d_pretrained.pdparams) - - [ResNeXt152_64x4d](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt152_64x4d_pretrained.pdparams) - - ResNeXt_vd series - - [ResNeXt50_vd_32x4d](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt50_vd_32x4d_pretrained.pdparams) - - [ResNeXt50_vd_64x4d](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt50_vd_64x4d_pretrained.pdparams) - - [ResNeXt101_vd_32x4d](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_vd_32x4d_pretrained.pdparams) - - [ResNeXt101_vd_64x4d](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_vd_64x4d_pretrained.pdparams) - - [ResNeXt152_vd_32x4d](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt152_vd_32x4d_pretrained.pdparams) - - [ResNeXt152_vd_64x4d](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt152_vd_64x4d_pretrained.pdparams) - - SE_ResNet_vd series[[8](#ref8)]([paper link](https://arxiv.org/abs/1709.01507)) - - [SE_ResNet18_vd](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SE_ResNet18_vd_pretrained.pdparams) - - [SE_ResNet34_vd](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SE_ResNet34_vd_pretrained.pdparams) - - [SE_ResNet50_vd](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SE_ResNet50_vd_pretrained.pdparams) - - SE_ResNeXt series - - [SE_ResNeXt50_32x4d](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SE_ResNeXt50_32x4d_pretrained.pdparams) - - [SE_ResNeXt101_32x4d](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SE_ResNeXt101_32x4d_pretrained.pdparams) - - SE_ResNeXt_vd series - - 
[SE_ResNeXt50_vd_32x4d](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SE_ResNeXt50_vd_32x4d_pretrained.pdparams) - - [SENet154_vd](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SENet154_vd_pretrained.pdparams) - - Res2Net series[[9](#ref9)]([paper link](https://arxiv.org/abs/1904.01169)) - - [Res2Net50_26w_4s](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Res2Net50_26w_4s_pretrained.pdparams) - - [Res2Net50_vd_26w_4s](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Res2Net50_vd_26w_4s_pretrained.pdparams) - - [Res2Net50_vd_26w_4s_ssld](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Res2Net50_vd_26w_4s_ssld_pretrained.pdparams) - - [Res2Net50_14w_8s](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Res2Net50_14w_8s_pretrained.pdparams) - - [Res2Net101_vd_26w_4s](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Res2Net101_vd_26w_4s_pretrained.pdparams) - - [Res2Net101_vd_26w_4s_ssld](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Res2Net101_vd_26w_4s_ssld_pretrained.pdparams) - - [Res2Net200_vd_26w_4s](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Res2Net200_vd_26w_4s_pretrained.pdparams) - - [Res2Net200_vd_26w_4s_ssld](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Res2Net200_vd_26w_4s_ssld_pretrained.pdparams) - - -- Inception series - - GoogLeNet series[[10](#ref10)]([paper link](https://arxiv.org/pdf/1409.4842.pdf)) - - [GoogLeNet](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/GoogLeNet_pretrained.pdparams) - - InceptionV3 series[[26](#ref26)]([paper link](https://arxiv.org/abs/1512.00567)) - - [InceptionV3](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/InceptionV3_pretrained.pdparams) - - InceptionV4 series[[11](#ref11)]([paper link](https://arxiv.org/abs/1602.07261)) - - [InceptionV4](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/InceptionV4_pretrained.pdparams) - - Xception series[[12](#ref12)]([paper 
link](http://openaccess.thecvf.com/content_cvpr_2017/html/Chollet_Xception_Deep_Learning_CVPR_2017_paper.html)) - - [Xception41](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Xception41_pretrained.pdparams) - - [Xception41_deeplab](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Xception41_deeplab_pretrained.pdparams) - - [Xception65](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Xception65_pretrained.pdparams) - - [Xception65_deeplab](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Xception65_deeplab_pretrained.pdparams) - - [Xception71](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Xception71_pretrained.pdparams) - - -- HRNet series - - HRNet series[[13](#ref13)]([paper link](https://arxiv.org/abs/1908.07919)) - - [HRNet_W18_C](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/HRNet_W18_C_pretrained.pdparams) - - [HRNet_W18_C_ssld](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/HRNet_W18_C_ssld_pretrained.pdparams) - - [HRNet_W30_C](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/HRNet_W30_C_pretrained.pdparams) - - [HRNet_W32_C](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/HRNet_W32_C_pretrained.pdparams) - - [HRNet_W40_C](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/HRNet_W40_C_pretrained.pdparams) - - [HRNet_W44_C](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/HRNet_W44_C_pretrained.pdparams) - - [HRNet_W48_C](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/HRNet_W48_C_pretrained.pdparams) - - [HRNet_W48_C_ssld](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/HRNet_W48_C_ssld_pretrained.pdparams) - - [HRNet_W64_C](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/HRNet_W64_C_pretrained.pdparams) - - [SE_HRNet_W64_C_ssld](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SE_HRNet_W64_C_ssld_pretrained.pdparams) - - -- DPN and DenseNet series - - DPN series[[14](#ref14)]([paper 
link](https://arxiv.org/abs/1707.01629)) - - [DPN68](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DPN68_pretrained.pdparams) - - [DPN92](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DPN92_pretrained.pdparams) - - [DPN98](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DPN98_pretrained.pdparams) - - [DPN107](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DPN107_pretrained.pdparams) - - [DPN131](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DPN131_pretrained.pdparams) - - DenseNet series[[15](#ref15)]([paper link](https://arxiv.org/abs/1608.06993)) - - [DenseNet121](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DenseNet121_pretrained.pdparams) - - [DenseNet161](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DenseNet161_pretrained.pdparams) - - [DenseNet169](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DenseNet169_pretrained.pdparams) - - [DenseNet201](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DenseNet201_pretrained.pdparams) - - [DenseNet264](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DenseNet264_pretrained.pdparams) - - -- EfficientNet and ResNeXt101_wsl series - - EfficientNet series[[16](#ref16)]([paper link](https://arxiv.org/abs/1905.11946)) - - [EfficientNetB0_small](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB0_small_pretrained.pdparams) - - [EfficientNetB0](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB0_pretrained.pdparams) - - [EfficientNetB1](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB1_pretrained.pdparams) - - [EfficientNetB2](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB2_pretrained.pdparams) - - [EfficientNetB3](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB3_pretrained.pdparams) - - [EfficientNetB4](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB4_pretrained.pdparams) - - 
[EfficientNetB5](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB5_pretrained.pdparams) - - [EfficientNetB6](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB6_pretrained.pdparams) - - [EfficientNetB7](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB7_pretrained.pdparams) - - ResNeXt101_wsl series[[17](#ref17)]([paper link](https://arxiv.org/abs/1805.00932)) - - [ResNeXt101_32x8d_wsl](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_32x8d_wsl_pretrained.pdparams) - - [ResNeXt101_32x16d_wsl](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_32x16d_wsl_pretrained.pdparams) - - [ResNeXt101_32x32d_wsl](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_32x32d_wsl_pretrained.pdparams) - - [ResNeXt101_32x48d_wsl](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_32x48d_wsl_pretrained.pdparams) - - [Fix_ResNeXt101_32x48d_wsl](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Fix_ResNeXt101_32x48d_wsl_pretrained.pdparams) - - - -- ResNeSt and RegNet series - - ResNeSt series[[24](#ref24)]([paper link](https://arxiv.org/abs/2004.08955)) - - [ResNeSt50_fast_1s1x64d](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeSt50_fast_1s1x64d_pretrained.pdparams) - - [ResNeSt50](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeSt50_pretrained.pdparams) - - RegNet series[[25](#ref25)]([paper link](https://arxiv.org/abs/2003.13678)) - - [RegNetX_4GF](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetX_4GF_pretrained.pdparams) - - -- Transformer series - - Swin-transformer series[[27](#ref27)]([paper link](https://arxiv.org/pdf/2103.14030.pdf)) - - [SwinTransformer_tiny_patch4_window7_224](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SwinTransformer_tiny_patch4_window7_224_pretrained.pdparams) - - 
[SwinTransformer_small_patch4_window7_224](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SwinTransformer_small_patch4_window7_224_pretrained.pdparams) - - [SwinTransformer_base_patch4_window7_224](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SwinTransformer_base_patch4_window7_224_pretrained.pdparams) - - [SwinTransformer_base_patch4_window12_384](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SwinTransformer_base_patch4_window12_384_pretrained.pdparams) - - [SwinTransformer_base_patch4_window7_224_22k](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SwinTransformer_base_patch4_window7_224_22k_pretrained.pdparams) - - [SwinTransformer_base_patch4_window7_224_22kto1k](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SwinTransformer_base_patch4_window7_224_22kto1k_pretrained.pdparams) - - [SwinTransformer_large_patch4_window12_384_22k](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SwinTransformer_large_patch4_window12_384_22k_pretrained.pdparams) - - [SwinTransformer_large_patch4_window12_384_22kto1k](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SwinTransformer_large_patch4_window12_384_22kto1k_pretrained.pdparams) - - [SwinTransformer_large_patch4_window7_224_22k](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SwinTransformer_large_patch4_window7_224_22k_pretrained.pdparams) - - [SwinTransformer_large_patch4_window7_224_22kto1k](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SwinTransformer_large_patch4_window7_224_22kto1k_pretrained.pdparams) - - - -- Other models - - AlexNet series[[18](#ref18)]([paper link](https://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf)) - - [AlexNet](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/AlexNet_pretrained.pdparams) - - SqueezeNet series[[19](#ref19)]([paper link](https://arxiv.org/abs/1602.07360)) - - 
[SqueezeNet1_0](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SqueezeNet1_0_pretrained.pdparams) - - [SqueezeNet1_1](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SqueezeNet1_1_pretrained.pdparams) - - VGG series[[20](#ref20)]([paper link](https://arxiv.org/abs/1409.1556)) - - [VGG11](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/VGG11_pretrained.pdparams) - - [VGG13](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/VGG13_pretrained.pdparams) - - [VGG16](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/VGG16_pretrained.pdparams) - - [VGG19](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/VGG19_pretrained.pdparams) - - DarkNet series[[21](#ref21)]([paper link](https://arxiv.org/abs/1506.02640)) - - [DarkNet53](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DarkNet53_pretrained.pdparams) - -**Note**: The pretrained models of EfficientNetB1-B7 in the above models are transferred from [pytorch version of EfficientNet](https://github.com/lukemelas/EfficientNet-PyTorch), and the ResNeXt101_wsl series of pretrained models are transferred from [Official repo](https://github.com/facebookresearch/WSL-Images), the remaining pretrained models are obtained by training with the PaddlePaddle framework, and the corresponding training hyperparameters are given in configs. - -## References - - -[1] He K, Zhang X, Ren S, et al. Deep residual learning for image recognition[C]//Proceedings of the IEEE conference on computer vision and pattern recognition. 2016: 770-778. - -[2] He T, Zhang Z, Zhang H, et al. Bag of tricks for image classification with convolutional neural networks[C]//Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 2019: 558-567. - -[3] Howard A, Sandler M, Chu G, et al. Searching for mobilenetv3[C]//Proceedings of the IEEE International Conference on Computer Vision. 2019: 1314-1324. - -[4] Sandler M, Howard A, Zhu M, et al. 
Mobilenetv2: Inverted residuals and linear bottlenecks[C]//Proceedings of the IEEE conference on computer vision and pattern recognition. 2018: 4510-4520. - -[5] Howard A G, Zhu M, Chen B, et al. Mobilenets: Efficient convolutional neural networks for mobile vision applications[J]. arXiv preprint arXiv:1704.04861, 2017. - -[6] Ma N, Zhang X, Zheng H T, et al. Shufflenet v2: Practical guidelines for efficient cnn architecture design[C]//Proceedings of the European Conference on Computer Vision (ECCV). 2018: 116-131. - -[7] Xie S, Girshick R, Dollár P, et al. Aggregated residual transformations for deep neural networks[C]//Proceedings of the IEEE conference on computer vision and pattern recognition. 2017: 1492-1500. - - -[8] Hu J, Shen L, Sun G. Squeeze-and-excitation networks[C]//Proceedings of the IEEE conference on computer vision and pattern recognition. 2018: 7132-7141. - - -[9] Gao S, Cheng M M, Zhao K, et al. Res2net: A new multi-scale backbone architecture[J]. IEEE transactions on pattern analysis and machine intelligence, 2019. - -[10] Szegedy C, Liu W, Jia Y, et al. Going deeper with convolutions[C]//Proceedings of the IEEE conference on computer vision and pattern recognition. 2015: 1-9. - - -[11] Szegedy C, Ioffe S, Vanhoucke V, et al. Inception-v4, inception-resnet and the impact of residual connections on learning[C]//Thirty-first AAAI conference on artificial intelligence. 2017. - -[12] Chollet F. Xception: Deep learning with depthwise separable convolutions[C]//Proceedings of the IEEE conference on computer vision and pattern recognition. 2017: 1251-1258. - -[13] Wang J, Sun K, Cheng T, et al. Deep high-resolution representation learning for visual recognition[J]. arXiv preprint arXiv:1908.07919, 2019. - -[14] Chen Y, Li J, Xiao H, et al. Dual path networks[C]//Advances in neural information processing systems. 2017: 4467-4475. - -[15] Huang G, Liu Z, Van Der Maaten L, et al. 
Densely connected convolutional networks[C]//Proceedings of the IEEE conference on computer vision and pattern recognition. 2017: 4700-4708. - - -[16] Tan M, Le Q V. Efficientnet: Rethinking model scaling for convolutional neural networks[J]. arXiv preprint arXiv:1905.11946, 2019. - -[17] Mahajan D, Girshick R, Ramanathan V, et al. Exploring the limits of weakly supervised pretraining[C]//Proceedings of the European Conference on Computer Vision (ECCV). 2018: 181-196. - -[18] Krizhevsky A, Sutskever I, Hinton G E. Imagenet classification with deep convolutional neural networks[C]//Advances in neural information processing systems. 2012: 1097-1105. - -[19] Iandola F N, Han S, Moskewicz M W, et al. SqueezeNet: AlexNet-level accuracy with 50x fewer parameters and< 0.5 MB model size[J]. arXiv preprint arXiv:1602.07360, 2016. - -[20] Simonyan K, Zisserman A. Very deep convolutional networks for large-scale image recognition[J]. arXiv preprint arXiv:1409.1556, 2014. - -[21] Redmon J, Divvala S, Girshick R, et al. You only look once: Unified, real-time object detection[C]//Proceedings of the IEEE conference on computer vision and pattern recognition. 2016: 779-788. - -[22] Ding X, Guo Y, Ding G, et al. Acnet: Strengthening the kernel skeletons for powerful cnn via asymmetric convolution blocks[C]//Proceedings of the IEEE International Conference on Computer Vision. 2019: 1911-1920. - -[23] Han K, Wang Y, Tian Q, et al. GhostNet: More features from cheap operations[C]//Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition. 2020: 1580-1589. - -[24] Zhang H, Wu C, Zhang Z, et al. Resnest: Split-attention networks[J]. arXiv preprint arXiv:2004.08955, 2020. - -[25] Radosavovic I, Kosaraju R P, Girshick R, et al. Designing network design spaces[C]//Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition. 2020: 10428-10436. - -[26] C.Szegedy, V.Vanhoucke, S.Ioffe, J.Shlens, and Z.Wojna. 
Rethinking the inception architecture for computer vision. arXiv preprint arXiv:1512.00567, 2015. - -[27] Ze Liu, Yutong Lin, Yue Cao, Han Hu, Yixuan Wei, Zheng Zhang, Stephen Lin and Baining Guo. Swin Transformer: Hierarchical Vision Transformer using Shifted Windows. diff --git a/docs/en/models_training/index.rst b/docs/en/models_training/index.rst new file mode 100644 index 0000000000000000000000000000000000000000..1d27e65bd5242763c8cdc02edec53161065c879c --- /dev/null +++ b/docs/en/models_training/index.rst @@ -0,0 +1,10 @@ +models_training +================================ + +.. toctree:: + :maxdepth: 2 + + config_description_en.md + recognition_en.md + classification_en.md + train_strategy_en.md diff --git a/docs/en/others/VisualDL_en.md b/docs/en/others/VisualDL_en.md index cbd096ba89fcc1c6a588d7d9da5a1d2ac1e3783d..34ff4b8abb3dfffdde4034fe375eb6ee65209e35 100644 --- a/docs/en/others/VisualDL_en.md +++ b/docs/en/others/VisualDL_en.md @@ -52,6 +52,6 @@ More information about the command,please refer to [VisualDL](https://github.c Then you can enter the address `127.0.0.1:8840` and view the training process in the browser: -
- -
+ +![](../../images/VisualDL/train_loss.png) + diff --git a/docs/en/others/index.rst b/docs/en/others/index.rst new file mode 100644 index 0000000000000000000000000000000000000000..d106343c871e4e5741cf3d2434562c9f220fa7bf --- /dev/null +++ b/docs/en/others/index.rst @@ -0,0 +1,15 @@ +others +================================ + +.. toctree:: + :maxdepth: 2 + + transfer_learning_en.md + train_with_DALI_en.md + VisualDL_en.md + train_on_xpu_en.md + feature_visiualization_en.md + paddle_mobile_inference_en.md + competition_support_en.md + update_history_en.md + versions_en.md diff --git a/docs/en/others/paddle_mobile_inference_en.md b/docs/en/others/paddle_mobile_inference_en.md index 153c96ffa44284742afa1acb8f9efe4e597414bd..4cb73179033ab54de7d09a8d173066f627820401 100644 --- a/docs/en/others/paddle_mobile_inference_en.md +++ b/docs/en/others/paddle_mobile_inference_en.md @@ -1,4 +1,4 @@ -# Paddle-Lite +# Benchmark on Mobile --- diff --git a/docs/en/quick_start/index.rst b/docs/en/quick_start/index.rst new file mode 100644 index 0000000000000000000000000000000000000000..2edd78ddec35abbb56e6b3a258cf3b16356fd136 --- /dev/null +++ b/docs/en/quick_start/index.rst @@ -0,0 +1,10 @@ +quick_start +================================ + +.. 
toctree:: + :maxdepth: 2 + + quick_start_classification_new_user_en.md + quick_start_classification_professional_en.md + quick_start_recognition_en.md + quick_start_multilabel_classification_en.md diff --git a/docs/en/quick_start/quick_start_classification_new_user_en.md b/docs/en/quick_start/quick_start_classification_new_user_en.md index bf668b7b291d1b407c1dcbcc665159e3628da210..12a5e741e52256557155bd47ac952e57ea95ef6f 100644 --- a/docs/en/quick_start/quick_start_classification_new_user_en.md +++ b/docs/en/quick_start/quick_start_classification_new_user_en.md @@ -78,7 +78,7 @@ After the unzip operation is completed, there are three `.txt` files for trainin The image files of the flowers102 dataset are stored in the `dataset/flowers102/jpg` directory. The image examples are as follows:
- +![](../../images/quick_start/Examples-Flower-102.png)
Return to the root directory of `PaddleClas`: @@ -148,9 +148,7 @@ python tools/train.py -c ./ppcls/configs/quick_start/ResNet50_vd.yaml After the training is completed, the `Top1 Acc` curve of the validation set is shown below, and the highest accuracy rate is 0.2735. -
- -
+![](../../images/quick_start/r50_vd_acc.png) #### 4.2.2 Use pre-trained models for training @@ -165,9 +163,7 @@ python tools/train.py -c ./ppcls/configs/quick_start/ResNet50_vd.yaml -o Arch.pr The `Top1 Acc` curve of the validation set is shown below. The highest accuracy rate is `0.9402`. After loading the pre-trained model, the accuracy of the flowers102 data set has been greatly improved, and the absolute accuracy has increased by more than 65%. -
- -
+![](../../images/quick_start/r50_vd_pretrained_acc.png) ## 5. Model prediction diff --git a/docs/en/quick_start/quick_start_recognition_en.md b/docs/en/quick_start/quick_start_recognition_en.md index aebc9154a1242a0d2234e0171e6b0ce526e2ebc7..61c6f2309770e1b888712cc7919d93c9fcdf26b8 100644 --- a/docs/en/quick_start/quick_start_recognition_en.md +++ b/docs/en/quick_start/quick_start_recognition_en.md @@ -165,9 +165,7 @@ python3.7 python/predict_system.py -c configs/inference_product.yaml -o Global.u The image to be retrieved is shown below. -
- -
+![](../../images/recognition/product_demo/query/daoxiangcunjinzhubing_6.jpg) The final output is shown below. @@ -182,9 +180,7 @@ where bbox indicates the location of the detected object, rec_docs indicates the The detection result is also saved in the folder `output`, for this image, the visualization result is as follows. -
- -
+![](../../images/recognition/product_demo/result/daoxiangcunjinzhubing_6_en.jpg) @@ -228,9 +224,7 @@ python3.7 python/predict_system.py -c configs/inference_product.yaml -o Global.i The image to be retrieved is shown below. -
- -
+![](../../images/recognition/product_demo/query/anmuxi.jpg) The output is empty. @@ -298,6 +292,5 @@ The output is as follows: The final recognition result is `Anmuxi Ambrosial Yogurt`, which is corrrect, the visualization result is as follows. -
- +![](../../images/recognition/product_demo/result/anmuxi_en.jpg)
diff --git a/docs/zh_CN/algorithm_introduction/ImageNet_models.md b/docs/zh_CN/algorithm_introduction/ImageNet_models.md index 0cbf58fccef6c196b3836366d39fe1c9e175d130..a61d882e70aa2dc54adef23c9ec125024233889b 100644 --- a/docs/zh_CN/algorithm_introduction/ImageNet_models.md +++ b/docs/zh_CN/algorithm_introduction/ImageNet_models.md @@ -31,6 +31,7 @@ - [21. RedNet 系列](#21) - [22. TNT 系列](#22) - [23. 其他模型](#23) +- [参考文献](#reference) @@ -65,7 +66,7 @@ | 模型 | Top-1 Acc | Reference
Top-1 Acc | Acc gain | time(ms)
bs=1 | time(ms)
bs=4 | time(ms)
bs=8 | FLOPs(G) | Params(M) | 预训练模型下载地址 | inference模型下载地址 | |---------------------|-----------|-----------|---------------|----------------|-----------|----------|-----------|-----------------------------------|-----------------------------------|-----------------------------------| -| ResNet34_vd_ssld | 0.797 | 0.760 | 0.037 | 2.00 | 3.28 | 5.84 | 3.93 | 21.84 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/ResNet34_vd_ssld_pretrained.pdparams)   | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/ResNet34_vd_ssld.tar)   | +| ResNet34_vd_ssld | 0.797 | 0.760 | 0.037 | 2.00 | 3.28 | 5.84 | 3.93 | 21.84 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/ResNet34_vd_ssld_pretrained.pdparams)   | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/ResNet34_vd_ssld_infer.tar)   | | ResNet50_vd_ssld | 0.830 | 0.792 | 0.039 | 2.60 | 4.86 | 7.63 | 4.35 | 25.63 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/ResNet50_vd_ssld_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/ResNet50_vd_ssld_infer.tar) | | ResNet101_vd_ssld | 0.837 | 0.802 | 0.035 | 4.43 | 8.25 | 12.60 | 8.08 | 44.67 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/ResNet101_vd_ssld_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/ResNet101_vd_ssld_infer.tar) | | Res2Net50_vd_26w_4s_ssld | 0.831 | 0.798 | 0.033 | 3.59 | 6.35 | 9.50 | 4.28 | 25.76 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Res2Net50_vd_26w_4s_ssld_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/Res2Net50_vd_26w_4s_ssld_infer.tar) | @@ -98,14 +99,11 @@ | PPLCNet_x1_0_ssld | 0.744 | 0.713 | 0.033 | 2.46 | 160.81 | 2.96 | 
[下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/PPLCNet_x1_0_ssld_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/PPLCNet_x1_0_ssld_infer.tar) | | PPLCNet_x2_5_ssld | 0.808 | 0.766 | 0.042 | 5.39 | 906.49 | 9.04 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/PPLCNet_x2_5_ssld_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/PPLCNet_x2_5_ssld_infer.tar) | - - - * 注: `Reference Top-1 Acc` 表示 PaddleClas 基于 ImageNet1k 数据集训练得到的预训练模型精度。 -## 3. PP-LCNet 系列 +## 3. PP-LCNet 系列 [[28](#ref28)] PP-LCNet 系列模型的精度、速度指标如下表所示,更多关于该系列的模型介绍可以参考:[PP-LCNet 系列模型文档](../models/PP-LCNet.md)。 @@ -122,7 +120,7 @@ PP-LCNet 系列模型的精度、速度指标如下表所示,更多关于该 -## 4. ResNet 系列 +## 4. ResNet 系列 [[1](#ref1)] ResNet 及其 Vd 系列模型的精度、速度指标如下表所示,更多关于该系列的模型介绍可以参考:[ResNet 及其 Vd 系列模型文档](../models/ResNet_and_vd.md)。 @@ -146,7 +144,7 @@ ResNet 及其 Vd 系列模型的精度、速度指标如下表所示,更多关 -## 5. 移动端系列 +## 5. 移动端系列 [[3](#ref3)][[4](#ref4)][[5](#ref5)][[6](#ref6)][[23](#ref23)] 移动端系列模型的精度、速度指标如下表所示,更多关于该系列的模型介绍可以参考:[移动端系列模型文档](../models/Mobile.md)。 @@ -195,7 +193,7 @@ ResNet 及其 Vd 系列模型的精度、速度指标如下表所示,更多关 -## 6. SEResNeXt 与 Res2Net 系列 +## 6. SEResNeXt 与 Res2Net 系列 [[7](#ref7)][[8](#ref8)][[9](#ref9)] SEResNeXt 与 Res2Net 系列模型的精度、速度指标如下表所示,更多关于该系列的模型介绍可以参考:[SEResNeXt 与 Res2Net 系列模型文档](../models/SEResNext_and_Res2Net.md)。 @@ -230,7 +228,7 @@ SEResNeXt 与 Res2Net 系列模型的精度、速度指标如下表所示,更 -## 7. DPN 与 DenseNet 系列 +## 7. 
DPN 与 DenseNet 系列 [[14](#ref14)][[15](#ref15)] DPN 与 DenseNet 系列模型的精度、速度指标如下表所示,更多关于该系列的模型介绍可以参考:[DPN 与 DenseNet 系列模型文档](../models/DPN_DenseNet.md)。 @@ -248,15 +246,12 @@ DPN 与 DenseNet 系列模型的精度、速度指标如下表所示,更多关 | DPN107 | 0.8089 | 0.9532 | 19.46 | 35.62 | 50.22 | 18.38 | 87.13 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DPN107_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/DPN107_infer.tar) | | DPN131 | 0.8070 | 0.9514 | 19.64 | 34.60 | 47.42 | 16.09 | 79.48 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DPN131_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/DPN131_infer.tar) | - - -## 8. HRNet 系列 +## 8. HRNet 系列 [[13](#ref13)] HRNet 系列模型的精度、速度指标如下表所示,更多关于该系列的模型介绍可以参考:[HRNet 系列模型文档](../models/HRNet.md)。 - | 模型 | Top-1 Acc | Top-5 Acc | time(ms)
bs=1 | time(ms)
bs=4 | time(ms)
bs=8 | FLOPs(G) | Params(M) | 预训练模型下载地址 | inference模型下载地址 | |-------------|-----------|-----------|------------------|------------------|----------|-----------|--------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------| | HRNet_W18_C | 0.7692 | 0.9339 | 6.66 | 8.94 | 11.95 | 4.32 | 21.35 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/HRNet_W18_C_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/HRNet_W18_C_infer.tar) | @@ -272,7 +267,7 @@ HRNet 系列模型的精度、速度指标如下表所示,更多关于该系 -## 9. Inception 系列 +## 9. Inception 系列 [[10](#ref10)][[11](#ref11)][[12](#ref12)][[26](#ref26)] Inception 系列模型的精度、速度指标如下表所示,更多关于该系列的模型介绍可以参考:[Inception 系列模型文档](../models/Inception.md)。 @@ -289,11 +284,10 @@ Inception 系列模型的精度、速度指标如下表所示,更多关于该 -## 10. EfficientNet 与 ResNeXt101_wsl 系列 +## 10. EfficientNet 与 ResNeXt101_wsl 系列 [[16](#ref16)][[17](#ref17)] EfficientNet 与 ResNeXt101_wsl 系列模型的精度、速度指标如下表所示,更多关于该系列的模型介绍可以参考:[EfficientNet 与 ResNeXt101_wsl 系列模型文档](../models/EfficientNet_and_ResNeXt101_wsl.md)。 - | 模型 | Top-1 Acc | Top-5 Acc | time(ms)
bs=1 | time(ms)
bs=4 | time(ms)
bs=8 | FLOPs(G) | Params(M) | 预训练模型下载地址 | inference模型下载地址 | |---------------------------|-----------|-----------|------------------|------------------|----------|-----------|----------------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------| | ResNeXt101_
32x8d_wsl | 0.8255 | 0.9674 | 13.55 | 23.39 | 36.18 | 16.48 | 88.99 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_32x8d_wsl_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/ResNeXt101_32x8d_wsl_infer.tar) | @@ -313,11 +307,10 @@ EfficientNet 与 ResNeXt101_wsl 系列模型的精度、速度指标如下表所 -## 11. ResNeSt 与 RegNet 系列 +## 11. ResNeSt 与 RegNet 系列 [[24](#ref24)][[25](#ref25)] ResNeSt 与 RegNet 系列模型的精度、速度指标如下表所示,更多关于该系列的模型介绍可以参考:[ResNeSt 与 RegNet 系列模型文档](../models/ResNeSt_RegNet.md)。 - | 模型 | Top-1 Acc | Top-5 Acc | time(ms)
bs=1 | time(ms)
bs=4 | time(ms)
bs=8 | FLOPs(G) | Params(M) | 预训练模型下载地址 | inference模型下载地址 | |------------------------|-----------|-----------|------------------|------------------|----------|-----------|------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------| | ResNeSt50_
fast_1s1x64d | 0.8035 | 0.9528 | 2.73 | 5.33 | 8.24 | 4.36 | 26.27 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeSt50_fast_1s1x64d_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/ResNeSt50_fast_1s1x64d_infer.tar) | @@ -326,11 +319,10 @@ ResNeSt 与 RegNet 系列模型的精度、速度指标如下表所示,更多 -## 12. ViT_and_DeiT 系列 +## 12. ViT_and_DeiT 系列 [[31](#ref31)][[32](#ref32)] ViT(Vision Transformer) 与 DeiT(Data-efficient Image Transformers)系列模型的精度、速度指标如下表所示. 更多关于该系列模型的介绍可以参考: [ViT_and_DeiT 系列模型文档](../models/ViT_and_DeiT.md)。 - | 模型 | Top-1 Acc | Top-5 Acc | time(ms)
bs=1 | time(ms)
bs=4 | time(ms)
bs=8 | FLOPs(G) | Params(M) | 预训练模型下载地址 | inference模型下载地址 | |------------------------|-----------|-----------|------------------|------------------|----------|------------------------|------------------------|------------------------|------------------------| | ViT_small_
patch16_224 | 0.7769 | 0.9342 | 3.71 | 9.05 | 16.72 | 9.41 | 48.60 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ViT_small_patch16_224_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/ViT_small_patch16_224_infer.tar) | @@ -341,8 +333,6 @@ ViT(Vision Transformer) 与 DeiT(Data-efficient Image Transformers)系列模 |ViT_large_
patch16_384| 0.8513 | 0.9736 | 39.51 | 152.46 | 304.06 | 174.70 | 304.12 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ViT_large_patch16_384_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/ViT_large_patch16_384_infer.tar) | |ViT_large_
patch32_384| 0.8153 | 0.9608 | 11.44 | 36.09 | 70.63 | 44.24 | 306.48 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ViT_large_patch32_384_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/ViT_large_patch32_384_infer.tar) | - - | 模型 | Top-1 Acc | Top-5 Acc | time(ms)
bs=1 | time(ms)
bs=4 | time(ms)
bs=8 | FLOPs(G) | Params(M) | 预训练模型下载地址 | inference模型下载地址 | |------------------------|-----------|-----------|------------------|------------------|----------|------------------------|------------------------|------------------------|------------------------| | DeiT_tiny_
patch16_224 | 0.718 | 0.910 | 3.61 | 3.94 | 6.10 | 1.07 | 5.68 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_tiny_patch16_224_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/DeiT_tiny_patch16_224_infer.tar) | @@ -356,11 +346,10 @@ ViT(Vision Transformer) 与 DeiT(Data-efficient Image Transformers)系列模 -## 13. RepVGG 系列 +## 13. RepVGG 系列 [[36](#ref36)] 关于 RepVGG 系列模型的精度、速度指标如下表所示,更多介绍可以参考:[RepVGG 系列模型文档](../models/RepVGG.md)。 - | 模型 | Top-1 Acc | Top-5 Acc | time(ms)
bs=1 | time(ms)
bs=4 | time(ms)
bs=8 | FLOPs(G) | Params(M) | 预训练模型下载地址 | inference模型下载地址 | |------------------------|-----------|-----------|------------------|------------------|----------|-----------|------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------| | RepVGG_A0 | 0.7131 | 0.9016 | | | | 1.36 | 8.31 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_A0_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/RepVGG_A0_infer.tar) | @@ -376,7 +365,7 @@ ViT(Vision Transformer) 与 DeiT(Data-efficient Image Transformers)系列模 -## 14. MixNet 系列 +## 14. MixNet 系列 [[29](#ref29)] 关于 MixNet 系列模型的精度、速度指标如下表所示,更多介绍可以参考:[MixNet 系列模型文档](../models/MixNet.md)。 @@ -388,7 +377,7 @@ ViT(Vision Transformer) 与 DeiT(Data-efficient Image Transformers)系列模 -## 15. ReXNet 系列 +## 15. ReXNet 系列 [[30](#ref30)] 关于 ReXNet 系列模型的精度、速度指标如下表所示,更多介绍可以参考:[ReXNet 系列模型文档](../models/ReXNet.md)。 @@ -402,7 +391,7 @@ ViT(Vision Transformer) 与 DeiT(Data-efficient Image Transformers)系列模 -## 16. SwinTransformer 系列 +## 16. 
SwinTransformer 系列 [[27](#ref27)] 关于 SwinTransformer 系列模型的精度、速度指标如下表所示,更多介绍可以参考:[SwinTransformer 系列模型文档](../models/SwinTransformer.md)。 @@ -414,20 +403,20 @@ ViT(Vision Transformer) 与 DeiT(Data-efficient Image Transformers)系列模 | SwinTransformer_base_patch4_window12_384 | 0.8439 | 0.9693 | 19.52 | 64.56 | 123.30 | 44.45 | 87.70 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SwinTransformer_base_patch4_window12_384_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/SwinTransformer_base_patch4_window12_384_infer.tar) | | SwinTransformer_base_patch4_window7_224[1] | 0.8487 | 0.9746 | 13.53 | 23.46 | 39.13 | 15.13 | 87.70 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SwinTransformer_base_patch4_window7_224_22kto1k_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/SwinTransformer_base_patch4_window7_224_infer.tar) | | SwinTransformer_base_patch4_window12_384[1] | 0.8642 | 0.9807 | 19.65 | 64.72 | 123.42 | 44.45 | 87.70 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SwinTransformer_base_patch4_window12_384_22kto1k_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/SwinTransformer_base_patch4_window12_384_infer.tar) | -| SwinTransformer_large_patch4_window7_224[1] | 0.8596 | 0.9783 | 15.74 | 38.57 | 71.49 | 34.02 | 196.43 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SwinTransformer_large_patch4_window7_224_22kto1k_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/SwinTransformer_large_patch4_window7_224_infer.tar) | -| SwinTransformer_large_patch4_window12_384[1] | 0.8719 | 0.9823 | 32.61 | 116.59 | 223.23 | 99.97 | 196.43 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SwinTransformer_large_patch4_window12_384_22kto1k_pretrained.pdparams) | 
[下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/SwinTransformer_large_patch4_window12_384_infer.tar) | +| SwinTransformer_large_patch4_window7_224[1] | 0.8596 | 0.9783 | 15.74 | 38.57 | 71.49 | 34.02 | 196.43 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SwinTransformer_large_patch4_window7_224_22kto1k_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/SwinTransformer_large_patch4_window7_224_22kto1k_infer.tar) | +| SwinTransformer_large_patch4_window12_384[1] | 0.8719 | 0.9823 | 32.61 | 116.59 | 223.23 | 99.97 | 196.43 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SwinTransformer_large_patch4_window12_384_22kto1k_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/SwinTransformer_large_patch4_window12_384_22kto1k_infer.tar) | [1]:基于 ImageNet22k 数据集预训练,然后在 ImageNet1k 数据集迁移学习得到。 -## 17. LeViT 系列 +## 17. LeViT 系列 [[33](#ref33)] 关于 LeViT 系列模型的精度、速度指标如下表所示,更多介绍可以参考:[LeViT 系列模型文档](../models/LeViT.md)。 | 模型 | Top-1 Acc | Top-5 Acc | time(ms)
bs=1 | time(ms)
bs=4 | time(ms)
bs=8 | FLOPs(M) | Params(M) | 预训练模型下载地址 | inference模型下载地址 | | ---------- | --------- | --------- | ---------------- | ---------------- | -------- | --------- | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | -| LeViT_128S | 0.7598 | 0.9269 | | | | 281 | 7.42 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/LeViT_128S_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/eViT_128S_infer.tar) | +| LeViT_128S | 0.7598 | 0.9269 | | | | 281 | 7.42 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/LeViT_128S_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/LeViT_128S_infer.tar) | | LeViT_128 | 0.7810 | 0.9371 | | | | 365 | 8.87 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/LeViT_128_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/LeViT_128_infer.tar) | | LeViT_192 | 0.7934 | 0.9446 | | | | 597 | 10.61 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/LeViT_192_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/LeViT_192_infer.tar) | | LeViT_256 | 0.8085 | 0.9497 | | | | 1049 | 18.45 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/LeViT_256_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/LeViT_256_infer.tar) | @@ -437,7 +426,7 @@ ViT(Vision Transformer) 与 DeiT(Data-efficient Image Transformers)系列模 -## 18. Twins 系列 +## 18. Twins 系列 [[34](#ref34)] 关于 Twins 系列模型的精度、速度指标如下表所示,更多介绍可以参考:[Twins 系列模型文档](../models/Twins.md)。 @@ -454,7 +443,7 @@ ViT(Vision Transformer) 与 DeiT(Data-efficient Image Transformers)系列模 -## 19. HarDNet 系列 +## 19. 
HarDNet 系列 [[37](#ref37)] 关于 HarDNet 系列模型的精度、速度指标如下表所示,更多介绍可以参考:[HarDNet 系列模型文档](../models/HarDNet.md)。 @@ -467,7 +456,7 @@ ViT(Vision Transformer) 与 DeiT(Data-efficient Image Transformers)系列模 -## 20. DLA 系列 +## 20. DLA 系列 [[38](#ref38)] 关于 DLA 系列模型的精度、速度指标如下表所示,更多介绍可以参考:[DLA 系列模型文档](../models/DLA.md)。 @@ -485,7 +474,7 @@ ViT(Vision Transformer) 与 DeiT(Data-efficient Image Transformers)系列模 -## 21. RedNet 系列 +## 21. RedNet 系列 [[39](#ref39)] 关于 RedNet 系列模型的精度、速度指标如下表所示,更多介绍可以参考:[RedNet 系列模型文档](../models/RedNet.md)。 @@ -499,7 +488,7 @@ ViT(Vision Transformer) 与 DeiT(Data-efficient Image Transformers)系列模 -## 22. TNT 系列 +## 22. TNT 系列 [[35](#ref35)] 关于 TNT 系列模型的精度、速度指标如下表所示,更多介绍可以参考:[TNT 系列模型文档](../models/TNT.md)。 @@ -513,8 +502,7 @@ ViT(Vision Transformer) 与 DeiT(Data-efficient Image Transformers)系列模 ## 23. 其他模型 -关于 AlexNet、SqueezeNet 系列、VGG 系列、DarkNet53 等模型的精度、速度指标如下表所示,更多介绍可以参考:[其他模型文档](../models/Others.md)。 - +关于 AlexNet [[18](#ref18)]、SqueezeNet 系列 [[19](#ref19)]、VGG 系列 [[20](#ref20)]、DarkNet53 [[21](#ref21)] 等模型的精度、速度指标如下表所示,更多介绍可以参考:[其他模型文档](../models/Others.md)。 | 模型 | Top-1 Acc | Top-5 Acc | time(ms)
bs=1 | time(ms)
bs=4 | time(ms)
bs=8 | FLOPs(G) | Params(M) | 预训练模型下载地址 | inference模型下载地址 | |------------------------|-----------|-----------|------------------|------------------|----------|-----------|------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------| @@ -526,3 +514,86 @@ ViT(Vision Transformer) 与 DeiT(Data-efficient Image Transformers)系列模 | VGG16 | 0.720 | 0.907 | 2.48 | 6.79 | 12.33 | 15.470 | 138.35 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/VGG16_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/VGG16_infer.tar) | | VGG19 | 0.726 | 0.909 | 2.93 | 8.28 | 15.21 | 19.63 | 143.66 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/VGG19_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/VGG19_infer.tar) | | DarkNet53 | 0.780 | 0.941 | 2.79 | 6.42 | 10.89 | 9.31 | 41.65 | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DarkNet53_pretrained.pdparams) | [下载链接](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/DarkNet53_infer.tar) | + + + +## 参考文献 + +[1] He K, Zhang X, Ren S, et al. Deep residual learning for image recognition[C]//Proceedings of the IEEE conference on computer vision and pattern recognition. 2016: 770-778. + +[2] He T, Zhang Z, Zhang H, et al. Bag of tricks for image classification with convolutional neural networks[C]//Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 2019: 558-567. + +[3] Howard A, Sandler M, Chu G, et al. Searching for mobilenetv3[C]//Proceedings of the IEEE International Conference on Computer Vision. 2019: 1314-1324. + +[4] Sandler M, Howard A, Zhu M, et al. 
Mobilenetv2: Inverted residuals and linear bottlenecks[C]//Proceedings of the IEEE conference on computer vision and pattern recognition. 2018: 4510-4520. + +[5] Howard A G, Zhu M, Chen B, et al. Mobilenets: Efficient convolutional neural networks for mobile vision applications[J]. arXiv preprint arXiv:1704.04861, 2017. + +[6] Ma N, Zhang X, Zheng H T, et al. Shufflenet v2: Practical guidelines for efficient cnn architecture design[C]//Proceedings of the European Conference on Computer Vision (ECCV). 2018: 116-131. + +[7] Xie S, Girshick R, Dollár P, et al. Aggregated residual transformations for deep neural networks[C]//Proceedings of the IEEE conference on computer vision and pattern recognition. 2017: 1492-1500. + +[8] Hu J, Shen L, Sun G. Squeeze-and-excitation networks[C]//Proceedings of the IEEE conference on computer vision and pattern recognition. 2018: 7132-7141. + +[9] Gao S, Cheng M M, Zhao K, et al. Res2net: A new multi-scale backbone architecture[J]. IEEE transactions on pattern analysis and machine intelligence, 2019. + +[10] Szegedy C, Liu W, Jia Y, et al. Going deeper with convolutions[C]//Proceedings of the IEEE conference on computer vision and pattern recognition. 2015: 1-9. + +[11] Szegedy C, Ioffe S, Vanhoucke V, et al. Inception-v4, inception-resnet and the impact of residual connections on learning[C]//Thirty-first AAAI conference on artificial intelligence. 2017. + +[12] Chollet F. Xception: Deep learning with depthwise separable convolutions[C]//Proceedings of the IEEE conference on computer vision and pattern recognition. 2017: 1251-1258. + +[13] Wang J, Sun K, Cheng T, et al. Deep high-resolution representation learning for visual recognition[J]. arXiv preprint arXiv:1908.07919, 2019. + +[14] Chen Y, Li J, Xiao H, et al. Dual path networks[C]//Advances in neural information processing systems. 2017: 4467-4475. + +[15] Huang G, Liu Z, Van Der Maaten L, et al. 
Densely connected convolutional networks[C]//Proceedings of the IEEE conference on computer vision and pattern recognition. 2017: 4700-4708. + +[16] Tan M, Le Q V. Efficientnet: Rethinking model scaling for convolutional neural networks[J]. arXiv preprint arXiv:1905.11946, 2019. + +[17] Mahajan D, Girshick R, Ramanathan V, et al. Exploring the limits of weakly supervised pretraining[C]//Proceedings of the European Conference on Computer Vision (ECCV). 2018: 181-196. + +[18] Krizhevsky A, Sutskever I, Hinton G E. Imagenet classification with deep convolutional neural networks[C]//Advances in neural information processing systems. 2012: 1097-1105. + +[19] Iandola F N, Han S, Moskewicz M W, et al. SqueezeNet: AlexNet-level accuracy with 50x fewer parameters and <0.5MB model size[J]. arXiv preprint arXiv:1602.07360, 2016. + +[20] Simonyan K, Zisserman A. Very deep convolutional networks for large-scale image recognition[J]. arXiv preprint arXiv:1409.1556, 2014. + +[21] Redmon J, Divvala S, Girshick R, et al. You only look once: Unified, real-time object detection[C]//Proceedings of the IEEE conference on computer vision and pattern recognition. 2016: 779-788. + +[22] Ding X, Guo Y, Ding G, et al. Acnet: Strengthening the kernel skeletons for powerful cnn via asymmetric convolution blocks[C]//Proceedings of the IEEE International Conference on Computer Vision. 2019: 1911-1920. + +[23] Han K, Wang Y, Tian Q, et al. GhostNet: More features from cheap operations[C]//Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition. 2020: 1580-1589. + +[24] Zhang H, Wu C, Zhang Z, et al. Resnest: Split-attention networks[J]. arXiv preprint arXiv:2004.08955, 2020. + +[25] Radosavovic I, Kosaraju R P, Girshick R, et al. Designing network design spaces[C]//Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition. 2020: 10428-10436. + +[26] C.Szegedy, V.Vanhoucke, S.Ioffe, J.Shlens, and Z.Wojna.
Rethinking the inception architecture for computer vision. arXiv preprint arXiv:1512.00567, 2015. + +[27] Ze Liu, Yutong Lin, Yue Cao, Han Hu, Yixuan Wei, Zheng Zhang, Stephen Lin and Baining Guo. Swin Transformer: Hierarchical Vision Transformer using Shifted Windows. + +[28]Cheng Cui, Tingquan Gao, Shengyu Wei, Yuning Du, Ruoyu Guo, Shuilong Dong, Bin Lu, Ying Zhou, Xueying Lv, Qiwen Liu, Xiaoguang Hu, Dianhai Yu, Yanjun Ma. PP-LCNet: A Lightweight CPU Convolutional Neural Network. + +[29]Mingxing Tan, Quoc V. Le. MixConv: Mixed Depthwise Convolutional Kernels. + +[30]Dongyoon Han, Sangdoo Yun, Byeongho Heo, YoungJoon Yoo. Rethinking Channel Dimensions for Efficient Model Design. + +[31]Alexey Dosovitskiy, Lucas Beyer, Alexander Kolesnikov, Dirk Weissenborn, Xiaohua Zhai, Thomas Unterthiner, Mostafa Dehghani, Matthias Minderer, Georg Heigold, Sylvain Gelly, Jakob Uszkoreit, Neil Houlsby. AN IMAGE IS WORTH 16X16 WORDS: +TRANSFORMERS FOR IMAGE RECOGNITION AT SCALE. + +[32]Hugo Touvron, Matthieu Cord, Matthijs Douze, Francisco Massa, Alexandre Sablayrolles, Herve Jegou. Training data-efficient image transformers & distillation through attention. + +[33]Benjamin Graham, Alaaeldin El-Nouby, Hugo Touvron, Pierre Stock, Armand Joulin, Herve Jegou, Matthijs Douze. LeViT: a Vision Transformer in ConvNet’s Clothing for Faster Inference. + +[34]Xiangxiang Chu, Zhi Tian, Yuqing Wang, Bo Zhang, Haibing Ren, Xiaolin Wei, Huaxia Xia, Chunhua Shen. Twins: Revisiting the Design of Spatial Attention in Vision Transformers. + +[35]Kai Han, An Xiao, Enhua Wu, Jianyuan Guo, Chunjing Xu, Yunhe Wang. Transformer in Transformer. + +[36]Xiaohan Ding, Xiangyu Zhang, Ningning Ma, Jungong Han, Guiguang Ding, Jian Sun. RepVGG: Making VGG-style ConvNets Great Again. + +[37]Ping Chao, Chao-Yang Kao, Yu-Shan Ruan, Chien-Hsiang Huang, Youn-Long Lin. HarDNet: A Low Memory Traffic Network. + +[38]Fisher Yu, Dequan Wang, Evan Shelhamer, Trevor Darrell. Deep Layer Aggregation. 
+ +[39]Duo Li, Jie Hu, Changhu Wang, Xiangtai Li, Qi She, Lei Zhu, Tong Zhang, Qifeng Chen. Involution: Inverting the Inherence of Convolution for Visual Recognition. diff --git a/docs/zh_CN/image_recognition_pipeline/feature_extraction.md b/docs/zh_CN/image_recognition_pipeline/feature_extraction.md index 1a3820880dc253a398a071f5aba54dfde143a4f0..1438e9661200ede1adf67cf6813f763c3a13c095 100644 --- a/docs/zh_CN/image_recognition_pipeline/feature_extraction.md +++ b/docs/zh_CN/image_recognition_pipeline/feature_extraction.md @@ -159,7 +159,7 @@ python -m paddle.distributed.launch \ #### 4.4.1 导出推理模型 ``` -python tools/export_model \ +python tools/export_model.py \ -c ppcls/configs/GeneralRecognition/GeneralRecognition_PPLCNet_x2_5.yaml \ -o Global.pretrained_model="output/RecModel/best_model" ``` diff --git a/docs/zh_CN/models/models_intro.md b/docs/zh_CN/models/models_intro.md deleted file mode 100644 index f529307f7af85f049f128cce09fe484ae21174e8..0000000000000000000000000000000000000000 --- a/docs/zh_CN/models/models_intro.md +++ /dev/null @@ -1,406 +0,0 @@ -# 模型库概览 ---- -## 目录 - -* [1. 概述](#1) -* [2. 评估环境](#2) -* [3. 预训练模型列表及下载地址](#3) -* [4. 参考文献](#4) - - - -## 1. 概述 - -基于 ImageNet1k 分类数据集,PaddleClas 支持的 36 种系列分类网络结构以及对应的 175 个图像分类预训练模型如下所示,训练技巧、每个系列网络结构的简单介绍和性能评估将在相应章节展现。 - - - -## 2. 评估环境 -* Arm 的评估环境基于骁龙 855(SD855)。 -* Intel CPU 的评估环境基于 Intel(R) Xeon(R) Gold 6148。 -* GPU 评估环境基于 V100 和 TensorRT。 - -![](../../images/models/V100_benchmark/v100.fp32.bs1.main_fps_top1_s.png) - -![](../../images/models/V100_benchmark/v100.fp32.bs1.visiontransformer.png) - -> 如果您觉得此文档对您有帮助,欢迎 star 我们的项目:[https://github.com/PaddlePaddle/PaddleClas](https://github.com/PaddlePaddle/PaddleClas) - - - -## 3. 
预训练模型列表及下载地址 - -- ResNet 及其 Vd 系列 - - ResNet 系列[[1](#ref1)]([论文地址](http://openaccess.thecvf.com/content_cvpr_2016/html/He_Deep_Residual_Learning_CVPR_2016_paper.html)) - - [ResNet18](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNet18_pretrained.pdparams) - - [ResNet34](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNet34_pretrained.pdparams) - - [ResNet50](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNet50_pretrained.pdparams) - - [ResNet101](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNet101_pretrained.pdparams) - - [ResNet152](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNet152_pretrained.pdparams) - - ResNet_vc、ResNet_vd 系列[[2](#ref2)]([论文地址](https://arxiv.org/abs/1812.01187)) - - [ResNet50_vc](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNet50_vc_pretrained.pdparams) - - [ResNet18_vd](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNet18_vd_pretrained.pdparams) - - [ResNet34_vd](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNet34_vd_pretrained.pdparams) - - [ResNet34_vd_ssld](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNet34_vd_ssld_pretrained.pdparams) - - [ResNet50_vd](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNet50_vd_pretrained.pdparams) - - [ResNet50_vd_v2](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNet50_vd_v2_pretrained.pdparams) - - [ResNet101_vd](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNet101_vd_pretrained.pdparams) - - [ResNet152_vd](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNet152_vd_pretrained.pdparams) - - [ResNet200_vd](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNet200_vd_pretrained.pdparams) - - [ResNet50_vd_ssld](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNet50_vd_ssld_pretrained.pdparams) - - 
[Fix_ResNet50_vd_ssld_v2](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Fix_ResNet50_vd_ssld_v2_pretrained.pdparams) - - [ResNet101_vd_ssld](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNet101_vd_ssld_pretrained.pdparams) - - -- 轻量级模型系列 - - PP-LCNet 系列[[28](#ref28)]([论文地址](https://arxiv.org/pdf/2109.15099.pdf)) - - [PPLCNet_x0_25](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/PPLCNet_x0_25_pretrained.pdparams) - - [PPLCNet_x0_35](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/PPLCNet_x0_35_pretrained.pdparams) - - [PPLCNet_x0_5](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/PPLCNet_x0_5_pretrained.pdparams) - - [PPLCNet_x0_75](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/PPLCNet_x0_75_pretrained.pdparams) - - [PPLCNet_x1_0](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/PPLCNet_x1_0_pretrained.pdparams) - - [PPLCNet_x1_5](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/PPLCNet_x1_5_pretrained.pdparams) - - [PPLCNet_x2_0](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/PPLCNet_x2_0_pretrained.pdparams) - - [PPLCNet_x2_5](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/PPLCNet_x2_5_pretrained.pdparams) - - [PPLCNet_x0_5_ssld](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/PPLCNet_x0_5_ssld_pretrained.pdparams) - - [PPLCNet_x1_0_ssld](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/PPLCNet_x1_0_ssld_pretrained.pdparams) - - [PPLCNet_x2_5_ssld](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/legendary_models/PPLCNet_x2_5_ssld_pretrained.pdparams) - - MobileNetV3 系列[[3](#ref3)]([论文地址](https://arxiv.org/abs/1905.02244)) - - [MobileNetV3_large_x0_35](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_large_x0_35_pretrained.pdparams) - - 
[MobileNetV3_large_x0_5](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_large_x0_5_pretrained.pdparams) - - [MobileNetV3_large_x0_75](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_large_x0_75_pretrained.pdparams) - - [MobileNetV3_large_x1_0](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_large_x1_0_pretrained.pdparams) - - [MobileNetV3_large_x1_25](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_large_x1_25_pretrained.pdparams) - - [MobileNetV3_small_x0_35](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_small_x0_35_pretrained.pdparams) - - [MobileNetV3_small_x0_5](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_small_x0_5_pretrained.pdparams) - - [MobileNetV3_small_x0_75](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_small_x0_75_pretrained.pdparams) - - [MobileNetV3_small_x1_0](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_small_x1_0_pretrained.pdparams) - - [MobileNetV3_small_x1_25](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_small_x1_25_pretrained.pdparams) - - [MobileNetV3_large_x1_0_ssld](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_large_x1_0_ssld_pretrained.pdparams) - - [MobileNetV3_large_x1_0_ssld_int8]()(coming soon) - - [MobileNetV3_small_x1_0_ssld](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_small_x1_0_ssld_pretrained.pdparams) - - MobileNetV2 系列[[4](#ref4)]([论文地址](https://arxiv.org/abs/1801.04381)) - - [MobileNetV2_x0_25](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_x0_25_pretrained.pdparams) - - [MobileNetV2_x0_5](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_x0_5_pretrained.pdparams) - - [MobileNetV2_x0_75](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_x0_75_pretrained.pdparams) - - 
[MobileNetV2](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_pretrained.pdparams) - - [MobileNetV2_x1_5](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_x1_5_pretrained.pdparams) - - [MobileNetV2_x2_0](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_x2_0_pretrained.pdparams) - - [MobileNetV2_ssld](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_ssld_pretrained.pdparams) - - MobileNetV1 系列[[5](#ref5)]([论文地址](https://arxiv.org/abs/1704.04861)) - - [MobileNetV1_x0_25](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV1_x0_25_pretrained.pdparams) - - [MobileNetV1_x0_5](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV1_x0_5_pretrained.pdparams) - - [MobileNetV1_x0_75](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV1_x0_75_pretrained.pdparams) - - [MobileNetV1](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV1_pretrained.pdparams) - - [MobileNetV1_ssld](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV1_ssld_pretrained.pdparams) - - ShuffleNetV2 系列[[6](#ref6)]([论文地址](https://arxiv.org/abs/1807.11164)) - - [ShuffleNetV2_x0_25](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ShuffleNetV2_x0_25_pretrained.pdparams) - - [ShuffleNetV2_x0_33](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ShuffleNetV2_x0_33_pretrained.pdparams) - - [ShuffleNetV2_x0_5](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ShuffleNetV2_x0_5_pretrained.pdparams) - - [ShuffleNetV2](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ShuffleNetV2_x1_0_pretrained.pdparams) - - [ShuffleNetV2_x1_5](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ShuffleNetV2_x1_5_pretrained.pdparams) - - [ShuffleNetV2_x2_0](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ShuffleNetV2_x2_0_pretrained.pdparams) - - 
[ShuffleNetV2_swish](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ShuffleNetV2_swish_pretrained.pdparams) - - GhostNet 系列[[23](#ref23)]([论文地址](https://arxiv.org/pdf/1911.11907.pdf)) - - [GhostNet_x0_5](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/GhostNet_x0_5_pretrained.pdparams) - - [GhostNet_x1_0](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/GhostNet_x1_0_pretrained.pdparams) - - [GhostNet_x1_3](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/GhostNet_x1_3_pretrained.pdparams) - - [GhostNet_x1_3_ssld](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/GhostNet_x1_3_ssld_pretrained.pdparams) - - MixNet 系列[[29](#ref29)]([论文地址](https://arxiv.org/pdf/1907.09595.pdf)) - - [MixNet_S](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MixNet_S_pretrained.pdparams) - - [MixNet_M](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MixNet_M_pretrained.pdparams) - - [MixNet_L](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MixNet_L_pretrained.pdparams) - - ReXNet 系列[[30](#ref30)]([论文地址](https://arxiv.org/pdf/2007.00992.pdf)) - - [ReXNet_1_0](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ReXNet_1_0_pretrained.pdparams) - - [ReXNet_1_3](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ReXNet_1_3_pretrained.pdparams) - - [ReXNet_1_5](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ReXNet_1_5_pretrained.pdparams) - - [ReXNet_2_0](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ReXNet_2_0_pretrained.pdparams) - - [ReXNet_3_0](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ReXNet_3_0_pretrained.pdparams) - - -- SEResNeXt 与 Res2Net 系列 - - ResNeXt 系列[[7](#ref7)]([论文地址](https://arxiv.org/abs/1611.05431)) - - [ResNeXt50_32x4d](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt50_32x4d_pretrained.pdparams) - - [ResNeXt50_64x4d](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt50_64x4d_pretrained.pdparams) - - 
[ResNeXt101_32x4d](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_32x4d_pretrained.pdparams) - - [ResNeXt101_64x4d](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_64x4d_pretrained.pdparams) - - [ResNeXt152_32x4d](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt152_32x4d_pretrained.pdparams) - - [ResNeXt152_64x4d](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt152_64x4d_pretrained.pdparams) - - ResNeXt_vd 系列 - - [ResNeXt50_vd_32x4d](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt50_vd_32x4d_pretrained.pdparams) - - [ResNeXt50_vd_64x4d](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt50_vd_64x4d_pretrained.pdparams) - - [ResNeXt101_vd_32x4d](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_vd_32x4d_pretrained.pdparams) - - [ResNeXt101_vd_64x4d](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_vd_64x4d_pretrained.pdparams) - - [ResNeXt152_vd_32x4d](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt152_vd_32x4d_pretrained.pdparams) - - [ResNeXt152_vd_64x4d](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt152_vd_64x4d_pretrained.pdparams) - - SE_ResNet_vd 系列[[8](#ref8)]([论文地址](https://arxiv.org/abs/1709.01507)) - - [SE_ResNet18_vd](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SE_ResNet18_vd_pretrained.pdparams) - - [SE_ResNet34_vd](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SE_ResNet34_vd_pretrained.pdparams) - - [SE_ResNet50_vd](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SE_ResNet50_vd_pretrained.pdparams) - - SE_ResNeXt 系列 - - [SE_ResNeXt50_32x4d](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SE_ResNeXt50_32x4d_pretrained.pdparams) - - [SE_ResNeXt101_32x4d](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SE_ResNeXt101_32x4d_pretrained.pdparams) - - SE_ResNeXt_vd 系列 - - 
[SE_ResNeXt50_vd_32x4d](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SE_ResNeXt50_vd_32x4d_pretrained.pdparams) - - [SENet154_vd](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SENet154_vd_pretrained.pdparams) - - Res2Net 系列[[9](#ref9)]([论文地址](https://arxiv.org/abs/1904.01169)) - - [Res2Net50_26w_4s](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Res2Net50_26w_4s_pretrained.pdparams) - - [Res2Net50_vd_26w_4s](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Res2Net50_vd_26w_4s_pretrained.pdparams) - - [Res2Net50_vd_26w_4s_ssld](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Res2Net50_vd_26w_4s_ssld_pretrained.pdparams) - - [Res2Net50_14w_8s](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Res2Net50_14w_8s_pretrained.pdparams) - - [Res2Net101_vd_26w_4s](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Res2Net101_vd_26w_4s_pretrained.pdparams) - - [Res2Net101_vd_26w_4s_ssld](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Res2Net101_vd_26w_4s_ssld_pretrained.pdparams) - - [Res2Net200_vd_26w_4s](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Res2Net200_vd_26w_4s_pretrained.pdparams) - - [Res2Net200_vd_26w_4s_ssld](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Res2Net200_vd_26w_4s_ssld_pretrained.pdparams) - - -- Inception 系列 - - GoogLeNet 系列[[10](#ref10)]([论文地址](https://arxiv.org/pdf/1409.4842.pdf)) - - [GoogLeNet](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/GoogLeNet_pretrained.pdparams) - - InceptionV3 系列[[26](#ref26)]([论文地址](https://arxiv.org/abs/1512.00567)) - - [InceptionV3](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/InceptionV3_pretrained.pdparams) - - InceptionV4 系列[[11](#ref11)]([论文地址](https://arxiv.org/abs/1602.07261)) - - [InceptionV4](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/InceptionV4_pretrained.pdparams) - - Xception 
系列[[12](#ref12)]([论文地址](http://openaccess.thecvf.com/content_cvpr_2017/html/Chollet_Xception_Deep_Learning_CVPR_2017_paper.html)) - - [Xception41](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Xception41_pretrained.pdparams) - - [Xception41_deeplab](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Xception41_deeplab_pretrained.pdparams) - - [Xception65](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Xception65_pretrained.pdparams) - - [Xception65_deeplab](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Xception65_deeplab_pretrained.pdparams) - - [Xception71](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Xception71_pretrained.pdparams) - - -- HRNet 系列 - - HRNet 系列[[13](#ref13)]([论文地址](https://arxiv.org/abs/1908.07919)) - - [HRNet_W18_C](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/HRNet_W18_C_pretrained.pdparams) - - [HRNet_W18_C_ssld](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/HRNet_W18_C_ssld_pretrained.pdparams) - - [HRNet_W30_C](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/HRNet_W30_C_pretrained.pdparams) - - [HRNet_W32_C](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/HRNet_W32_C_pretrained.pdparams) - - [HRNet_W40_C](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/HRNet_W40_C_pretrained.pdparams) - - [HRNet_W44_C](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/HRNet_W44_C_pretrained.pdparams) - - [HRNet_W48_C](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/HRNet_W48_C_pretrained.pdparams) - - [HRNet_W48_C_ssld](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/HRNet_W48_C_ssld_pretrained.pdparams) - - [HRNet_W64_C](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/HRNet_W64_C_pretrained.pdparams) - - [SE_HRNet_W64_C_ssld](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SE_HRNet_W64_C_ssld_pretrained.pdparams) - -- DPN 与 DenseNet 系列 - - DPN 系列[[14](#ref14)]([论文地址](https://arxiv.org/abs/1707.01629)) - 
- [DPN68](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DPN68_pretrained.pdparams) - - [DPN92](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DPN92_pretrained.pdparams) - - [DPN98](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DPN98_pretrained.pdparams) - - [DPN107](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DPN107_pretrained.pdparams) - - [DPN131](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DPN131_pretrained.pdparams) - - DenseNet 系列[[15](#ref15)]([论文地址](https://arxiv.org/abs/1608.06993)) - - [DenseNet121](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DenseNet121_pretrained.pdparams) - - [DenseNet161](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DenseNet161_pretrained.pdparams) - - [DenseNet169](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DenseNet169_pretrained.pdparams) - - [DenseNet201](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DenseNet201_pretrained.pdparams) - - [DenseNet264](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DenseNet264_pretrained.pdparams) - - -- EfficientNet 与 ResNeXt101_wsl 系列 - - EfficientNet 系列[[16](#ref16)]([论文地址](https://arxiv.org/abs/1905.11946)) - - [EfficientNetB0_small](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB0_small_pretrained.pdparams) - - [EfficientNetB0](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB0_pretrained.pdparams) - - [EfficientNetB1](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB1_pretrained.pdparams) - - [EfficientNetB2](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB2_pretrained.pdparams) - - [EfficientNetB3](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB3_pretrained.pdparams) - - [EfficientNetB4](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB4_pretrained.pdparams) - - 
[EfficientNetB5](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB5_pretrained.pdparams) - - [EfficientNetB6](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB6_pretrained.pdparams) - - [EfficientNetB7](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/EfficientNetB7_pretrained.pdparams) - - ResNeXt101_wsl 系列[[17](#ref17)]([论文地址](https://arxiv.org/abs/1805.00932)) - - [ResNeXt101_32x8d_wsl](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_32x8d_wsl_pretrained.pdparams) - - [ResNeXt101_32x16d_wsl](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_32x16d_wsl_pretrained.pdparams) - - [ResNeXt101_32x32d_wsl](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_32x32d_wsl_pretrained.pdparams) - - [ResNeXt101_32x48d_wsl](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeXt101_32x48d_wsl_pretrained.pdparams) - - [Fix_ResNeXt101_32x48d_wsl](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/Fix_ResNeXt101_32x48d_wsl_pretrained.pdparams) - -- ResNeSt 与 RegNet 系列 - - ResNeSt 系列[[24](#ref24)]([论文地址](https://arxiv.org/abs/2004.08955)) - - [ResNeSt50_fast_1s1x64d](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeSt50_fast_1s1x64d_pretrained.pdparams) - - [ResNeSt50](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNeSt50_pretrained.pdparams) - - RegNet 系列[[25](#ref25)]([paper link](https://arxiv.org/abs/2003.13678)) - - [RegNetX_4GF](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RegNetX_4GF_pretrained.pdparams) - -- Transformer 系列 - - Swin-transformer 系列[[27](#ref27)]([论文地址](https://arxiv.org/pdf/2103.14030.pdf)) - - [SwinTransformer_tiny_patch4_window7_224](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SwinTransformer_tiny_patch4_window7_224_pretrained.pdparams) - - 
[SwinTransformer_small_patch4_window7_224](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SwinTransformer_small_patch4_window7_224_pretrained.pdparams) - - [SwinTransformer_base_patch4_window7_224](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SwinTransformer_base_patch4_window7_224_pretrained.pdparams) - - [SwinTransformer_base_patch4_window12_384](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SwinTransformer_base_patch4_window12_384_pretrained.pdparams) - - [SwinTransformer_base_patch4_window7_224_22k](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SwinTransformer_base_patch4_window7_224_22k_pretrained.pdparams) - - [SwinTransformer_base_patch4_window7_224_22kto1k](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SwinTransformer_base_patch4_window7_224_22kto1k_pretrained.pdparams) - - [SwinTransformer_large_patch4_window12_384_22k](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SwinTransformer_large_patch4_window12_384_22k_pretrained.pdparams) - - [SwinTransformer_large_patch4_window12_384_22kto1k](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SwinTransformer_large_patch4_window12_384_22kto1k_pretrained.pdparams) - - [SwinTransformer_large_patch4_window7_224_22k](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SwinTransformer_large_patch4_window7_224_22k_pretrained.pdparams) - - [SwinTransformer_large_patch4_window7_224_22kto1k](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SwinTransformer_large_patch4_window7_224_22kto1k_pretrained.pdparams) - - ViT 系列[[31](#ref31)]([论文地址](https://arxiv.org/pdf/2010.11929.pdf)) - - [ViT_small_patch16_224](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ViT_small_patch16_224_pretrained.pdparams) - - [ViT_base_patch16_224](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ViT_base_patch16_224_pretrained.pdparams) - - 
[ViT_base_patch16_384](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ViT_base_patch16_384_pretrained.pdparams) - - [ViT_base_patch32_384](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ViT_base_patch32_384_pretrained.pdparams) - - [ViT_large_patch16_224](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ViT_large_patch16_224_pretrained.pdparams) - - [ViT_large_patch16_384](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ViT_large_patch16_384_pretrained.pdparams) - - [ViT_large_patch32_384](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ViT_large_patch32_384_pretrained.pdparams) - - DeiT 系列[[32](#ref32)]([论文地址](https://arxiv.org/pdf/2012.12877.pdf)) - - [DeiT_tiny_patch16_224](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_tiny_patch16_224_pretrained.pdparams) - - [DeiT_small_patch16_224](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_small_patch16_224_pretrained.pdparams) - - [DeiT_base_patch16_224](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_base_patch16_224_pretrained.pdparams) - - [DeiT_base_patch16_384](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_base_patch16_384_pretrained.pdparams) - - [DeiT_tiny_distilled_patch16_224](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_tiny_distilled_patch16_224_pretrained.pdparams) - - [DeiT_small_distilled_patch16_224](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_small_distilled_patch16_224_pretrained.pdparams) - - [DeiT_base_distilled_patch16_224](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_base_distilled_patch16_224_pretrained.pdparams) - - [DeiT_base_distilled_patch16_384](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DeiT_base_distilled_patch16_384_pretrained.pdparams) - - LeViT 系列[[33](#ref33)]([论文地址](https://arxiv.org/pdf/2104.01136.pdf)) - - 
[LeViT_128S](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/LeViT_128S_pretrained.pdparams) - - [LeViT_128](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/LeViT_128_pretrained.pdparams) - - [LeViT_192](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/LeViT_192_pretrained.pdparams) - - [LeViT_256](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/LeViT_256_pretrained.pdparams) - - [LeViT_384](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/LeViT_384_pretrained.pdparams) - - Twins 系列[[34](#ref34)]([论文地址](https://arxiv.org/pdf/2104.13840.pdf)) - - [pcpvt_small](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/pcpvt_small_pretrained.pdparams) - - [pcpvt_base](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/pcpvt_base_pretrained.pdparams) - - [pcpvt_large](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/pcpvt_large_pretrained.pdparams) - - [alt_gvt_small](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/alt_gvt_small_pretrained.pdparams) - - [alt_gvt_base](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/alt_gvt_base_pretrained.pdparams) - - [alt_gvt_large](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/alt_gvt_large_pretrained.pdparams) - - TNT 系列[[35](#ref35)]([论文地址](https://arxiv.org/pdf/2103.00112.pdf)) - - [TNT_small](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/TNT_small_pretrained.pdparams) - -- 其他模型 - - AlexNet 系列[[18](#ref18)]([论文地址](https://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf)) - - [AlexNet](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/AlexNet_pretrained.pdparams) - - SqueezeNet 系列[[19](#ref19)]([论文地址](https://arxiv.org/abs/1602.07360)) - - [SqueezeNet1_0](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SqueezeNet1_0_pretrained.pdparams) - - [SqueezeNet1_1](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/SqueezeNet1_1_pretrained.pdparams) - - VGG 
系列[[20](#ref20)]([论文地址](https://arxiv.org/abs/1409.1556)) - - [VGG11](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/VGG11_pretrained.pdparams) - - [VGG13](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/VGG13_pretrained.pdparams) - - [VGG16](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/VGG16_pretrained.pdparams) - - [VGG19](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/VGG19_pretrained.pdparams) - - DarkNet 系列[[21](#ref21)]([论文地址](https://arxiv.org/abs/1506.02640)) - - [DarkNet53](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DarkNet53_pretrained.pdparams) - - RepVGG 系列[[36](#ref36)]([论文地址](https://arxiv.org/pdf/2101.03697.pdf)) - - [RepVGG_A0](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_A0_pretrained.pdparams) - - [RepVGG_A1](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_A1_pretrained.pdparams) - - [RepVGG_A2](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_A2_pretrained.pdparams) - - [RepVGG_B0](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B0_pretrained.pdparams) - - [RepVGG_B1s](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B1_pretrained.pdparams) - - [RepVGG_B2](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B2_pretrained.pdparams) - - [RepVGG_B1g2](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B1g2_pretrained.pdparams) - - [RepVGG_B1g4](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B1g4_pretrained.pdparams) - - [RepVGG_B2g4](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B2g4_pretrained.pdparams) - - [RepVGG_B3g4](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B3g4_pretrained.pdparams) - - HarDNet 系列[[37](#ref37)]([论文地址](https://arxiv.org/pdf/1909.00948.pdf)) - - [HarDNet39_ds](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/HarDNet39_ds_pretrained.pdparams) - - 
[HarDNet68_ds](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/HarDNet68_ds_pretrained.pdparams) - - [HarDNet68](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/HarDNet68_pretrained.pdparams) - - [HarDNet85](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/HarDNet85_pretrained.pdparams) - - DLA 系列[[38](#ref38)]([论文地址](https://arxiv.org/pdf/1707.06484.pdf)) - - [DLA102](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DLA102_pretrained.pdparams) - - [DLA102x2](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DLA102x2_pretrained.pdparams) - - [DLA102x](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DLA102x_pretrained.pdparams) - - [DLA169](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DLA169_pretrained.pdparams) - - [DLA34](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DLA34_pretrained.pdparams) - - [DLA46_c](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DLA46_c_pretrained.pdparams) - - [DLA60](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DLA60_pretrained.pdparams) - - [DLA60x_c](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DLA60x_c_pretrained.pdparams) - - [DLA60x](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/DLA60x_pretrained.pdparams) - - RedNet 系列[[39](#ref39)]([论文地址](https://arxiv.org/pdf/2103.06255.pdf)) - - [RedNet26](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RedNet26_pretrained.pdparams) - - [RedNet38](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RedNet38_pretrained.pdparams) - - [RedNet50](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RedNet50_pretrained.pdparams) - - [RedNet101](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RedNet101_pretrained.pdparams) - - [RedNet152](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RedNet152_pretrained.pdparams) - - - -**注意**:以上模型中 EfficientNetB1-B7 的预训练模型转自[pytorch 版 
EfficientNet](https://github.com/lukemelas/EfficientNet-PyTorch),ResNeXt101_wsl 系列预训练模型转自[官方 repo](https://github.com/facebookresearch/WSL-Images),剩余预训练模型均基于飞桨训练得到的,并在 configs 里给出了相应的训练超参数。 - - - -## 4. 参考文献 - - -[1] He K, Zhang X, Ren S, et al. Deep residual learning for image recognition[C]//Proceedings of the IEEE conference on computer vision and pattern recognition. 2016: 770-778. - -[2] He T, Zhang Z, Zhang H, et al. Bag of tricks for image classification with convolutional neural networks[C]//Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 2019: 558-567. - -[3] Howard A, Sandler M, Chu G, et al. Searching for mobilenetv3[C]//Proceedings of the IEEE International Conference on Computer Vision. 2019: 1314-1324. - -[4] Sandler M, Howard A, Zhu M, et al. Mobilenetv2: Inverted residuals and linear bottlenecks[C]//Proceedings of the IEEE conference on computer vision and pattern recognition. 2018: 4510-4520. - -[5] Howard A G, Zhu M, Chen B, et al. Mobilenets: Efficient convolutional neural networks for mobile vision applications[J]. arXiv preprint arXiv:1704.04861, 2017. - -[6] Ma N, Zhang X, Zheng H T, et al. Shufflenet v2: Practical guidelines for efficient cnn architecture design[C]//Proceedings of the European Conference on Computer Vision (ECCV). 2018: 116-131. - -[7] Xie S, Girshick R, Dollár P, et al. Aggregated residual transformations for deep neural networks[C]//Proceedings of the IEEE conference on computer vision and pattern recognition. 2017: 1492-1500. - - -[8] Hu J, Shen L, Sun G. Squeeze-and-excitation networks[C]//Proceedings of the IEEE conference on computer vision and pattern recognition. 2018: 7132-7141. - - -[9] Gao S, Cheng M M, Zhao K, et al. Res2net: A new multi-scale backbone architecture[J]. IEEE transactions on pattern analysis and machine intelligence, 2019. - -[10] Szegedy C, Liu W, Jia Y, et al. 
Going deeper with convolutions[C]//Proceedings of the IEEE conference on computer vision and pattern recognition. 2015: 1-9. - - -[11] Szegedy C, Ioffe S, Vanhoucke V, et al. Inception-v4, inception-resnet and the impact of residual connections on learning[C]//Thirty-first AAAI conference on artificial intelligence. 2017. - -[12] Chollet F. Xception: Deep learning with depthwise separable convolutions[C]//Proceedings of the IEEE conference on computer vision and pattern recognition. 2017: 1251-1258. - -[13] Wang J, Sun K, Cheng T, et al. Deep high-resolution representation learning for visual recognition[J]. arXiv preprint arXiv:1908.07919, 2019. - -[14] Chen Y, Li J, Xiao H, et al. Dual path networks[C]//Advances in neural information processing systems. 2017: 4467-4475. - -[15] Huang G, Liu Z, Van Der Maaten L, et al. Densely connected convolutional networks[C]//Proceedings of the IEEE conference on computer vision and pattern recognition. 2017: 4700-4708. - - -[16] Tan M, Le Q V. Efficientnet: Rethinking model scaling for convolutional neural networks[J]. arXiv preprint arXiv:1905.11946, 2019. - -[17] Mahajan D, Girshick R, Ramanathan V, et al. Exploring the limits of weakly supervised pretraining[C]//Proceedings of the European Conference on Computer Vision (ECCV). 2018: 181-196. - -[18] Krizhevsky A, Sutskever I, Hinton G E. Imagenet classification with deep convolutional neural networks[C]//Advances in neural information processing systems. 2012: 1097-1105. - -[19] Iandola F N, Han S, Moskewicz M W, et al. SqueezeNet: AlexNet-level accuracy with 50x fewer parameters and< 0.5 MB model size[J]. arXiv preprint arXiv:1602.07360, 2016. - -[20] Simonyan K, Zisserman A. Very deep convolutional networks for large-scale image recognition[J]. arXiv preprint arXiv:1409.1556, 2014. - -[21] Redmon J, Divvala S, Girshick R, et al. You only look once: Unified, real-time object detection[C]//Proceedings of the IEEE conference on computer vision and pattern recognition. 
2016: 779-788. - -[22] Ding X, Guo Y, Ding G, et al. Acnet: Strengthening the kernel skeletons for powerful cnn via asymmetric convolution blocks[C]//Proceedings of the IEEE International Conference on Computer Vision. 2019: 1911-1920. - -[23] Han K, Wang Y, Tian Q, et al. GhostNet: More features from cheap operations[C]//Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition. 2020: 1580-1589. - -[24] Zhang H, Wu C, Zhang Z, et al. Resnest: Split-attention networks[J]. arXiv preprint arXiv:2004.08955, 2020. - -[25] Radosavovic I, Kosaraju R P, Girshick R, et al. Designing network design spaces[C]//Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition. 2020: 10428-10436. - -[26] C.Szegedy, V.Vanhoucke, S.Ioffe, J.Shlens, and Z.Wojna. Rethinking the inception architecture for computer vision. arXiv preprint arXiv:1512.00567, 2015. - -[27] Ze Liu, Yutong Lin, Yue Cao, Han Hu, Yixuan Wei, Zheng Zhang, Stephen Lin and Baining Guo. Swin Transformer: Hierarchical Vision Transformer using Shifted Windows. - -[28]Cheng Cui, Tingquan Gao, Shengyu Wei, Yuning Du, Ruoyu Guo, Shuilong Dong, Bin Lu, Ying Zhou, Xueying Lv, Qiwen Liu, Xiaoguang Hu, Dianhai Yu, Yanjun Ma. PP-LCNet: A Lightweight CPU Convolutional Neural Network. - -[29]Mingxing Tan, Quoc V. Le. MixConv: Mixed Depthwise Convolutional Kernels. - -[30]Dongyoon Han, Sangdoo Yun, Byeongho Heo, YoungJoon Yoo. Rethinking Channel Dimensions for Efficient Model Design. - -[31]Alexey Dosovitskiy, Lucas Beyer, Alexander Kolesnikov, Dirk Weissenborn, Xiaohua Zhai, Thomas Unterthiner, Mostafa Dehghani, Matthias Minderer, Georg Heigold, Sylvain Gelly, Jakob Uszkoreit, Neil Houlsby. AN IMAGE IS WORTH 16X16 WORDS: -TRANSFORMERS FOR IMAGE RECOGNITION AT SCALE. - -[32]Hugo Touvron, Matthieu Cord, Matthijs Douze, Francisco Massa, Alexandre Sablayrolles, Herve Jegou. Training data-efficient image transformers & distillation through attention. 
- -[33]Benjamin Graham, Alaaeldin El-Nouby, Hugo Touvron, Pierre Stock, Armand Joulin, Herve Jegou, Matthijs Douze. LeViT: a Vision Transformer in ConvNet’s Clothing for Faster Inference. - -[34]Xiangxiang Chu, Zhi Tian, Yuqing Wang, Bo Zhang, Haibing Ren, Xiaolin Wei, Huaxia Xia, Chunhua Shen. Twins: Revisiting the Design of Spatial Attention in Vision Transformers. - -[35]Kai Han, An Xiao, Enhua Wu, Jianyuan Guo, Chunjing Xu, Yunhe Wang. Transformer in Transformer. - -[36]Xiaohan Ding, Xiangyu Zhang, Ningning Ma, Jungong Han, Guiguang Ding, Jian Sun. RepVGG: Making VGG-style ConvNets Great Again. - -[37]Ping Chao, Chao-Yang Kao, Yu-Shan Ruan, Chien-Hsiang Huang, Youn-Long Lin. HarDNet: A Low Memory Traffic Network. - -[38]Fisher Yu, Dequan Wang, Evan Shelhamer, Trevor Darrell. Deep Layer Aggregation. - -[39]Duo Lim Jie Hu, Changhu Wang, Xiangtai Li, Qi She, Lei Zhu, Tong Zhang, Qifeng Chen. Involution: Inverting the Inherence of Convolution for Visual Recognition. diff --git a/ppcls/arch/backbone/__init__.py b/ppcls/arch/backbone/__init__.py index dadac59a38c6b3f88de51791ffd1dc6a1e281ecf..1bd23a9538dfcd73f4477fa26ef358893e66d964 100644 --- a/ppcls/arch/backbone/__init__.py +++ b/ppcls/arch/backbone/__init__.py @@ -61,6 +61,7 @@ from ppcls.arch.backbone.model_zoo.tnt import TNT_small from ppcls.arch.backbone.model_zoo.hardnet import HarDNet68, HarDNet85, HarDNet39_ds, HarDNet68_ds from ppcls.arch.backbone.model_zoo.cspnet import CSPDarkNet53 from ppcls.arch.backbone.model_zoo.pvt_v2 import PVT_V2_B0, PVT_V2_B1, PVT_V2_B2_Linear, PVT_V2_B2, PVT_V2_B3, PVT_V2_B4, PVT_V2_B5 +from ppcls.arch.backbone.model_zoo.repvgg import RepVGG_A0, RepVGG_A1, RepVGG_A2, RepVGG_B0, RepVGG_B1, RepVGG_B2, RepVGG_B1g2, RepVGG_B1g4, RepVGG_B2g4, RepVGG_B3g4 from ppcls.arch.backbone.variant_models.resnet_variant import ResNet50_last_stage_stride1 from ppcls.arch.backbone.variant_models.vgg_variant import VGG19Sigmoid from ppcls.arch.backbone.variant_models.pp_lcnet_variant import 
PPLCNet_x2_5_Tanh diff --git a/ppcls/arch/backbone/model_zoo/repvgg.py b/ppcls/arch/backbone/model_zoo/repvgg.py index e919a59fe6e00831e6b02a4d3c8b7cc8d8b9c432..1218be7feeec0336501441216c3fc802aeafa6f6 100644 --- a/ppcls/arch/backbone/model_zoo/repvgg.py +++ b/ppcls/arch/backbone/model_zoo/repvgg.py @@ -33,18 +33,12 @@ MODEL_URLS = { "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B1_pretrained.pdparams", "RepVGG_B2": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B2_pretrained.pdparams", - "RepVGG_B3": - "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B3_pretrained.pdparams", "RepVGG_B1g2": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B1g2_pretrained.pdparams", "RepVGG_B1g4": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B1g4_pretrained.pdparams", - "RepVGG_B2g2": - "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B2g2_pretrained.pdparams", "RepVGG_B2g4": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B2g4_pretrained.pdparams", - "RepVGG_B3g2": - "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B3g2_pretrained.pdparams", "RepVGG_B3g4": "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/RepVGG_B3g4_pretrained.pdparams", } @@ -92,6 +86,8 @@ class RepVGGBlock(nn.Layer): groups=1, padding_mode='zeros'): super(RepVGGBlock, self).__init__() + self.is_repped = False + self.in_channels = in_channels self.out_channels = out_channels self.kernel_size = kernel_size @@ -127,6 +123,12 @@ class RepVGGBlock(nn.Layer): groups=groups) def forward(self, inputs): + if not self.training and not self.is_repped: + self.rep() + self.is_repped = True + if self.training and self.is_repped: + self.is_repped = False + if not self.training: return self.nonlinearity(self.rbr_reparam(inputs)) @@ -137,7 +139,7 @@ class RepVGGBlock(nn.Layer): return self.nonlinearity( self.rbr_dense(inputs) + self.rbr_1x1(inputs) + id_out) - def eval(self): 
+ def rep(self): if not hasattr(self, 'rbr_reparam'): self.rbr_reparam = nn.Conv2D( in_channels=self.in_channels, @@ -148,12 +150,9 @@ class RepVGGBlock(nn.Layer): dilation=self.dilation, groups=self.groups, padding_mode=self.padding_mode) - self.training = False kernel, bias = self.get_equivalent_kernel_bias() self.rbr_reparam.weight.set_value(kernel) self.rbr_reparam.bias.set_value(bias) - for layer in self.sublayers(): - layer.eval() def get_equivalent_kernel_bias(self): kernel3x3, bias3x3 = self._fuse_bn_tensor(self.rbr_dense) @@ -248,12 +247,6 @@ class RepVGG(nn.Layer): self.cur_layer_idx += 1 return nn.Sequential(*blocks) - def eval(self): - self.training = False - for layer in self.sublayers(): - layer.training = False - layer.eval() - def forward(self, x): out = self.stage0(x) out = self.stage1(out) @@ -367,17 +360,6 @@ def RepVGG_B2(pretrained=False, use_ssld=False, **kwargs): return model -def RepVGG_B2g2(pretrained=False, use_ssld=False, **kwargs): - model = RepVGG( - num_blocks=[4, 6, 16, 1], - width_multiplier=[2.5, 2.5, 2.5, 5], - override_groups_map=g2_map, - **kwargs) - _load_pretrained( - pretrained, model, MODEL_URLS["RepVGG_B2g2"], use_ssld=use_ssld) - return model - - def RepVGG_B2g4(pretrained=False, use_ssld=False, **kwargs): model = RepVGG( num_blocks=[4, 6, 16, 1], @@ -389,28 +371,6 @@ def RepVGG_B2g4(pretrained=False, use_ssld=False, **kwargs): return model -def RepVGG_B3(pretrained=False, use_ssld=False, **kwargs): - model = RepVGG( - num_blocks=[4, 6, 16, 1], - width_multiplier=[3, 3, 3, 5], - override_groups_map=None, - **kwargs) - _load_pretrained( - pretrained, model, MODEL_URLS["RepVGG_B3"], use_ssld=use_ssld) - return model - - -def RepVGG_B3g2(pretrained=False, use_ssld=False, **kwargs): - model = RepVGG( - num_blocks=[4, 6, 16, 1], - width_multiplier=[3, 3, 3, 5], - override_groups_map=g2_map, - **kwargs) - _load_pretrained( - pretrained, model, MODEL_URLS["RepVGG_B3g2"], use_ssld=use_ssld) - return model - - def 
RepVGG_B3g4(pretrained=False, use_ssld=False, **kwargs): model = RepVGG( num_blocks=[4, 6, 16, 1], diff --git a/ppcls/arch/backbone/model_zoo/tnt.py b/ppcls/arch/backbone/model_zoo/tnt.py index 3ccb7575effb623457e92c9a79c2f891c10fbc71..dcffcf49bd594ac67e82821979e174e9f81443f2 100644 --- a/ppcls/arch/backbone/model_zoo/tnt.py +++ b/ppcls/arch/backbone/model_zoo/tnt.py @@ -372,7 +372,7 @@ def _load_pretrained(pretrained, model, model_url, use_ssld=False): ) -def TNT_small(pretrained=False, **kwargs): +def TNT_small(pretrained=False, use_ssld=False, **kwargs): model = TNT(patch_size=16, embed_dim=384, in_dim=24, @@ -381,5 +381,6 @@ def TNT_small(pretrained=False, **kwargs): in_num_head=4, qkv_bias=False, **kwargs) - _load_pretrained(pretrained, model, MODEL_URLS["TNT_small"]) + _load_pretrained( + pretrained, model, MODEL_URLS["TNT_small"], use_ssld=use_ssld) return model diff --git a/ppcls/configs/ImageNet/Distillation/mv3_large_x1_0_distill_mv3_small_x1_0.yaml b/ppcls/configs/ImageNet/Distillation/mv3_large_x1_0_distill_mv3_small_x1_0.yaml index 9612be4e61bcdda39d568d0422a5b741a96b9d20..a7265b066e1c526fbb63f59993ff68bb4ae09d8a 100644 --- a/ppcls/configs/ImageNet/Distillation/mv3_large_x1_0_distill_mv3_small_x1_0.yaml +++ b/ppcls/configs/ImageNet/Distillation/mv3_large_x1_0_distill_mv3_small_x1_0.yaml @@ -18,7 +18,7 @@ Global: # model architecture Arch: name: "DistillationModel" - class_num: 1000 + class_num: &class_num 1000 # if not null, its lengths should be same as models pretrained_list: # if not null, its lengths should be same as models @@ -28,11 +28,13 @@ Arch: models: - Teacher: name: MobileNetV3_large_x1_0 + class_num: *class_num pretrained: True use_ssld: True dropout_prob: null - Student: name: MobileNetV3_small_x1_0 + class_num: *class_num pretrained: False dropout_prob: null diff --git a/ppcls/engine/engine.py b/ppcls/engine/engine.py index 0900b472bd2042a9ac771c60d2b64a1888fda5fa..019cf165054a080e6d3883e5b01e3281ebd1d19e 100644 --- 
a/ppcls/engine/engine.py +++ b/ppcls/engine/engine.py @@ -92,7 +92,7 @@ class Engine(object): self.vdl_writer = LogWriter(logdir=vdl_writer_path) # set device - assert self.config["Global"]["device"] in ["cpu", "gpu", "xpu", "npu"] + assert self.config["Global"]["device"] in ["cpu", "gpu", "xpu", "npu", "mlu"] self.device = paddle.set_device(self.config["Global"]["device"]) logger.info('train with paddle {} and device {}'.format( paddle.__version__, self.device)) @@ -107,7 +107,9 @@ class Engine(object): self.scale_loss = 1.0 self.use_dynamic_loss_scaling = False if self.amp: - AMP_RELATED_FLAGS_SETTING = {'FLAGS_max_inplace_grad_add': 8, } + AMP_RELATED_FLAGS_SETTING = { + 'FLAGS_max_inplace_grad_add': 8, + } if paddle.is_compiled_with_cuda(): AMP_RELATED_FLAGS_SETTING.update({ 'FLAGS_cudnn_batchnorm_spatial_persistent': 1 @@ -172,7 +174,9 @@ class Engine(object): if metric_config is not None: metric_config = metric_config.get("Train") if metric_config is not None: - if hasattr(self.train_dataloader, "collate_fn"): + if hasattr( + self.train_dataloader, "collate_fn" + ) and self.train_dataloader.collate_fn is not None: for m_idx, m in enumerate(metric_config): if "TopkAcc" in m: msg = f"'TopkAcc' metric can not be used when setting 'batch_transform_ops' in config. The 'TopkAcc' metric has been removed." 
diff --git a/ppcls/engine/train/train.py b/ppcls/engine/train/train.py index 3b02bac8f305d9b944eb97a5a98ba3c39521191a..b15c1088ae23b16a1e1c724a5c783f75cb5449ce 100644 --- a/ppcls/engine/train/train.py +++ b/ppcls/engine/train/train.py @@ -21,7 +21,6 @@ from ppcls.utils import profiler def train_epoch(engine, epoch_id, print_batch_step): tic = time.time() - v_current = [int(i) for i in paddle.__version__.split(".")] for iter_id, batch in enumerate(engine.train_dataloader): if iter_id >= engine.max_iter: break diff --git a/ppcls/metric/metrics.py b/ppcls/metric/metrics.py index 7c6407e7a4c74fa7d4330d72c6be52f6a843cdf0..03e742082b57439227746d21695379b498e7f1d8 100644 --- a/ppcls/metric/metrics.py +++ b/ppcls/metric/metrics.py @@ -302,8 +302,5 @@ class AccuracyScore(MutiLabelMetric): fps = mcm[:, 0, 1] accuracy = (sum(tps) + sum(tns)) / ( sum(tps) + sum(tns) + sum(fns) + sum(fps)) - precision = sum(tps) / (sum(tps) + sum(fps)) - recall = sum(tps) / (sum(tps) + sum(fns)) - F1 = 2 * (accuracy * recall) / (accuracy + recall) metric_dict["AccuracyScore"] = paddle.to_tensor(accuracy) return metric_dict diff --git a/ppcls/static/train.py b/ppcls/static/train.py index 075a0bdd03289e242d739b8eb43741c71eb0e876..dd16cdb4caa41671d8f9979fea32e49611cf6ab0 100644 --- a/ppcls/static/train.py +++ b/ppcls/static/train.py @@ -91,9 +91,10 @@ def main(args): use_xpu = global_config.get("use_xpu", False) use_npu = global_config.get("use_npu", False) + use_mlu = global_config.get("use_mlu", False) assert ( - use_gpu and use_xpu and use_npu - ) is not True, "gpu, xpu and npu can not be true in the same time in static mode!" + use_gpu and use_xpu and use_npu and use_mlu + ) is not True, "gpu, xpu, npu and mlu can not be true in the same time in static mode!" 
if use_gpu: device = paddle.set_device('gpu') @@ -101,6 +102,8 @@ def main(args): device = paddle.set_device('xpu') elif use_npu: device = paddle.set_device('npu') + elif use_mlu: + device = paddle.set_device('mlu') else: device = paddle.set_device('cpu') diff --git a/setup.py b/setup.py index a17e77d4fb2d17f27e4225d9f90b97cd4320c6fa..57045d31903917fdb8634887a1f6e7207871ead5 100644 --- a/setup.py +++ b/setup.py @@ -20,7 +20,9 @@ with open('requirements.txt', encoding="utf-8-sig") as f: def readme(): - with open('docs/en/whl_en.md', encoding="utf-8-sig") as f: + with open( + 'docs/en/inference_deployment/whl_deploy_en.md', + encoding="utf-8-sig") as f: README = f.read() return README