From f10df497d301afc0e25f76a1b08eb8314667857c Mon Sep 17 00:00:00 2001 From: cuicheng01 <45199522+cuicheng01@users.noreply.github.com> Date: Wed, 11 Sep 2019 21:41:11 +0800 Subject: [PATCH] update some pretrained models in image classification (#3315) refine image classification and add 19 pre-trained models. --- PaddleCV/image_classification/README.md | 34 +- PaddleCV/image_classification/README_en.md | 200 ++++++------ .../image_classification/models/__init__.py | 13 +- PaddleCV/image_classification/models/dpn.py | 10 +- .../models/{mobilenet.py => mobilenet_v1.py} | 28 +- .../models/mobilenet_v2.py | 32 +- .../models/mobilenet_v3.py | 265 ++++++++++++++++ .../image_classification/models/model_libs.py | 128 ++++++++ .../image_classification/models/resnet_vd.py | 90 ++++-- .../image_classification/models/resnext_vd.py | 3 +- .../models/se_resnet_vd.py | 295 ++++++++++++++++++ .../models/se_resnext_vd.py | 19 +- .../image_classification/models/xception.py | 15 +- .../models/xception_deeplab.py | 272 ++++++++++++++++ .../scripts/train/DPN107.sh | 15 + .../scripts/train/DPN131.sh | 15 + .../scripts/train/DPN68.sh | 15 + .../scripts/train/DPN92.sh | 15 + .../scripts/train/DPN98.sh | 15 + .../scripts/train/MobileNetV1.sh | 2 +- .../scripts/train/MobileNetV1_x0_25.sh | 17 + .../scripts/train/MobileNetV1_x0_5.sh | 17 + .../scripts/train/MobileNetV1_x0_75.sh | 17 + .../scripts/train/MobileNetV2.sh | 2 +- .../scripts/train/MobileNetV2_x0_75.sh | 17 + .../scripts/train/ResNeXt101_32x4d.sh | 27 +- .../scripts/train/ResNeXt101_vd_32x4d.sh | 22 ++ .../scripts/train/ResNeXt152_vd_64x4d.sh | 15 + .../scripts/train/ResNet18_vd.sh | 19 ++ .../scripts/train/ResNet34_vd.sh | 19 ++ .../train/{SE_154_vd.sh => SENet154_vd.sh} | 2 +- .../scripts/train/SE_ResNet50_vd.sh | 15 + .../train/{Xception_41.sh => Xception41.sh} | 4 +- .../scripts/train/Xception41_deeplab.sh | 13 + .../scripts/train/Xception65.sh | 16 + .../scripts/train/Xception65_deeplab.sh | 13 + .../scripts/train/Xception71.sh | 16 + .../image_classification/utils/utility.py | 2 +- 38 files changed, 1547 insertions(+), 187 deletions(-) rename PaddleCV/image_classification/models/{mobilenet.py => mobilenet_v1.py} (91%) create mode 100644 PaddleCV/image_classification/models/mobilenet_v3.py create mode 100644 PaddleCV/image_classification/models/model_libs.py create mode 100644 PaddleCV/image_classification/models/se_resnet_vd.py create mode 100644 PaddleCV/image_classification/models/xception_deeplab.py create mode 100644 PaddleCV/image_classification/scripts/train/DPN107.sh create mode 100644 PaddleCV/image_classification/scripts/train/DPN131.sh create mode 100644 PaddleCV/image_classification/scripts/train/DPN68.sh create mode 100644 PaddleCV/image_classification/scripts/train/DPN92.sh create mode 100644 PaddleCV/image_classification/scripts/train/DPN98.sh create mode 100644 PaddleCV/image_classification/scripts/train/MobileNetV1_x0_25.sh create mode 100644 PaddleCV/image_classification/scripts/train/MobileNetV1_x0_5.sh create mode 100644 PaddleCV/image_classification/scripts/train/MobileNetV1_x0_75.sh create mode 100644 PaddleCV/image_classification/scripts/train/MobileNetV2_x0_75.sh create mode 100644 PaddleCV/image_classification/scripts/train/ResNeXt101_vd_32x4d.sh create mode 100644 PaddleCV/image_classification/scripts/train/ResNeXt152_vd_64x4d.sh create mode 100644 PaddleCV/image_classification/scripts/train/ResNet18_vd.sh create mode 100644 PaddleCV/image_classification/scripts/train/ResNet34_vd.sh rename 
PaddleCV/image_classification/scripts/train/{SE_154_vd.sh => SENet154_vd.sh} (95%) create mode 100644 PaddleCV/image_classification/scripts/train/SE_ResNet50_vd.sh rename PaddleCV/image_classification/scripts/train/{Xception_41.sh => Xception41.sh} (81%) create mode 100644 PaddleCV/image_classification/scripts/train/Xception41_deeplab.sh create mode 100644 PaddleCV/image_classification/scripts/train/Xception65.sh create mode 100644 PaddleCV/image_classification/scripts/train/Xception65_deeplab.sh create mode 100644 PaddleCV/image_classification/scripts/train/Xception71.sh diff --git a/PaddleCV/image_classification/README.md b/PaddleCV/image_classification/README.md index 35599435..cb35bdae 100644 --- a/PaddleCV/image_classification/README.md +++ b/PaddleCV/image_classification/README.md @@ -136,7 +136,7 @@ bash run.sh train model_name * **use_gpu**: whether to run on GPU. Default: True * **use_label_smoothing**: whether to apply label smoothing to the labels. Default: False -* **label_smoothing_epsilon**: the epsilon of label_smoothing. Default: 0.2 +* **label_smoothing_epsilon**: the epsilon of label_smoothing. Default: 0.1 * **random_seed**: random seed. Default: 1000 **Data reader note:** the data reader is defined in ```reader.py``` and now defaults to a cv2-based reader. In the [training](#模型训练) stage the default augmentations are random crop and horizontal flip, while the [evaluation](#模型评估) and [inference](#模型预测) stages default to center crop. Currently supported augmentations are: @@ -255,12 +255,17 @@ PaddlePaddle/Models ImageClassification supports custom data ### MobileNet Series |Model | Top-1 | Top-5 | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | |- |:-: |:-: |:-: |:-: | +|[MobileNetV1_x0_25](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV1_x0_25_pretrained.tar) | 51.43% | 75.46% | 2.283 | 0.866 | +|[MobileNetV1_x0_5](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV1_x0_5_pretrained.tar) | 63.52% | 84.73% | 2.378 | 1.058 | +|[MobileNetV1_x0_75](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV1_x0_75_pretrained.tar) | 68.81% | 88.23% | 2.540 | 1.386 | |[MobileNetV1](http://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV1_pretrained.tar) | 70.99% | 89.68% | 2.609 | 1.615 | -|[MobileNetV2](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV2_pretrained.tar) | 72.15% | 90.65% | 4.546 | 5.278 | |[MobileNetV2_x0_25](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV2_x0_25_pretrained.tar) | 53.21% | 76.52% | 4.267 | 3.777 | |[MobileNetV2_x0_5](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV2_x0_5_pretrained.tar) | 65.03% | 85.72% | 4.514 | 4.150 | +|[MobileNetV2_x0_75](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV2_x0_75_pretrained.tar) | 69.83% | 89.01% | 4.313 | 3.720 | +|[MobileNetV2](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV2_pretrained.tar) | 72.15% | 90.65% | 4.546 | 5.278 | |[MobileNetV2_x1_5](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV2_x1_5_pretrained.tar) | 74.12% | 91.67% | 5.235 | 6.909 | |[MobileNetV2_x2_0](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV2_x2_0_pretrained.tar) | 75.23% | 92.58% | 6.680 | 7.658 | +|[MobileNetV3_small_x1_0](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV3_small_x1_0_pretrained.tar) | 67.46% | 87.12% | 6.809 | | ### ShuffleNet Series |Model | Top-1 | Top-5 | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | |- |:-: |:-: |:-: |:-: | @@ -277,7 +282,9 @@ PaddlePaddle/Models ImageClassification supports custom data |Model | Top-1 | Top-5 | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | |- |:-: |:-: |:-: |:-: | |[ResNet18](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet18_pretrained.tar) | 70.98% | 89.92% | 3.456 | 2.484 |
+|[ResNet18_vd](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet18_vd_pretrained.tar) | 72.26% | 90.80% | 3.847 | 2.473 | |[ResNet34](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet34_pretrained.tar) | 74.57% | 92.14% | 5.668 | 3.767 | +|[ResNet34_vd](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet34_vd_pretrained.tar) | 75.98% | 92.98% | 6.089 | 3.531 | |[ResNet50](http://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_pretrained.tar) | 76.50% | 93.00% | 8.787 | 5.434 | |[ResNet50_vc](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vc_pretrained.tar) |78.35% | 94.03% | 9.013 | 5.463 | |[ResNet50_vd](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vd_pretrained.tar) | 79.12% | 94.44% | 9.058 | 5.510 | @@ -296,10 +303,12 @@ PaddlePaddle/Models ImageClassification supports custom data |[ResNeXt50_64x4d](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt50_64x4d_pretrained.tar) | 78.43% | 94.13% | 28.162 | 18.271 | |[ResNeXt50_vd_64x4d](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt50_vd_64x4d_pretrained.tar) | 80.12% | 94.86% | 20.888 | 17.687 | |[ResNeXt101_32x4d](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt101_32x4d_pretrained.tar) | 78.65% | 94.19% | 24.154 | 21.387 | +|[ResNeXt101_vd_32x4d](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt101_vd_32x4d_pretrained.tar) | 80.33% | 95.12% | 24.701 | 18.032 | |[ResNeXt101_64x4d](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt50_64x4d_pretrained.tar) | 78.43% | 94.13% | 41.073 | 38.736 | |[ResNeXt101_vd_64x4d](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt101_vd_64x4d_pretrained.tar) | 80.78% | 95.20% | 42.277 | 40.929 | |[ResNeXt152_32x4d](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt152_32x4d_pretrained.tar) | 78.98% | 94.33% | 37.007 | 31.301 | |[ResNeXt152_64x4d](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt152_64x4d_pretrained.tar) | 79.51% | 94.71% | 58.966 | 57.267 | +|[ResNeXt152_vd_64x4d](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt152_vd_64x4d_pretrained.tar) | 81.08% | 95.34% | 60.947 | 49.117 | ### DenseNet Series |Model | Top-1 | Top-5 | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | |- |:-: |:-: |:-: |:-: | @@ -310,9 +319,19 @@ PaddlePaddle/Models ImageClassification supports custom data |[DenseNet201](https://paddle-imagenet-models-name.bj.bcebos.com/DenseNet201_pretrained.tar) | 77.63% | 93.66% | 26.583 | 10.549 | |[DenseNet264](https://paddle-imagenet-models-name.bj.bcebos.com/DenseNet264_pretrained.tar) | 77.96% | 93.85% | 41.495 | 15.574 | +### DPN Series +|Model | Top-1 | Top-5 | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | +|- |:-: |:-: |:-: |:-: | +|[DPN68](https://paddle-imagenet-models-name.bj.bcebos.com/DPN68_pretrained.tar) | 76.78% | 93.43% | 18.446 | 6.324 | +|[DPN92](https://paddle-imagenet-models-name.bj.bcebos.com/DPN92_pretrained.tar) | 79.85% | 94.80% | 25.748 | 22.182 | +|[DPN98](https://paddle-imagenet-models-name.bj.bcebos.com/DPN98_pretrained.tar) | 80.59% | 95.10% | 29.421 | 13.657 | +|[DPN107](https://paddle-imagenet-models-name.bj.bcebos.com/DPN107_pretrained.tar) | 80.89% | 95.32% | 41.071 | 19.115 | +|[DPN131](https://paddle-imagenet-models-name.bj.bcebos.com/DPN131_pretrained.tar) | 80.70% | 95.14% | 41.179 | 18.278 | + ### SENet Series |Model | Top-1 | Top-5 | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | |- |:-: |:-: |:-: |:-: | +|[SE_ResNet50_vd](https://paddle-imagenet-models-name.bj.bcebos.com/SE_ResNet50_vd_pretrained.tar) |
79.52% | 94.75% | 10.345 | 7.662 | |[SE_ResNeXt50_32x4d](https://paddle-imagenet-models-name.bj.bcebos.com/SE_ResNeXt50_32x4d_pretrained.tar) | 78.44% | 93.96% | 14.916 | 12.126 | |[SE_ResNeXt101_32x4d](https://paddle-imagenet-models-name.bj.bcebos.com/SE_ResNeXt101_32x4d_pretrained.tar) | 79.12% | 94.20% | 30.085 | 24.110 | |[SENet_154_vd](https://paddle-imagenet-models-name.bj.bcebos.com/SENet_154_vd_pretrained.tar) | 81.40% | 95.48% | 71.892 | 64.855 | @@ -321,7 +340,11 @@ PaddlePaddle/Models ImageClassification supports custom data | Model | Top-1 | Top-5 | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | |- |:-: |:-: |:-: |:-: | |[GoogLeNet](https://paddle-imagenet-models-name.bj.bcebos.com/GoogLeNet_pretrained.tar) | 70.70% | 89.66% | 6.528 | 3.076 | -|[Xception_41](https://paddle-imagenet-models-name.bj.bcebos.com/Xception_41_pretrained.tar) | 79.30% | 94.53% | 13.757 | 10.831 | +|[Xception41](https://paddle-imagenet-models-name.bj.bcebos.com/Xception41_pretrained.tar) | 79.30% | 94.53% | 13.757 | 10.831 | +|[Xception41_deeplab](https://paddle-imagenet-models-name.bj.bcebos.com/Xception41_deeplab_pretrained.tar) | 79.55% | 94.38% | 14.268 | 10.301 | +|[Xception65](https://paddle-imagenet-models-name.bj.bcebos.com/Xception65_pretrained.tar) | 81.00% | 95.49% | 19.216 | 15.981 | +|[Xception65_deeplab](https://paddle-imagenet-models-name.bj.bcebos.com/Xception65_deeplab_pretrained.tar) | 80.32% | 94.49% | 19.536 | 16.365 | +|[Xception71](https://paddle-imagenet-models-name.bj.bcebos.com/Xception71_pretrained.tar) | 81.11% | 95.45% | 23.291 | 18.974 | |[InceptionV4](https://paddle-imagenet-models-name.bj.bcebos.com/InceptionV4_pretrained.tar) | 80.77% | 95.26% | 32.413 | 18.154 | ### DarkNet @@ -366,12 +389,14 @@ PaddlePaddle/Models ImageClassification supports custom data - ShuffleNetV2: [ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design](https://arxiv.org/abs/1807.11164), Ningning Ma, Xiangyu Zhang, Hai-Tao Zheng, Jian Sun - MobileNetV1: [MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications](https://arxiv.org/abs/1704.04861), Andrew G. Howard, Menglong Zhu, Bo Chen, Dmitry Kalenichenko, Weijun Wang, Tobias Weyand, Marco Andreetto, Hartwig Adam - MobileNetV2: [MobileNetV2: Inverted Residuals and Linear Bottlenecks](https://arxiv.org/pdf/1801.04381v4.pdf), Mark Sandler, Andrew Howard, Menglong Zhu, Andrey Zhmoginov, Liang-Chieh Chen +- MobileNetV3: [Searching for MobileNetV3](https://arxiv.org/pdf/1905.02244.pdf), Andrew Howard, Mark Sandler, Grace Chu, Liang-Chieh Chen, Bo Chen, Mingxing Tan, Weijun Wang, Yukun Zhu, Ruoming Pang, Vijay Vasudevan, Quoc V.
Le, Hartwig Adam - VGG: [Very Deep Convolutional Networks for Large-scale Image Recognition](https://arxiv.org/pdf/1409.1556), Karen Simonyan, Andrew Zisserman - GoogLeNet: [Going Deeper with Convolutions](https://www.cs.unc.edu/~wliu/papers/GoogLeNet.pdf), Christian Szegedy, Wei Liu, Yangqing Jia - Xception: [Xception: Deep Learning with Depthwise Separable Convolutions](https://arxiv.org/abs/1610.02357), François Chollet - InceptionV4: [Inception-v4, Inception-ResNet and the Impact of Residual Connections on Learning](https://arxiv.org/abs/1602.07261), Christian Szegedy, Sergey Ioffe, Vincent Vanhoucke, Alex Alemi - DarkNet: [YOLOv3: An Incremental Improvement](https://pjreddie.com/media/files/papers/YOLOv3.pdf), Joseph Redmon, Ali Farhadi - DenseNet: [Densely Connected Convolutional Networks](https://arxiv.org/abs/1608.06993), Gao Huang, Zhuang Liu, Laurens van der Maaten +- DPN: [Dual Path Networks](https://arxiv.org/pdf/1707.01629.pdf), Yunpeng Chen, Jianan Li, Huaxin Xiao, Xiaojie Jin, Shuicheng Yan, Jiashi Feng - SqueezeNet: [SQUEEZENET: ALEXNET-LEVEL ACCURACY WITH 50X FEWER PARAMETERS AND <0.5MB MODEL SIZE](https://arxiv.org/abs/1602.07360), Forrest N. Iandola, Song Han, Matthew W. Moskewicz, Khalid Ashraf, William J. Dally, Kurt Keutzer - ResNeXt101_wsl: [Exploring the Limits of Weakly Supervised Pretraining](https://arxiv.org/abs/1805.00932), Dhruv Mahajan, Ross Girshick, Vignesh Ramanathan, Kaiming He, Manohar Paluri, Yixuan Li, Ashwin Bharambe, Laurens van der Maaten - Fix_ResNeXt101_wsl: [Fixing the train-test resolution discrepancy](https://arxiv.org/abs/1906.06423), Hugo Touvron, Andrea Vedaldi, Matthijs Douze, Hervé Jégou @@ -383,9 +408,10 @@ - 2019/04/01 **Stage3**: Update ResNet18, ResNet34, GoogLeNet, ShuffleNetV2 - 2019/06/12 **Stage4**: Update ResNet50_vc, ResNet50_vd, ResNet101_vd, ResNet152_vd, ResNet200_vd, SE154_vd, InceptionV4, ResNeXt101_64x4d, ResNeXt101_vd_64x4d - 2019/06/22 Update ResNet50_vd_v2 -- 2019/07/02 **Stage5**: Update MobileNetV2_x0_5, ResNeXt50_32x4d, ResNeXt50_64x4d, Xception_41, ResNet101_vd +- 2019/07/02 **Stage5**: Update MobileNetV2_x0_5, ResNeXt50_32x4d, ResNeXt50_64x4d, Xception41, ResNet101_vd - 2019/07/19 **Stage6**: Update ShuffleNetV2_x0_25, ShuffleNetV2_x0_33, ShuffleNetV2_x0_5, ShuffleNetV2_x1_0, ShuffleNetV2_x1_5, ShuffleNetV2_x2_0, MobileNetV2_x0_25, MobileNetV2_x1_5, MobileNetV2_x2_0, ResNeXt50_vd_64x4d, ResNeXt101_32x4d, ResNeXt152_32x4d - 2019/08/01 **Stage7**: Update DarkNet53, DenseNet121, DenseNet161, DenseNet169, DenseNet201, DenseNet264, SqueezeNet1_0, SqueezeNet1_1, ResNeXt50_vd_32x4d, ResNeXt152_64x4d, ResNeXt101_32x8d_wsl, ResNeXt101_32x16d_wsl, ResNeXt101_32x32d_wsl, ResNeXt101_32x48d_wsl, Fix_ResNeXt101_32x48d_wsl +- 2019/09/11 **Stage8**: Update ResNet18_vd, ResNet34_vd, MobileNetV1_x0_25, MobileNetV1_x0_5, MobileNetV1_x0_75, MobileNetV2_x0_75, MobileNetV3_small_x1_0, DPN68, DPN92, DPN98, DPN107, DPN131, ResNeXt101_vd_32x4d, ResNeXt152_vd_64x4d, Xception65, Xception71, Xception41_deeplab, Xception65_deeplab, SE_ResNet50_vd ## How to contribute code diff --git a/PaddleCV/image_classification/README_en.md b/PaddleCV/image_classification/README_en.md index e2f1e4ec..3a8cb896 100644 --- a/PaddleCV/image_classification/README_en.md +++ b/PaddleCV/image_classification/README_en.md @@ -128,7 +128,7 @@ Switch: * **use_gpu**: whether to use GPU or not. Default: True. * **use_label_smoothing**: whether to use label_smoothing or not. Default: False. -* **label_smoothing_epsilon**: the label_smoothing_epsilon. Default: 0.2. +* **label_smoothing_epsilon**: the label_smoothing_epsilon. 
Default:0.1. * **random_seed**: random seed for debugging, Default: 1000 **data reader introduction:** Data reader is defined in ```reader.py```, default reader is implemented by opencv. In the [Training](#training) Stage, random crop and flipping are applied, while center crop is applied in the [Evaluation](#evaluation) and [Inference](#inference) stages. Supported data augmentation includes: @@ -215,110 +215,133 @@ Pretrained models can be downloaded by clicking related model names. ``` - 4: The pretrained model of the ResNeXt101_wsl series network is converted from the pytorch model. Please refer to [RESNEXT WSL](https://pytorch.org/hub/facebookresearch_WSL-Images_resnext/) for details. -### AlexNet Series -|model | top-1/top-5 accuracy(CV2) | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | -|- |:-: |:-: |:-: | -|[AlexNet](http://paddle-imagenet-models-name.bj.bcebos.com/AlexNet_pretrained.tar) | 56.72%/79.17% | 3.083 | 2.728 | +### AlexNet +|Model | Top-1 | Top-5 | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | +|- |:-: |:-: |:-: |:-: | +|[AlexNet](http://paddle-imagenet-models-name.bj.bcebos.com/AlexNet_pretrained.tar) | 56.72% | 79.17% | 3.083 | 2.728 | -### SqueezeNet Series -|model | top-1/top-5 accuracy(CV2) | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | -|- |:-: |:-: |:-: | -|[SqueezeNet1_0](https://paddle-imagenet-models-name.bj.bcebos.com/SqueezeNet1_0_pretrained.tar) | 59.60%/81.66% | 2.740 | 1.688 | -|[SqueezeNet1_1](https://paddle-imagenet-models-name.bj.bcebos.com/SqueezeNet1_1_pretrained.tar) | 60.08%/81.85% | 2.751 | 1.270 | +### SqueezeNet +|Model | Top-1 | Top-5 | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | +|- |:-: |:-: |:-: |:-: | +|[SqueezeNet1_0](https://paddle-imagenet-models-name.bj.bcebos.com/SqueezeNet1_0_pretrained.tar) | 59.60% | 81.66% | 2.740 | 1.688 | +|[SqueezeNet1_1](https://paddle-imagenet-models-name.bj.bcebos.com/SqueezeNet1_1_pretrained.tar) | 60.08% | 81.85% | 2.751 | 1.270 | ### VGG Series -|model | top-1/top-5 accuracy(CV2) | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | -|- |:-: |:-: |:-: | -|[VGG11](https://paddle-imagenet-models-name.bj.bcebos.com/VGG11_pretrained.tar) | 69.28%/89.09% | 8.223 | 6.821 | -|[VGG13](https://paddle-imagenet-models-name.bj.bcebos.com/VGG13_pretrained.tar) | 70.02%/89.42% | 9.512 | 7.783 | -|[VGG16](https://paddle-imagenet-models-name.bj.bcebos.com/VGG16_pretrained.tar) | 72.00%/90.69% | 11.315 | 9.067 | -|[VGG19](https://paddle-imagenet-models-name.bj.bcebos.com/VGG19_pretrained.tar) | 72.56%/90.93% | 13.096 | 10.388 | +|Model | Top-1 | Top-5 | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | +|- |:-: |:-: |:-: |:-: | +|[VGG11](https://paddle-imagenet-models-name.bj.bcebos.com/VGG11_pretrained.tar) | 69.28% | 89.09% | 8.223 | 6.821 | +|[VGG13](https://paddle-imagenet-models-name.bj.bcebos.com/VGG13_pretrained.tar) | 70.02% | 89.42% | 9.512 | 7.783 | +|[VGG16](https://paddle-imagenet-models-name.bj.bcebos.com/VGG16_pretrained.tar) | 72.00% | 90.69% | 11.315 | 9.067 | +|[VGG19](https://paddle-imagenet-models-name.bj.bcebos.com/VGG19_pretrained.tar) | 72.56% | 90.93% | 13.096 | 10.388 | ### MobileNet Series -|model | top-1/top-5 accuracy(CV2) | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | -|- |:-: |:-: |:-: | -|[MobileNetV1](http://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV1_pretrained.tar) | 70.99%/89.68% | 2.609 |1.615 | 
-|[MobileNetV2_x0_25](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV2_x0_25_pretrained.tar) | 53.21%/76.52% | 4.267 | 3.777 | -|[MobileNetV2_x0_5](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV2_x0_5_pretrained.tar) | 65.03%/85.72% | 4.514 | 4.150 | -|[MobileNetV2_x1_0](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV2_pretrained.tar) | 72.15%/90.65% | 4.546 | 5.278 | -|[MobileNetV2_x1_5](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV2_x1_5_pretrained.tar) | 74.12%/91.67% | 5.235 | 6.909 | -|[MobileNetV2_x2_0](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV2_x2_0_pretrained.tar) | 75.23%/92.58% | 6.680 | 7.658 | +|Model | Top-1 | Top-5 | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | +|- |:-: |:-: |:-: |:-: | +|[MobileNetV1_x0_25](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV1_x0_25_pretrained.tar) | 51.43% | 75.46% | 2.283 | 0.866 | +|[MobileNetV1_x0_5](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV1_x0_5_pretrained.tar) | 63.52% | 84.73% | 2.378 | 1.058 | +|[MobileNetV1_x0_75](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV1_x0_75_pretrained.tar) | 68.81% | 88.23% | 2.540 | 1.386 | +|[MobileNetV1](http://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV1_pretrained.tar) | 70.99% | 89.68% | 2.609 |1.615 | +|[MobileNetV2_x0_25](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV2_x0_25_pretrained.tar) | 53.21% | 76.52% | 4.267 | 3.777 | +|[MobileNetV2_x0_5](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV2_x0_5_pretrained.tar) | 65.03% | 85.72% | 4.514 | 4.150 | +|[MobileNetV2_x0_75](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV2_x0_75_pretrained.tar) | 69.83% | 89.01% | 4.313 | 3.720 | +|[MobileNetV2](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV2_pretrained.tar) | 72.15% | 90.65% | 4.546 | 5.278 | +|[MobileNetV2_x1_5](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV2_x1_5_pretrained.tar) | 74.12% | 91.67% | 5.235 | 6.909 | +|[MobileNetV2_x2_0](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV2_x2_0_pretrained.tar) | 75.23% | 92.58% | 6.680 | 7.658 | +|[MobileNetV3_small_x1_0](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV3_small_x1_0_pretrained.tar) | 67.46% | 87.12% | 6.809 | | ### ShuffleNet Series -|model | top-1/top-5 accuracy(CV2) | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | -|- |:-: |:-: |:-: | -|[ShuffleNetV2_x0_25](https://paddle-imagenet-models-name.bj.bcebos.com/ShuffleNetV2_x0_25_pretrained.tar) | 49.90%/73.79% | 5.956 | 2.961 | -|[ShuffleNetV2_x0_33](https://paddle-imagenet-models-name.bj.bcebos.com/ShuffleNetV2_x0_33_pretrained.tar) | 53.73%/77.05% | 5.896 | 2.941 | -|[ShuffleNetV2_x0_5](https://paddle-imagenet-models-name.bj.bcebos.com/ShuffleNetV2_x0_5_pretrained.tar) | 60.32%/82.26% | 6.048 | 3.088 | -|[ShuffleNetV2_x1_0](https://paddle-imagenet-models-name.bj.bcebos.com/ShuffleNetV2_x1_0_pretrained.tar) | 68.80%/88.45% | 6.101 | 3.616 | -|[ShuffleNetV2_x1_5](https://paddle-imagenet-models-name.bj.bcebos.com/ShuffleNetV2_x1_5_pretrained.tar) | 71.63%/90.15% | 6.113 | 3.699 | -|[ShuffleNetV2_x2_0](https://paddle-imagenet-models-name.bj.bcebos.com/ShuffleNetV2_x2_0_pretrained.tar) | 73.15%/91.20% | 6.430 | 4.553 | -|[ShuffleNetV2_x1_0_swish](https://paddle-imagenet-models-name.bj.bcebos.com/ShuffleNetV2_pretrained.tar) | 70.03%/89.17% | 6.078 | 6.282 | +|Model | Top-1 | Top-5 | Paddle Fluid inference time(ms) | Paddle TensorRT 
inference time(ms) | +|- |:-: |:-: |:-: |:-: | +|[ShuffleNetV2](https://paddle-imagenet-models-name.bj.bcebos.com/ShuffleNetV2_pretrained.tar) | 68.80% | 88.45% | 6.101 | 3.616 | +|[ShuffleNetV2_x0_25](https://paddle-imagenet-models-name.bj.bcebos.com/ShuffleNetV2_x0_25_pretrained.tar) | 49.90% | 73.79% | 5.956 | 2.961 | +|[ShuffleNetV2_x0_33](https://paddle-imagenet-models-name.bj.bcebos.com/ShuffleNetV2_x0_33_pretrained.tar) | 53.73% | 77.05% | 5.896 | 2.941 | +|[ShuffleNetV2_x0_5](https://paddle-imagenet-models-name.bj.bcebos.com/ShuffleNetV2_x0_5_pretrained.tar) | 60.32% | 82.26% | 6.048 | 3.088 | +|[ShuffleNetV2_x1_5](https://paddle-imagenet-models-name.bj.bcebos.com/ShuffleNetV2_x1_5_pretrained.tar) | 71.63% | 90.15% | 6.113 | 3.699 | +|[ShuffleNetV2_x2_0](https://paddle-imagenet-models-name.bj.bcebos.com/ShuffleNetV2_x2_0_pretrained.tar) | 73.15% | 91.20% | 6.430 | 4.553 | +|[ShuffleNetV2_swish](https://paddle-imagenet-models-name.bj.bcebos.com/ShuffleNetV2_swish_pretrained.tar) | 70.03% | 89.17% | 6.078 | 6.282 | ### ResNet Series -|model | top-1/top-5 accuracy(CV2) | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | -|- |:-: |:-: |:-: | -|[ResNet18](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet18_pretrained.tar) | 70.98%/89.92% | 3.456 | 2.484 | -|[ResNet34](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet34_pretrained.tar) | 74.57%/92.14% | 5.668 | 3.767 | -|[ResNet50](http://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_pretrained.tar) | 76.50%/93.00% | 8.787 | 5.434 | -|[ResNet50_vc](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vc_pretrained.tar) |78.35%/94.03% | 9.013 | 5.463 | -|[ResNet50_vd](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vd_pretrained.tar) | 79.12%/94.44% | 9.058 | 5.510 | -|[ResNet50_vd_v2](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vd_v2_pretrained.tar) | 79.84%/94.93% | 9.058 | 5.510 | -|[ResNet101](http://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_pretrained.tar) | 77.56%/93.64% | 15.447 | 8.779 | -|[ResNet101_vd](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_vd_pretrained.tar) | 80.17%/94.97% | 15.685 | 8.878 | -|[ResNet152](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet152_pretrained.tar) | 78.26%/93.96% | 21.816 | 12.148 | -|[ResNet152_vd](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet152_vd_pretrained.tar) | 80.59%/95.30% | 22.041 | 12.259 | -|[ResNet200_vd](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet200_vd_pretrained.tar) | 80.93%/95.33% | 28.015 | 15.278 | +|Model | Top-1 | Top-5 | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | +|- |:-: |:-: |:-: |:-: | +|[ResNet18](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet18_pretrained.tar) | 70.98% | 89.92% | 3.456 | 2.484 | +|[ResNet18_vd](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet18_vd_pretrained.tar) | 72.26% | 90.80% | 3.847 | 2.473 | +|[ResNet34](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet34_pretrained.tar) | 74.57% | 92.14% | 5.668 | 3.767 | +|[ResNet34_vd](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet34_vd_pretrained.tar) | 75.98% | 92.98% | 6.089 | 3.531 | +|[ResNet50](http://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_pretrained.tar) | 76.50% | 93.00% | 8.787 | 5.434 | +|[ResNet50_vc](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vc_pretrained.tar) |78.35% | 94.03% | 9.013 | 5.463 | 
+|[ResNet50_vd](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vd_pretrained.tar) | 79.12% | 94.44% | 9.058 | 5.510 | +|[ResNet50_vd_v2](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vd_v2_pretrained.tar) | 79.84% | 94.93% | 9.058 | 5.510 | +|[ResNet101](http://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_pretrained.tar) | 77.56% | 93.64% | 15.447 | 8.779 | +|[ResNet101_vd](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_vd_pretrained.tar) | 80.17% | 94.97% | 15.685 | 8.878 | +|[ResNet152](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet152_pretrained.tar) | 78.26% | 93.96% | 21.816 | 12.148 | +|[ResNet152_vd](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet152_vd_pretrained.tar) | 80.59% | 95.30% | 22.041 | 12.259 | +|[ResNet200_vd](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet200_vd_pretrained.tar) | 80.93% | 95.33% | 28.015 | 15.278 | ### ResNeXt Series -|model | top-1/top-5 accuracy(CV2) | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | -|- |:-: |:-: |:-: | -|[ResNeXt50_32x4d](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt50_32x4d_pretrained.tar) | 77.75%/93.82% | 12.863 | 9.837 | -|[ResNeXt50_vd_32x4d](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt50_vd_32x4d_pretrained.tar) | 79.56%/94.62% | 13.673 | 9.991 | -|[ResNeXt50_64x4d](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt50_64x4d_pretrained.tar) | 78.43%/94.13% | 28.162 | 18.271 | -|[ResNeXt50_vd_64x4d](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt50_vd_64x4d_pretrained.tar) | 80.12%/94.86% | 20.888 | 17.687 | -|[ResNeXt101_32x4d](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt101_32x4d_pretrained.tar) | 78.65%/94.19% | 24.154 | 21.387 | -|[ResNeXt101_64x4d](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt50_64x4d_pretrained.tar) | 78.43%/94.13% | 41.073 | 38.736 | -|[ResNeXt101_vd_64x4d](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt101_vd_64x4d_pretrained.tar) | 80.78%/95.20% | 42.277 | 40.929 | -|[ResNeXt152_32x4d](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt152_32x4d_pretrained.tar) | 78.98%/94.33% | 37.007 | 31.301 | -|[ResNeXt152_64x4d](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt152_64x4d_pretrained.tar) | 79.51%/94.71% | 58.966 | 57.267 | +|Model | Top-1 | Top-5 | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | +|- |:-: |:-: |:-: |:-: | +|[ResNeXt50_32x4d](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt50_32x4d_pretrained.tar) | 77.75% | 93.82% | 12.863 | 9.837 | +|[ResNeXt50_vd_32x4d](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt50_vd_32x4d_pretrained.tar) | 79.56% | 94.62% | 13.673 | 9.991 | +|[ResNeXt50_64x4d](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt50_64x4d_pretrained.tar) | 78.43% | 94.13% | 28.162 | 18.271 | +|[ResNeXt50_vd_64x4d](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt50_vd_64x4d_pretrained.tar) | 80.12% | 94.86% | 20.888 | 17.687 | +|[ResNeXt101_32x4d](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt101_32x4d_pretrained.tar) | 78.65% | 94.19% | 24.154 | 21.387 | +|[ResNeXt101_vd_32x4d](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt101_vd_32x4d_pretrained.tar) | 80.33% | 95.12% | 24.701 | 18.032 | +|[ResNeXt101_64x4d](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt50_64x4d_pretrained.tar) | 78.43% | 94.13% | 41.073 | 38.736 | 
+|[ResNeXt101_vd_64x4d](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt101_vd_64x4d_pretrained.tar) | 80.78% | 95.20% | 42.277 | 40.929 | +|[ResNeXt152_32x4d](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt152_32x4d_pretrained.tar) | 78.98% | 94.33% | 37.007 | 31.301 | +|[ResNeXt152_64x4d](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt152_64x4d_pretrained.tar) | 79.51% | 94.71% | 58.966 | 57.267 | +|[ResNeXt152_vd_64x4d](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt152_vd_64x4d_pretrained.tar) | 81.08% | 95.34% | 60.947 | 49.117 | ### DenseNet Series -|model | top-1/top-5 accuracy(CV2) | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | -|- |:-: |:-: |:-: | -|[DenseNet121](https://paddle-imagenet-models-name.bj.bcebos.com/DenseNet121_pretrained.tar) | 75.66%/92.58% | 12.437 | 5.813 | -|[DenseNet161](https://paddle-imagenet-models-name.bj.bcebos.com/DenseNet161_pretrained.tar) | 78.57%/94.14% | 27.717 | 12.861 | -|[DenseNet169](https://paddle-imagenet-models-name.bj.bcebos.com/DenseNet169_pretrained.tar) | 76.81%/93.31% | 18.941 | 8.146 | -|[DenseNet201](https://paddle-imagenet-models-name.bj.bcebos.com/DenseNet201_pretrained.tar) | 77.63%/93.66% | 26.583 | 10.549 | -|[DenseNet264](https://paddle-imagenet-models-name.bj.bcebos.com/DenseNet264_pretrained.tar) | 77.96%/93.85% | 41.495 | 15.574 | +|Model | Top-1 | Top-5 | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | +|- |:-: |:-: |:-: |:-: | +|[DenseNet121](https://paddle-imagenet-models-name.bj.bcebos.com/DenseNet121_pretrained.tar) | 75.66% | 92.58% | 12.437 | 5.813 | +|[DenseNet161](https://paddle-imagenet-models-name.bj.bcebos.com/DenseNet161_pretrained.tar) | 78.57% | 94.14% | 27.717 | 12.861 | +|[DenseNet169](https://paddle-imagenet-models-name.bj.bcebos.com/DenseNet169_pretrained.tar) | 76.81% | 93.31% | 18.941 | 8.146 | +|[DenseNet201](https://paddle-imagenet-models-name.bj.bcebos.com/DenseNet201_pretrained.tar) | 77.63% | 93.66% | 26.583 | 10.549 | +|[DenseNet264](https://paddle-imagenet-models-name.bj.bcebos.com/DenseNet264_pretrained.tar) | 77.96% | 93.85% | 41.495 | 15.574 | + +### DPN Series +|Model | Top-1 | Top-5 | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | +|- |:-: |:-: |:-: |:-: | +|[DPN68](https://paddle-imagenet-models-name.bj.bcebos.com/DPN68_pretrained.tar) | 76.78% | 93.43% | 18.446 | 6.324 | +|[DPN92](https://paddle-imagenet-models-name.bj.bcebos.com/DPN92_pretrained.tar) | 79.85% | 94.80% | 25.748 | 22.182 | +|[DPN98](https://paddle-imagenet-models-name.bj.bcebos.com/DPN98_pretrained.tar) | 80.59% | 95.10% | 29.421 | 13.657 | +|[DPN107](https://paddle-imagenet-models-name.bj.bcebos.com/DPN107_pretrained.tar) | 80.89% | 95.32% | 41.071 | 19.115 | +|[DPN131](https://paddle-imagenet-models-name.bj.bcebos.com/DPN131_pretrained.tar) | 80.70% | 95.14% | 41.179 | 18.278 | ### SENet Series -|model | top-1/top-5 accuracy(CV2) | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | -|- |:-: |:-: |:-: | -|[SE_ResNeXt50_32x4d](https://paddle-imagenet-models-name.bj.bcebos.com/SE_ResNeXt50_32x4d_pretrained.tar) | 78.44%/93.96% | 14.916 | 12.126 | -|[SE_ResNeXt101_32x4d](https://paddle-imagenet-models-name.bj.bcebos.com/SE_ResNeXt101_32x4d_pretrained.tar) | 79.12%/94.20% | 30.085 | 24.110 | -|[SE_154_vd](https://paddle-imagenet-models-name.bj.bcebos.com/SE_154_vd_pretrained.tar) | 81.40%/95.48% | 71.892 | 64.855 | +|Model | Top-1 | Top-5 | Paddle Fluid inference time(ms) | Paddle TensorRT inference 
time(ms) | +|- |:-: |:-: |:-: |:-: | +|[SE_ResNet50_vd](https://paddle-imagenet-models-name.bj.bcebos.com/SE_ResNet50_vd_pretrained.tar) | 79.52% | 94.75% | 10.345 | 7.662 | +|[SE_ResNeXt50_32x4d](https://paddle-imagenet-models-name.bj.bcebos.com/SE_ResNeXt50_32x4d_pretrained.tar) | 78.44% | 93.96% | 14.916 | 12.126 | +|[SE_ResNeXt101_32x4d](https://paddle-imagenet-models-name.bj.bcebos.com/SE_ResNeXt101_32x4d_pretrained.tar) | 79.12% | 94.20% | 30.085 | 24.110 | +|[SENet_154_vd](https://paddle-imagenet-models-name.bj.bcebos.com/SENet_154_vd_pretrained.tar) | 81.40% | 95.48% | 71.892 | 64.855 | ### Inception Series -|model | top-1/top-5 accuracy(CV2) | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | -|- |:-: |:-: |:-: | -|[GoogLeNet](https://paddle-imagenet-models-name.bj.bcebos.com/GoogLeNet_pretrained.tar) | 70.70%/89.66% | 6.528 | 3.076 | -|[Xception_41](https://paddle-imagenet-models-name.bj.bcebos.com/Xception_41_pretrained.tar) | 79.30%/94.53% | 13.757 | 10.831 | -|[InceptionV4](https://paddle-imagenet-models-name.bj.bcebos.com/InceptionV4_pretrained.tar) | 80.77%/95.26% | 32.413 | 18.154 | +| Model | Top-1 | Top-5 | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | +|- |:-: |:-: |:-: |:-: | +|[GoogLeNet](https://paddle-imagenet-models-name.bj.bcebos.com/GoogLeNet_pretrained.tar) | 70.70% | 89.66% | 6.528 | 3.076 | +|[Xception41](https://paddle-imagenet-models-name.bj.bcebos.com/Xception41_pretrained.tar) | 79.30% | 94.53% | 13.757 | 10.831 | +|[Xception41_deeplab](https://paddle-imagenet-models-name.bj.bcebos.com/Xception41_deeplab_pretrained.tar) | 79.55% | 94.38% | 14.268 | 10.301 | +|[Xception65](https://paddle-imagenet-models-name.bj.bcebos.com/Xception65_pretrained.tar) | 81.00% | 95.49% | 19.216 | 15.981 | +|[Xception65_deeplab](https://paddle-imagenet-models-name.bj.bcebos.com/Xception65_deeplab_pretrained.tar) | 80.32% | 94.49% | 19.536 | 16.365 | +|[Xception71](https://paddle-imagenet-models-name.bj.bcebos.com/Xception71_pretrained.tar) | 81.11% | 95.45% | 23.291 | 18.974 | +|[InceptionV4](https://paddle-imagenet-models-name.bj.bcebos.com/InceptionV4_pretrained.tar) | 80.77% | 95.26% | 32.413 | 18.154 | ### DarkNet -|model | top-1/top-5 accuracy(CV2) | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | -|- |:-: |:-: |:-: | -|[DarkNet53](https://paddle-imagenet-models-name.bj.bcebos.com/DarkNet53_ImageNet1k_pretrained.tar) | 78.04%/94.05% | 11.969 | 7.153 | - -### ResNeXt101_:wwsl Series -|model | top-1/top-5 accuracy(CV2) | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | -|- |:-: |:-: |:-: | -|[ResNeXt101_32x8d_wsl](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt101_32x8d_wsl_pretrained.tar) | 82.55%/96.74% | 33.310 | 27.648 | -|[ResNeXt101_32x16d_wsl](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt101_32x16d_wsl_pretrained.tar) | 84.24%/97.26% | 54.320 | 46.064 | -|[ResNeXt101_32x32d_wsl](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt101_32x32d_wsl_pretrained.tar) | 84.97%/97.59% | 97.734 | 87.961 | -|[ResNeXt101_32x48d_wsl](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt101_32x48d_wsl_pretrained.tar) | 85.37%/97.69% | 161.722 | | -|[Fix_ResNeXt101_32x48d_wsl](https://paddle-imagenet-models-name.bj.bcebos.com/Fix_ResNeXt101_32x48d_wsl_pretrained.tar) | 86.26%/97.97% | 236.091 | | +|Model | Top-1 | Top-5 | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | +|- |:-: |:-: |:-: |:-: | 
+|[DarkNet53](https://paddle-imagenet-models-name.bj.bcebos.com/DarkNet53_ImageNet1k_pretrained.tar) | 78.04% | 94.05% | 11.969 | 7.153 | + +### ResNeXt101_wsl Series +|Model | Top-1 | Top-5 | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | +|- |:-: |:-: |:-: |:-: | +|[ResNeXt101_32x8d_wsl](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt101_32x8d_wsl_pretrained.tar) | 82.55% | 96.74% | 33.310 | 27.648 | +|[ResNeXt101_32x16d_wsl](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt101_32x16d_wsl_pretrained.tar) | 84.24% | 97.26% | 54.320 | 46.064 | +|[ResNeXt101_32x32d_wsl](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt101_32x32d_wsl_pretrained.tar) | 84.97% | 97.59% | 97.734 | 87.961 | +|[ResNeXt101_32x48d_wsl](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt101_32x48d_wsl_pretrained.tar) | 85.37% | 97.69% | 161.722 | | +|[Fix_ResNeXt101_32x48d_wsl](https://paddle-imagenet-models-name.bj.bcebos.com/Fix_ResNeXt101_32x48d_wsl_pretrained.tar) | 86.26% | 97.97% | 236.091 | | ## FAQ @@ -340,12 +363,14 @@ Enforce failed. Expected x_dims[1] == labels_dims[1], but received x_dims[1]:100 - ShuffleNetV2: [ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design](https://arxiv.org/abs/1807.11164), Ningning Ma, Xiangyu Zhang, Hai-Tao Zheng, Jian Sun - MobileNetV1: [MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications](https://arxiv.org/abs/1704.04861), Andrew G. Howard, Menglong Zhu, Bo Chen, Dmitry Kalenichenko, Weijun Wang, Tobias Weyand, Marco Andreetto, Hartwig Adam - MobileNetV2: [MobileNetV2: Inverted Residuals and Linear Bottlenecks](https://arxiv.org/pdf/1801.04381v4.pdf), Mark Sandler, Andrew Howard, Menglong Zhu, Andrey Zhmoginov, Liang-Chieh Chen +- MobileNetV3: [Searching for MobileNetV3](https://arxiv.org/pdf/1905.02244.pdf), Andrew Howard, Mark Sandler, Grace Chu, Liang-Chieh Chen, Bo Chen, Mingxing Tan, Weijun Wang, Yukun Zhu, Ruoming Pang, Vijay Vasudevan, Quoc V. Le, Hartwig Adam - VGG: [Very Deep Convolutional Networks for Large-scale Image Recognition](https://arxiv.org/pdf/1409.1556), Karen Simonyan, Andrew Zisserman - GoogLeNet: [Going Deeper with Convolutions](https://www.cs.unc.edu/~wliu/papers/GoogLeNet.pdf), Christian Szegedy, Wei Liu, Yangqing Jia - Xception: [Xception: Deep Learning with Depthwise Separable Convolutions](https://arxiv.org/abs/1610.02357), François Chollet - InceptionV4: [Inception-v4, Inception-ResNet and the Impact of Residual Connections on Learning](https://arxiv.org/abs/1602.07261), Christian Szegedy, Sergey Ioffe, Vincent Vanhoucke, Alex Alemi - DarkNet: [YOLOv3: An Incremental Improvement](https://pjreddie.com/media/files/papers/YOLOv3.pdf), Joseph Redmon, Ali Farhadi - DenseNet: [Densely Connected Convolutional Networks](https://arxiv.org/abs/1608.06993), Gao Huang, Zhuang Liu, Laurens van der Maaten +- DPN: [Dual Path Networks](https://arxiv.org/pdf/1707.01629.pdf), Yunpeng Chen, Jianan Li, Huaxin Xiao, Xiaojie Jin, Shuicheng Yan, Jiashi Feng - SqueezeNet: [SQUEEZENET: ALEXNET-LEVEL ACCURACY WITH 50X FEWER PARAMETERS AND <0.5MB MODEL SIZE](https://arxiv.org/abs/1602.07360), Forrest N. Iandola, Song Han, Matthew W. Moskewicz, Khalid Ashraf, William J.
Dally, Kurt Keutzer - ResNeXt101_wsl: [Exploring the Limits of Weakly Supervised Pretraining](https://arxiv.org/abs/1805.00932), Dhruv Mahajan, Ross Girshick, Vignesh Ramanathan, Kaiming He, Manohar Paluri, Yixuan Li, Ashwin Bharambe, Laurens van der Maaten - Fix_ResNeXt101_wsl: [Fixing the train-test resolution discrepancy](https://arxiv.org/abs/1906.06423), Hugo Touvron, Andrea Vedaldi, Matthijs Douze, Hervé Jégou @@ -358,9 +383,10 @@ - 2019/04/01 **Stage3**: Update ResNet18, ResNet34, GoogLeNet, ShuffleNetV2 - 2019/06/12 **Stage4**: Update ResNet50_vc, ResNet50_vd, ResNet101_vd, ResNet152_vd, ResNet200_vd, SE154_vd, InceptionV4, ResNeXt101_64x4d, ResNeXt101_vd_64x4d - 2019/06/22 Update ResNet50_vd_v2 -- 2019/07/02 **Stage5**: Update MobileNetV2_x0_5, ResNeXt50_32x4d, ResNeXt50_64x4d, Xception_41, ResNet101_vd +- 2019/07/02 **Stage5**: Update MobileNetV2_x0_5, ResNeXt50_32x4d, ResNeXt50_64x4d, Xception41, ResNet101_vd - 2019/07/19 **Stage6**: Update ShuffleNetV2_x0_25, ShuffleNetV2_x0_33, ShuffleNetV2_x0_5, ShuffleNetV2_x1_0, ShuffleNetV2_x1_5, ShuffleNetV2_x2_0, MobileNetV2_x0_25, MobileNetV2_x1_5, MobileNetV2_x2_0, ResNeXt50_vd_64x4d, ResNeXt101_32x4d, ResNeXt152_32x4d - 2019/08/01 **Stage7**: Update DarkNet53, DenseNet121, DenseNet161, DenseNet169, DenseNet201, DenseNet264, SqueezeNet1_0, SqueezeNet1_1, ResNeXt50_vd_32x4d, ResNeXt152_64x4d, ResNeXt101_32x8d_wsl, ResNeXt101_32x16d_wsl, ResNeXt101_32x32d_wsl, ResNeXt101_32x48d_wsl, Fix_ResNeXt101_32x48d_wsl +- 2019/09/11 **Stage8**: Update ResNet18_vd, ResNet34_vd, MobileNetV1_x0_25, MobileNetV1_x0_5, MobileNetV1_x0_75, MobileNetV2_x0_75, MobileNetV3_small_x1_0, DPN68, DPN92, DPN98, DPN107, DPN131, ResNeXt101_vd_32x4d, ResNeXt152_vd_64x4d, Xception65, Xception71, Xception41_deeplab, Xception65_deeplab, SE_ResNet50_vd ## Contribute diff --git a/PaddleCV/image_classification/models/__init__.py b/PaddleCV/image_classification/models/__init__.py index d3e4b884..5ad807e6 100644 --- a/PaddleCV/image_classification/models/__init__.py +++ b/PaddleCV/image_classification/models/__init__.py @@ -13,23 +13,26 @@ #limitations under the License.
from .alexnet import AlexNet -from .mobilenet import MobileNet, MobileNetV1 -from .mobilenet_v2 import MobileNetV2, MobileNetV2_x0_25, MobileNetV2_x0_5, MobileNetV2_x1_0, MobileNetV2_x1_5, MobileNetV2_x2_0, MobileNetV2_scale +from .mobilenet_v1 import MobileNetV1_x0_25, MobileNetV1_x0_5, MobileNetV1_x0_75, MobileNetV1 +from .mobilenet_v2 import MobileNetV2_x0_25, MobileNetV2_x0_5, MobileNetV2_x0_75, MobileNetV2, MobileNetV2_x1_5, MobileNetV2_x2_0 +from .mobilenet_v3 import MobileNetV3_small_x0_25, MobileNetV3_small_x0_5, MobileNetV3_small_x0_75, MobileNetV3_small_x1_0, MobileNetV3_small_x1_25, MobileNetV3_large_x0_25, MobileNetV3_large_x0_5, MobileNetV3_large_x0_75, MobileNetV3_large_x1_0, MobileNetV3_large_x1_25 from .googlenet import GoogLeNet from .vgg import VGG11, VGG13, VGG16, VGG19 from .resnet import ResNet18, ResNet34, ResNet50, ResNet101, ResNet152 from .resnet_vc import ResNet50_vc, ResNet101_vc, ResNet152_vc -from .resnet_vd import ResNet50_vd, ResNet101_vd, ResNet152_vd, ResNet200_vd +from .resnet_vd import ResNet18_vd, ResNet34_vd, ResNet50_vd, ResNet101_vd, ResNet152_vd, ResNet200_vd from .resnext import ResNeXt50_64x4d, ResNeXt101_64x4d, ResNeXt152_64x4d, ResNeXt50_32x4d, ResNeXt101_32x4d, ResNeXt152_32x4d from .resnext_vd import ResNeXt50_vd_64x4d, ResNeXt101_vd_64x4d, ResNeXt152_vd_64x4d, ResNeXt50_vd_32x4d, ResNeXt101_vd_32x4d, ResNeXt152_vd_32x4d from .inception_v4 import InceptionV4 +from .se_resnet_vd import SE_ResNet18_vd, SE_ResNet34_vd, SE_ResNet50_vd, SE_ResNet101_vd, SE_ResNet152_vd, SE_ResNet200_vd from .se_resnext import SE_ResNeXt50_32x4d, SE_ResNeXt101_32x4d, SE_ResNeXt152_32x4d -from .se_resnext_vd import SE_ResNeXt50_32x4d_vd, SE_ResNeXt101_32x4d_vd, SE_154_vd +from .se_resnext_vd import SE_ResNeXt50_vd_32x4d, SE_ResNeXt101_vd_32x4d, SENet154_vd from .dpn import DPN68, DPN92, DPN98, DPN107, DPN131 from .shufflenet_v2_swish import ShuffleNetV2_swish, ShuffleNetV2_x0_5_swish, ShuffleNetV2_x1_0_swish, ShuffleNetV2_x1_5_swish, ShuffleNetV2_x2_0_swish from .shufflenet_v2 import ShuffleNetV2_x0_25, ShuffleNetV2_x0_33, ShuffleNetV2_x0_5, ShuffleNetV2_x1_0, ShuffleNetV2_x1_5, ShuffleNetV2_x2_0, ShuffleNetV2 from .fast_imagenet import FastImageNet -from .xception import Xception_41, Xception_65, Xception_71 +from .xception import Xception41, Xception65, Xception71 +from .xception_deeplab import Xception41_deeplab, Xception65_deeplab, Xception71_deeplab from .densenet import DenseNet121, DenseNet161, DenseNet169, DenseNet201, DenseNet264 from .squeezenet import SqueezeNet1_0, SqueezeNet1_1 from .darknet import DarkNet53 diff --git a/PaddleCV/image_classification/models/dpn.py b/PaddleCV/image_classification/models/dpn.py index 4ab6d6b0..36777b62 100644 --- a/PaddleCV/image_classification/models/dpn.py +++ b/PaddleCV/image_classification/models/dpn.py @@ -37,7 +37,7 @@ class DPN(object): args = self.get_net_args(self.layers) bws = args['bw'] inc_sec = args['inc_sec'] - rs = args['bw'] + rs = args['r'] k_r = args['k_r'] k_sec = args['k_sec'] G = args['G'] @@ -121,12 +121,10 @@ class DPN(object): pool_type='avg', ) stdv = 0.01 - param_attr = fluid.param_attr.ParamAttr( - initializer=fluid.initializer.Uniform(-stdv, stdv)) fc6 = fluid.layers.fc(input=pool5, size=class_dim, - param_attr=param_attr, - name="fc6") + param_attr=ParamAttr(initializer=fluid.initializer.Uniform(-stdv, stdv), name='fc_weights'), + bias_attr=ParamAttr(name='fc_offset')) return fc6 @@ -317,7 +315,7 @@ def DPN68(): def DPN92(): - onvodel = DPN(layers=92) + model = DPN(layers=92) return model diff
--git a/PaddleCV/image_classification/models/mobilenet.py b/PaddleCV/image_classification/models/mobilenet_v1.py similarity index 91% rename from PaddleCV/image_classification/models/mobilenet.py rename to PaddleCV/image_classification/models/mobilenet_v1.py index d6ecca49..e6d15ea8 100644 --- a/PaddleCV/image_classification/models/mobilenet.py +++ b/PaddleCV/image_classification/models/mobilenet_v1.py @@ -20,14 +20,15 @@ import paddle.fluid as fluid from paddle.fluid.initializer import MSRA from paddle.fluid.param_attr import ParamAttr -__all__ = ['MobileNet', 'MobileNetV1'] +__all__ = ['MobileNetV1', 'MobileNetV1_x0_25', 'MobileNetV1_x0_5', 'MobileNetV1_x0_75'] -class MobileNet(): - def __init__(self): - pass +class MobileNetV1(): + def __init__(self, scale=1.0): + self.scale = scale - def net(self, input, class_dim=1000, scale=1.0): + def net(self, input, class_dim=1000): + scale = self.scale # conv1: 112x112 input = self.conv_bn_layer( input, @@ -194,6 +195,21 @@ class MobileNet(): return pointwise_conv +def MobileNetV1_x0_25(): + model = MobileNetV1(scale=0.25) + return model + + +def MobileNetV1_x0_5(): + model = MobileNetV1(scale=0.5) + return model + + +def MobileNetV1_x0_75(): + model = MobileNetV1(scale=0.75) + return model + + -def MobileNetV1(): - model = MobileNet() - return model diff --git a/PaddleCV/image_classification/models/mobilenet_v2.py b/PaddleCV/image_classification/models/mobilenet_v2.py index c21316ee..2070634e 100644 --- a/PaddleCV/image_classification/models/mobilenet_v2.py +++ b/PaddleCV/image_classification/models/mobilenet_v2.py @@ -20,21 +20,18 @@ from paddle.fluid.initializer import MSRA from paddle.fluid.param_attr import ParamAttr __all__ = [ - 'MobileNetV2', 'MobileNetV2_x0_25, ' - 'MobileNetV2_x0_5', 'MobileNetV2_x1_0', 'MobileNetV2_x1_5', - 'MobileNetV2_x2_0', 'MobileNetV2_scale' + 'MobileNetV2_x0_25', 'MobileNetV2_x0_5', + 'MobileNetV2_x0_75', 'MobileNetV2', 'MobileNetV2_x1_5', + 'MobileNetV2_x2_0', ] class MobileNetV2(): - def __init__(self, scale=1.0, change_depth=False): + def __init__(self, scale=1.0): self.scale = scale - self.change_depth = change_depth def net(self, input, class_dim=1000): scale = self.scale - change_depth = self.change_depth - #if change_depth is True, the new depth is 1.4 times as deep as before. bottleneck_params_list = [ (1, 16, 1, 1), (6, 24, 2, 2), @@ -43,14 +40,6 @@ class MobileNetV2(): (6, 96, 3, 1), (6, 160, 3, 2), (6, 320, 1, 1), - ] if change_depth == False else [ - (1, 16, 1, 1), - (6, 24, 2, 2), - (6, 32, 5, 2), - (6, 64, 7, 2), - (6, 96, 5, 1), - (6, 160, 3, 2), - (6, 320, 1, 1), ] #conv1 @@ -221,11 +210,17 @@ def MobileNetV2_x0_5(): return model -def MobileNetV2_x1_0(): - model = MobileNetV2(scale=1.0) - return model +def MobileNetV2_x0_75(): + model = MobileNetV2(scale=0.75) + return model def MobileNetV2_x1_5(): model = MobileNetV2(scale=1.5) return model @@ -234,8 +229,3 @@ def MobileNetV2_x1_5(): def MobileNetV2_x2_0(): model = MobileNetV2(scale=2.0) return model - - -def MobileNetV2_scale(): - model = MobileNetV2(scale=1.2, change_depth=True) - return model diff --git a/PaddleCV/image_classification/models/mobilenet_v3.py b/PaddleCV/image_classification/models/mobilenet_v3.py new file mode 100644 index 00000000..52dc9818 --- /dev/null +++ b/PaddleCV/image_classification/models/mobilenet_v3.py @@ -0,0 +1,265 @@ +#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
+# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import paddle.fluid as fluid +from paddle.fluid.initializer import MSRA +from paddle.fluid.param_attr import ParamAttr + +__all__ = ['MobileNetV3','MobileNetV3_small_x0_25', 'MobileNetV3_small_x0_5', 'MobileNetV3_small_x0_75', + 'MobileNetV3_small_x1_0', 'MobileNetV3_small_x1_25', 'MobileNetV3_large_x0_25', 'MobileNetV3_large_x0_5', + 'MobileNetV3_large_x0_75', 'MobileNetV3_large_x1_0', 'MobileNetV3_large_x1_25'] + + +class MobileNetV3(): + def __init__(self, scale=1.0, model_name='small'): + self.scale = scale + self.inplanes=16 + if model_name == "large": + self.cfg = [ + # k, exp, c, se, nl, s, + [3, 16, 16, False, 'relu', 1], + [3, 64, 24, False, 'relu', 2], + [3, 72, 24, False, 'relu', 1], + [5, 72, 40, True, 'relu', 2], + [5, 120, 40, True, 'relu', 1], + [5, 120, 40, True, 'relu', 1], + [3, 240, 80, False, 'hard_swish', 2], + [3, 200, 80, False, 'hard_swish', 1], + [3, 184, 80, False, 'hard_swish', 1], + [3, 184, 80, False, 'hard_swish', 1], + [3, 480, 112, True, 'hard_swish', 1], + [3, 672, 112, True, 'hard_swish', 1], + [5, 672, 160, True, 'hard_swish', 2], + [5, 960, 160, True, 'hard_swish', 1], + [5, 960, 160, True, 'hard_swish', 1], + ] + self.cls_ch_squeeze = 960 + self.cls_ch_expand = 1280 + elif model_name == "small": + self.cfg = [ + # k, exp, c, se, nl, s, + [3, 16, 16, True, 'relu', 2], + [3, 72, 24, False, 'relu', 2], + [3, 88, 24, False, 'relu', 1], + [5, 96, 40, True, 'hard_swish', 2], + [5, 240, 40, True, 'hard_swish', 1], + [5, 240, 40, True, 'hard_swish', 1], + [5, 120, 48, True, 'hard_swish', 1], + [5, 144, 48, True, 'hard_swish', 1], + [5, 288, 96, True, 'hard_swish', 2], + [5, 576, 96, True, 'hard_swish', 1], + [5, 576, 96, True, 'hard_swish', 1], + ] + self.cls_ch_squeeze = 576 + self.cls_ch_expand = 1280 + else: + raise NotImplementedError + + def net(self, input, class_dim=1000): + scale = self.scale + inplanes = self.inplanes + cfg = self.cfg + cls_ch_squeeze = self.cls_ch_squeeze + cls_ch_expand = self.cls_ch_expand + + #conv1 + conv = self.conv_bn_layer(input, + filter_size=3, + num_filters=int(scale*inplanes), + stride=2, + padding=1, + num_groups=1, + if_act=True, + act='hard_swish', + name='conv1') + i = 0 + for layer_cfg in cfg: + conv = self.residual_unit(input=conv, + num_in_filter=inplanes, + num_mid_filter=int(scale*layer_cfg[1]), + num_out_filter=int(scale*layer_cfg[2]), + act=layer_cfg[4], + stride=layer_cfg[5], + filter_size=layer_cfg[0], + use_se=layer_cfg[3], + name='conv' + str(i+2)) + inplanes = int(scale*layer_cfg[2]) + i += 1 + + conv = self.conv_bn_layer(input=conv, + filter_size=1, + num_filters=int(scale*cls_ch_squeeze), + stride=1, + padding=0, + num_groups=1, + if_act=True, + act='hard_swish', + name='conv_last') + conv = fluid.layers.pool2d(input=conv, pool_type='avg', global_pooling=True, use_cudnn=False) + conv = fluid.layers.conv2d( + input=conv, + num_filters=cls_ch_expand, + 
filter_size=1, + stride=1, + padding=0, + act=None, + param_attr=ParamAttr(name='last_1x1_conv_weights'), + bias_attr=False) + conv = self.hard_swish(conv) + drop = fluid.layers.dropout(x=conv, dropout_prob=0.2) + out = fluid.layers.fc(input=drop, + size=class_dim, + param_attr=ParamAttr(name='fc_weights'), + bias_attr=ParamAttr(name='fc_offset')) + return out + + + + + def conv_bn_layer(self, + input, + filter_size, + num_filters, + stride, + padding, + num_groups=1, + if_act=True, + act=None, + name=None, + use_cudnn=True): + conv = fluid.layers.conv2d( + input=input, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=padding, + groups=num_groups, + act=None, + use_cudnn=use_cudnn, + param_attr=ParamAttr(name=name+'_weights'), + bias_attr=False) + bn_name = name + '_bn' + bn = fluid.layers.batch_norm(input=conv, + param_attr = ParamAttr(name=bn_name+"_scale"), + bias_attr=ParamAttr(name=bn_name+"_offset"), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') + if if_act: + if act == 'relu': + bn = fluid.layers.relu(bn) + elif act == 'hard_swish': + bn = self.hard_swish(bn) + return bn + + def hard_swish(self, x): + return x * fluid.layers.relu6(x+3) / 6. + + def se_block(self, input, num_out_filter, ratio=4, name=None): + num_mid_filter = int(num_out_filter // ratio) + pool = fluid.layers.pool2d( + input=input, pool_type='avg', global_pooling=True, use_cudnn=False) + conv1 = fluid.layers.conv2d(input=pool, filter_size=1, num_filters=num_mid_filter, act='relu', + param_attr=ParamAttr(name=name+'_1_weights'), + bias_attr=ParamAttr(name=name+'_1_offset')) + conv2 = fluid.layers.conv2d(input=conv1, filter_size=1, num_filters=num_out_filter, act='hard_sigmoid', + param_attr=ParamAttr(name=name+'_2_weights'), + bias_attr=ParamAttr(name=name+'_2_offset')) + scale = fluid.layers.elementwise_mul(x=input, y=conv2, axis=0) + return scale + + def residual_unit(self, input, num_in_filter, num_mid_filter, num_out_filter, stride, + filter_size, act=None, use_se=False, name=None): + + first_conv = (num_out_filter != num_mid_filter) + input_data=input + if first_conv: + input = self.conv_bn_layer(input=input, + filter_size=1, + num_filters=num_mid_filter, + stride=1, + padding=0, + if_act=True, + act=act, + name=name + '_expand') + + conv1 = self.conv_bn_layer(input=input, + filter_size=filter_size, + num_filters=num_mid_filter, + stride=stride, + padding=int((filter_size-1)//2), + if_act=True, + act=act, + num_groups=num_mid_filter, + use_cudnn=True, + name=name + '_depthwise') + if use_se: + conv1 = self.se_block(input=conv1, num_out_filter=num_mid_filter, name=name+'_se') + + + conv2 = self.conv_bn_layer(input=conv1, + filter_size=1, + num_filters=num_out_filter, + stride=1, + padding=0, + if_act=False, + name=name + '_linear') + if num_in_filter != num_out_filter or stride != 1: + return conv2 + else: + return fluid.layers.elementwise_add(x=input_data, y=conv2, act=None) + + +def MobileNetV3_small_x0_25(): + model=MobileNetV3(model_name='small', scale=0.25) + return model + +def MobileNetV3_small_x0_5(): + model=MobileNetV3(model_name='small', scale=0.5) + return model + +def MobileNetV3_small_x0_75(): + model=MobileNetV3(model_name='small', scale=0.75) + return model + +def MobileNetV3_small_x1_0(): + model=MobileNetV3(model_name='small', scale=1.0) + return model + +def MobileNetV3_small_x1_25(): + model=MobileNetV3(model_name='small', scale=1.25) + return model + +def MobileNetV3_large_x0_25(): + model=MobileNetV3(model_name='large', 
scale=0.25) + return model + +def MobileNetV3_large_x0_5(): + model=MobileNetV3(model_name='large', scale=0.5) + return model + +def MobileNetV3_large_x0_75(): + model=MobileNetV3(model_name='large', scale=0.75) + return model + +def MobileNetV3_large_x1_0(): + model=MobileNetV3(model_name='large', scale=1.0) + return model + +def MobileNetV3_large_x1_25(): + model=MobileNetV3(model_name='large', scale=1.25) + return model diff --git a/PaddleCV/image_classification/models/model_libs.py b/PaddleCV/image_classification/models/model_libs.py new file mode 100644 index 00000000..a0b97c50 --- /dev/null +++ b/PaddleCV/image_classification/models/model_libs.py @@ -0,0 +1,128 @@ +#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +import paddle +import paddle.fluid as fluid +import contextlib + +bn_regularizer = fluid.regularizer.L2DecayRegularizer(regularization_coeff=0.0) +name_scope = "" + +@contextlib.contextmanager +def scope(name): + global name_scope + bk = name_scope + name_scope = name_scope + name + '/' + yield + name_scope = bk + +def max_pool(input, kernel, stride, padding): + data = fluid.layers.pool2d(input, pool_size=kernel, pool_type='max', + pool_stride=stride, pool_padding=padding) + return data + +def group_norm(input, G, eps=1e-5, param_attr=None, bias_attr=None): + N, C, H, W = input.shape + if C % G != 0: + # print "group can not divide channle:", C, G + for d in range(10): + for t in [d, -d]: + if G + t <= 0: continue + if C % (G + t) == 0: + G = G + t + break + if C % G == 0: + # print "use group size:", G + break + assert C % G == 0 + x = fluid.layers.group_norm( + input, + groups=G, + param_attr=param_attr, + bias_attr=bias_attr, + name=name_scope + 'group_norm') + return x + +def bn(*args, **kargs): + with scope('BatchNorm'): + return fluid.layers.batch_norm( + *args, + epsilon=1e-3, + momentum=0.99, + param_attr=fluid.ParamAttr( + name=name_scope + 'gamma', regularizer=bn_regularizer), + bias_attr=fluid.ParamAttr( + name=name_scope + 'beta', regularizer=bn_regularizer), + moving_mean_name=name_scope + 'moving_mean', + moving_variance_name=name_scope + 'moving_variance', + **kargs) + +def bn_relu(data): + return fluid.layers.relu(bn(data)) + +def relu(data): + return fluid.layers.relu(data) + +def conv(*args, **kargs): + kargs['param_attr'] = name_scope + 'weights' + if 'bias_attr' in kargs and kargs['bias_attr']: + kargs['bias_attr'] = fluid.ParamAttr( + name=name_scope + 'biases', + regularizer=None, + initializer=fluid.initializer.ConstantInitializer(value=0.0)) + else: + kargs['bias_attr'] = False + return fluid.layers.conv2d(*args, **kargs) + +def deconv(*args, **kargs): + kargs['param_attr'] = name_scope + 'weights' + if 'bias_attr' in kargs and kargs['bias_attr']: + kargs['bias_attr'] = name_scope + 'biases' + else: + kargs['bias_attr'] = False + return fluid.layers.conv2d_transpose(*args, **kargs) + +def 
seperate_conv(input, channel, stride, filter, dilation=1, act=None): + param_attr = fluid.ParamAttr( + name=name_scope + 'weights', + regularizer=fluid.regularizer.L2DecayRegularizer( + regularization_coeff=0.0), + initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=0.33)) + with scope('depthwise'): + input = conv( + input, + input.shape[1], + filter, + stride, + groups=input.shape[1], + padding=(filter // 2) * dilation, + dilation=dilation, + use_cudnn=False, + param_attr=param_attr) + input = bn(input) + if act: input = act(input) + + param_attr = fluid.ParamAttr( + name=name_scope + 'weights', + regularizer=None, + initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=0.06)) + with scope('pointwise'): + input = conv(input, channel, 1, 1, groups=1, padding=0, + param_attr=param_attr) + input = bn(input) + if act: input = act(input) + return input diff --git a/PaddleCV/image_classification/models/resnet_vd.py b/PaddleCV/image_classification/models/resnet_vd.py index 2d3fb657..bb04e2f6 100644 --- a/PaddleCV/image_classification/models/resnet_vd.py +++ b/PaddleCV/image_classification/models/resnet_vd.py @@ -23,7 +23,7 @@ import paddle.fluid as fluid from paddle.fluid.param_attr import ParamAttr __all__ = [ - "ResNet", "ResNet50_vd", "ResNet101_vd", "ResNet152_vd", "ResNet200_vd" + "ResNet", "ResNet18_vd", "ResNet34_vd", "ResNet50_vd", "ResNet101_vd", "ResNet152_vd", "ResNet200_vd" ] @@ -35,11 +35,13 @@ class ResNet(): def net(self, input, class_dim=1000): is_3x3 = self.is_3x3 layers = self.layers - supported_layers = [50, 101, 152, 200] + supported_layers = [18, 34, 50, 101, 152, 200] assert layers in supported_layers, \ "supported layers are {} but input layer is {}".format(supported_layers, layers) - if layers == 50: + if layers == 18: + depth = [2, 2, 2, 2] + elif layers == 34 or layers == 50: depth = [3, 4, 6, 3] elif layers == 101: depth = [3, 4, 23, 3] @@ -84,22 +86,33 @@ class ResNet(): pool_stride=2, pool_padding=1, pool_type='max') - - for block in range(len(depth)): - for i in range(depth[block]): - if layers in [101, 152, 200] and block == 2: - if i == 0: - conv_name = "res" + str(block + 2) + "a" + + if layers >= 50: + for block in range(len(depth)): + for i in range(depth[block]): + if layers in [101, 152, 200] and block == 2: + if i == 0: + conv_name = "res" + str(block + 2) + "a" + else: + conv_name = "res" + str(block + 2) + "b" + str(i) else: - conv_name = "res" + str(block + 2) + "b" + str(i) - else: - conv_name = "res" + str(block + 2) + chr(97 + i) - conv = self.bottleneck_block( - input=conv, - num_filters=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, - if_first=block == 0, - name=conv_name) + conv_name = "res" + str(block + 2) + chr(97 + i) + conv = self.bottleneck_block( + input=conv, + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + if_first=block==i==0, + name=conv_name) + else: + for block in range(len(depth)): + for i in range(depth[block]): + conv_name="res"+str(block+2)+chr(97+i) + conv = self.basic_block( + input=conv, + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + if_first=block==i==0, + name=conv_name) pool = fluid.layers.pool2d( input=conv, pool_type='avg', global_pooling=True) @@ -156,7 +169,8 @@ class ResNet(): pool_size=2, pool_stride=2, pool_padding=0, - pool_type='avg') + pool_type='avg', + ceil_mode=True) conv = fluid.layers.conv2d( input=pool, @@ -186,10 +200,12 @@ class ResNet(): if if_first: return self.conv_bn_layer(input, ch_out, 1, stride, name=name) 
else: - return self.conv_bn_layer_new( - input, ch_out, 1, stride, name=name) + return self.conv_bn_layer_new(input, ch_out, 1, stride, name=name) + elif if_first: + return self.conv_bn_layer(input, ch_out, 1, stride, name=name) else: return input + def bottleneck_block(self, input, num_filters, stride, name, if_first): conv0 = self.conv_bn_layer( @@ -220,6 +236,38 @@ class ResNet(): name=name + "_branch1") return fluid.layers.elementwise_add(x=short, y=conv2, act='relu') + + + def basic_block(self, input, num_filters, stride, name, if_first): + conv0 = self.conv_bn_layer( + input=input, + num_filters=num_filters, + filter_size=3, + act='relu', + stride=stride, + name=name+"_branch2a") + conv1 = self.conv_bn_layer( + input=conv0, + num_filters=num_filters, + filter_size=3, + act=None, + name=name+"_branch2b") + short = self.shortcut( + input, + num_filters, + stride, + if_first=if_first, + name=name + "_branch1") + return fluid.layers.elementwise_add(x=short, y=conv1, act='relu') + +def ResNet18_vd(): + model=ResNet(layers=18, is_3x3=True) + return model + + +def ResNet34_vd(): + model=ResNet(layers=34, is_3x3=True) + return model def ResNet50_vd(): diff --git a/PaddleCV/image_classification/models/resnext_vd.py b/PaddleCV/image_classification/models/resnext_vd.py index 9d6c1f6b..92e366fe 100644 --- a/PaddleCV/image_classification/models/resnext_vd.py +++ b/PaddleCV/image_classification/models/resnext_vd.py @@ -158,7 +158,8 @@ class ResNeXt(): pool_size=2, pool_stride=2, pool_padding=0, - pool_type='avg') + pool_type='avg', + ceil_mode=True) conv = fluid.layers.conv2d( input=pool, diff --git a/PaddleCV/image_classification/models/se_resnet_vd.py b/PaddleCV/image_classification/models/se_resnet_vd.py new file mode 100644 index 00000000..aa8b910d --- /dev/null +++ b/PaddleCV/image_classification/models/se_resnet_vd.py @@ -0,0 +1,295 @@ +#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. 
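+#
+# Minimal usage sketch (illustrative, not part of the training pipeline;
+# assumes `image` is a 4-D NCHW input tensor):
+#
+#     model = SE_ResNet50_vd()
+#     logits = model.net(image, class_dim=1000)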
+ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import paddle +import paddle.fluid as fluid +from paddle.fluid.param_attr import ParamAttr +import math + +__all__ = ["SE_ResNet_vd", "SE_ResNet18_vd","SE_ResNet34_vd", "SE_ResNet50_vd", "SE_ResNet101_vd", "SE_ResNet152_vd", + "SE_ResNet200_vd"] + + +class SE_ResNet_vd(): + def __init__(self, layers=50, is_3x3=False): + self.layers = layers + self.is_3x3 = is_3x3 + + def net(self, input, class_dim=1000): + is_3x3 = self.is_3x3 + layers = self.layers + supported_layers = [18, 34, 50, 101, 152, 200] + assert layers in supported_layers, \ + "supported layers are {} but input layer is {}".format(supported_layers, layers) + + if layers == 18: + depth = [2, 2, 2, 2] + elif layers == 34 or layers == 50: + depth = [3, 4, 6, 3] + elif layers == 101: + depth = [3, 4, 23, 3] + elif layers == 152: + depth = [3, 8, 36, 3] + elif layers == 200: + depth = [3, 12, 48, 3] + num_filters = [64, 128, 256, 512] + reduction_ratio = 16 + if is_3x3 == False: + conv = self.conv_bn_layer( + input=input, num_filters=64, filter_size=7, stride=2, act='relu') + else: + conv = self.conv_bn_layer( + input=input, num_filters=32, filter_size=3, stride=2, act='relu', name='conv1_1') + conv = self.conv_bn_layer( + input=conv, num_filters=32, filter_size=3, stride=1, act='relu', name='conv1_2') + conv = self.conv_bn_layer( + input=conv, num_filters=64, filter_size=3, stride=1, act='relu', name='conv1_3') + + conv = fluid.layers.pool2d( + input=conv, + pool_size=3, + pool_stride=2, + pool_padding=1, + pool_type='max') + if layers >= 50: + for block in range(len(depth)): + for i in range(depth[block]): + if layers in [101, 152, 200] and block == 2: + if i == 0: + conv_name="res"+str(block+2)+"a" + else: + conv_name="res"+str(block+2)+"b"+str(i) + else: + conv_name="res"+str(block+2)+chr(97+i) + conv = self.bottleneck_block( + input=conv, + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + if_first=block==i==0, + reduction_ratio=reduction_ratio, + name=conv_name) + + else: + for block in range(len(depth)): + for i in range(depth[block]): + conv_name="res"+str(block+2)+chr(97+i) + conv = self.basic_block( + input=conv, + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + if_first=block==i==0, + reduction_ratio=reduction_ratio, + name=conv_name) + + pool = fluid.layers.pool2d( + input=conv, pool_size=7, pool_type='avg', global_pooling=True) + + + stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) + out = fluid.layers.fc(input=pool, + size=class_dim, + param_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv), name='fc6_weights'), + bias_attr=ParamAttr(name='fc6_offset')) + + return out + + + + + def conv_bn_layer(self, + input, + num_filters, + filter_size, + stride=1, + groups=1, + act=None, + name=None): + conv = fluid.layers.conv2d( + input=input, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + act=None, + param_attr=ParamAttr(name=name + "_weights"), + bias_attr=False) + if name == "conv1": + bn_name = "bn_" + name + else: + bn_name = "bn" + name[3:] + return fluid.layers.batch_norm(input=conv, + act=act, + param_attr=ParamAttr(name=bn_name + '_scale'), + bias_attr=ParamAttr(bn_name + '_offset'), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') + + + def conv_bn_layer_new(self, + input, + num_filters, + filter_size, + 
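# 'vd' downsampling: a 2x2 average pool does the striding; the conv below stays at stride 1 +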
stride=1, + groups=1, + act=None, + name=None): + pool = fluid.layers.pool2d(input=input, + pool_size=2, + pool_stride=2, + pool_padding=0, + pool_type='avg', + ceil_mode=True) + + conv = fluid.layers.conv2d( + input=pool, + num_filters=num_filters, + filter_size=filter_size, + stride=1, + padding=(filter_size - 1) // 2, + groups=groups, + act=None, + param_attr=ParamAttr(name=name + "_weights"), + bias_attr=False) + if name == "conv1": + bn_name = "bn_" + name + else: + bn_name = "bn" + name[3:] + return fluid.layers.batch_norm(input=conv, + act=act, + param_attr=ParamAttr(name=bn_name + '_scale'), + bias_attr=ParamAttr(bn_name + '_offset'), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') + + + + def shortcut(self, input, ch_out, stride, name, if_first=False): + ch_in = input.shape[1] + if ch_in != ch_out or stride != 1: + if if_first: + return self.conv_bn_layer(input, ch_out, 1, stride, name=name) + else: + return self.conv_bn_layer_new(input, ch_out, 1, stride, name=name) + elif if_first: + return self.conv_bn_layer(input, ch_out, 1, stride, name=name) + else: + return input + + def bottleneck_block(self, input, num_filters, stride, name, if_first, reduction_ratio): + conv0 = self.conv_bn_layer( + input=input, + num_filters=num_filters, + filter_size=1, + act='relu', + name=name+"_branch2a") + conv1 = self.conv_bn_layer( + input=conv0, + num_filters=num_filters, + filter_size=3, + stride=stride, + act='relu', + name=name+"_branch2b") + conv2 =self.conv_bn_layer( + input=conv1, + num_filters=num_filters * 4, + filter_size=1, + act=None, + name=name+"_branch2c") + scale = self.squeeze_excitation( + input=conv2, + num_channels=num_filters * 4, + reduction_ratio=reduction_ratio, + name='fc_'+name) + + short = self.shortcut(input, num_filters * 4, stride, if_first=if_first, name=name + "_branch1") + + return fluid.layers.elementwise_add(x=short, y=scale, act='relu') + + def basic_block(self, input, num_filters, stride, name, if_first, reduction_ratio): + conv0 = self.conv_bn_layer(input=input, + num_filters=num_filters, + filter_size=3, + act='relu', + stride=stride, + name=name+"_branch2a") + conv1 = self.conv_bn_layer(input=conv0, + num_filters=num_filters, + filter_size=3, + act=None, + name=name+"_branch2b") + scale = self.squeeze_excitation( + input=conv1, + num_channels=num_filters, + reduction_ratio=reduction_ratio, + name='fc_'+name) + short = self.shortcut(input, + num_filters, + stride, + if_first=if_first, + name=name + "_branch1") + return fluid.layers.elementwise_add(x=short, y=scale, act='relu') + + + def squeeze_excitation(self, input, num_channels, reduction_ratio, name=None): + pool = fluid.layers.pool2d( + input=input, pool_size=0, pool_type='avg', global_pooling=True) + stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) + squeeze = fluid.layers.fc(input=pool, + size=num_channels // reduction_ratio, + act='relu', + param_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.Uniform( + -stdv, stdv),name=name+'_sqz_weights'), + bias_attr=ParamAttr(name=name+'_sqz_offset')) + stdv = 1.0 / math.sqrt(squeeze.shape[1] * 1.0) + excitation = fluid.layers.fc(input=squeeze, + size=num_channels, + act='sigmoid', + param_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv), + name=name+'_exc_weights'), + bias_attr=ParamAttr(name=name+'_exc_offset')) + scale = fluid.layers.elementwise_mul(x=input, y=excitation, axis=0) + return scale + +def SE_ResNet18_vd(): + model = SE_ResNet_vd(layers=18, is_3x3 = True) + 
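# is_3x3=True selects the deep stem: three 3x3 convs in place of a single 7x7 +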
    return model
+
+def SE_ResNet34_vd():
+    model = SE_ResNet_vd(layers=34, is_3x3 = True)
+    return model
+
+def SE_ResNet50_vd():
+    model = SE_ResNet_vd(layers=50, is_3x3 = True)
+    return model
+
+def SE_ResNet101_vd():
+    model = SE_ResNet_vd(layers=101, is_3x3 = True)
+    return model
+
+def SE_ResNet152_vd():
+    model = SE_ResNet_vd(layers=152, is_3x3 = True)
+    return model
+
+def SE_ResNet200_vd():
+    model = SE_ResNet_vd(layers=200, is_3x3 = True)
+    return model
diff --git a/PaddleCV/image_classification/models/se_resnext_vd.py b/PaddleCV/image_classification/models/se_resnext_vd.py
index 3216ec29..8fe90a92 100644
--- a/PaddleCV/image_classification/models/se_resnext_vd.py
+++ b/PaddleCV/image_classification/models/se_resnext_vd.py
@@ -23,11 +23,11 @@ import paddle.fluid as fluid
 from paddle.fluid.param_attr import ParamAttr
 
 __all__ = [
-    "SE_ResNeXt", "SE_ResNeXt50_32x4d_vd", "SE_ResNeXt101_32x4d_vd", "SE154_vd"
+    "SE_ResNeXt_vd", "SE_ResNeXt50_vd_32x4d", "SE_ResNeXt101_vd_32x4d", "SENet154_vd"
 ]
 
 
-class SE_ResNeXt():
+class SE_ResNeXt_vd():
     def __init__(self, layers=50):
         self.layers = layers
 
@@ -262,7 +262,8 @@
             pool_size=2,
             pool_stride=2,
             pool_padding=0,
-            pool_type='avg')
+            pool_type='avg',
+            ceil_mode=True)
 
         conv = fluid.layers.conv2d(
             input=pool,
@@ -312,16 +313,16 @@
         return scale
 
 
-def SE_ResNeXt50_32x4d_vd():
-    model = SE_ResNeXt(layers=50)
+def SE_ResNeXt50_vd_32x4d():
+    model = SE_ResNeXt_vd(layers=50)
     return model
 
 
-def SE_ResNeXt101_32x4d_vd():
-    model = SE_ResNeXt(layers=101)
+def SE_ResNeXt101_vd_32x4d():
+    model = SE_ResNeXt_vd(layers=101)
     return model
 
 
-def SE_154_vd():
-    model = SE_ResNeXt(layers=152)
+def SENet154_vd():
+    model = SE_ResNeXt_vd(layers=152)
     return model
diff --git a/PaddleCV/image_classification/models/xception.py b/PaddleCV/image_classification/models/xception.py
index 89bc1818..de2b7646 100644
--- a/PaddleCV/image_classification/models/xception.py
+++ b/PaddleCV/image_classification/models/xception.py
@@ -22,7 +22,7 @@ import math
 import sys
 from paddle.fluid.param_attr import ParamAttr
 
-__all__ = ['Xception', 'Xception_41', 'Xception_65', 'Xception_71']
+__all__ = ['Xception', 'Xception41', 'Xception65', 'Xception71']
 
 
 class Xception(object):
@@ -252,34 +252,29 @@
             act=None,
             param_attr=ParamAttr(name=name + "_weights"),
             bias_attr=False,
-            use_cudnn=use_cudnn,
-            name=name + '.conv2d.output.1')
+            use_cudnn=use_cudnn)
         bn_name = "bn_" + name
         return fluid.layers.batch_norm(
             input=conv,
             act=act,
-            name=bn_name + '.output.1',
             param_attr=ParamAttr(name=bn_name + '_scale'),
             bias_attr=ParamAttr(bn_name + '_offset'),
             moving_mean_name=bn_name + '_mean',
             moving_variance_name=bn_name + '_variance')
 
 
-def Xception_41():
-    """Xception_41"""
+def Xception41():
     model = Xception(entry_flow_block_num=3, middle_flow_block_num=8)
     return model
 
 
-def Xception_65():
-    """Xception_65"""
+def Xception65():
     model = Xception(entry_flow_block_num=3, middle_flow_block_num=16)
     return model
 
 
-def Xception_71():
-    """Xception_71"""
+def Xception71():
     model = Xception(entry_flow_block_num=5, middle_flow_block_num=16)
     return model
diff --git a/PaddleCV/image_classification/models/xception_deeplab.py b/PaddleCV/image_classification/models/xception_deeplab.py
new file mode 100644
index 00000000..a8138ed9
--- /dev/null
+++ b/PaddleCV/image_classification/models/xception_deeplab.py
@@ -0,0 +1,272 @@
+#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import contextlib
+import paddle
+import math
+import paddle.fluid as fluid
+from .model_libs import scope, name_scope
+from .model_libs import bn, bn_relu, relu
+from .model_libs import conv
+from .model_libs import seperate_conv
+
+__all__ = ['Xception41_deeplab', 'Xception65_deeplab', 'Xception71_deeplab']
+
+def check_data(data, number):
+    # broadcast a scalar setting to a per-block list
+    if isinstance(data, int):
+        return [data] * number
+    assert len(data) == number
+    return data
+
+def check_stride(s, os):
+    # a block may keep its stride only while the accumulated stride
+    # stays within the requested output_stride
+    return s <= os
+
+def check_points(count, points):
+    # True when the current block index is one of the requested end points
+    if points is None:
+        return False
+    if isinstance(points, list):
+        return count in points
+    return count == points
+
+class Xception():
+    def __init__(self, backbone="xception_65"):
+        self.bottleneck_params = self.gen_bottleneck_params(backbone)
+        self.backbone = backbone
+
+    def gen_bottleneck_params(self, backbone='xception_65'):
+        if backbone == 'xception_65':
+            bottleneck_params = {
+                "entry_flow": (3, [2, 2, 2], [128, 256, 728]),
+                "middle_flow": (16, 1, 728),
+                "exit_flow": (2, [2, 1], [[728, 1024, 1024], [1536, 1536, 2048]])
+            }
+        elif backbone == 'xception_41':
+            bottleneck_params = {
+                "entry_flow": (3, [2, 2, 2], [128, 256, 728]),
+                "middle_flow": (8, 1, 728),
+                "exit_flow": (2, [2, 1], [[728, 1024, 1024], [1536, 1536, 2048]])
+            }
+        elif backbone == 'xception_71':
+            bottleneck_params = {
+                "entry_flow": (5, [2, 1, 2, 1, 2], [128, 256, 256, 728, 728]),
+                "middle_flow": (16, 1, 728),
+                "exit_flow": (2, [2, 1], [[728, 1024, 1024], [1536, 1536, 2048]])
+            }
+        else:
+            raise Exception(
+                "xception backbone only supports xception_41/xception_65/xception_71")
+        return bottleneck_params
+
+    def net(self,
+            input,
+            output_stride=32,
+            class_dim=1000,
+            end_points=None,
+            decode_points=None):
+        self.stride = 2
+        self.block_point = 0
+        self.output_stride = output_stride
+        self.decode_points = decode_points
+        self.short_cuts = dict()
+        with scope(self.backbone):
+            # Entry flow
+            data = self.entry_flow(input)
+            if check_points(self.block_point, end_points):
+                return data, self.short_cuts
+
+            # Middle flow
+            data = self.middle_flow(data)
+            if check_points(self.block_point, end_points):
+                return data, self.short_cuts
+
+            # Exit flow
+            data = self.exit_flow(data)
+            if check_points(self.block_point, end_points):
+                return data, self.short_cuts
+
+            data = fluid.layers.reduce_mean(data, [2, 3], keep_dim=True)
+            data = fluid.layers.dropout(data, 0.5)
+            stdv = 1.0 / math.sqrt(data.shape[1] * 1.0)
+            with scope("logit"):
+                out = fluid.layers.fc(
+                    input=data,
+                    size=class_dim,
+                    param_attr=fluid.param_attr.ParamAttr(
+                        name='fc_weights',
+                        initializer=fluid.initializer.Uniform(-stdv, stdv)),
+                    bias_attr=fluid.param_attr.ParamAttr(name='fc_bias'))
+
+        return out
+
+    def entry_flow(self, data):
+        param_attr = fluid.ParamAttr(
+            name=name_scope + 'weights',
regularizer=None, + initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=0.09)) + with scope("entry_flow"): + with scope("conv1"): + data = bn_relu(conv(data, 32, 3, stride=2, padding=1, param_attr=param_attr)) + with scope("conv2"): + data = bn_relu(conv(data, 64, 3, stride=1, padding=1, param_attr=param_attr)) + + # get entry flow params + block_num = self.bottleneck_params["entry_flow"][0] + strides = self.bottleneck_params["entry_flow"][1] + chns = self.bottleneck_params["entry_flow"][2] + strides = check_data(strides, block_num) + chns = check_data(chns, block_num) + + # params to control your flow + s = self.stride + block_point = self.block_point + output_stride = self.output_stride + with scope("entry_flow"): + for i in range(block_num): + block_point = block_point + 1 + with scope("block" + str(i + 1)): + stride = strides[i] if check_stride(s*strides[i], output_stride) else 1 + data, short_cuts = self.xception_block(data, chns[i], [1, 1, stride]) + s = s * stride + if check_points(block_point, self.decode_points): + self.short_cuts[block_point] = short_cuts[1] + + self.stride = s + self.block_point = block_point + return data + + def middle_flow(self, data): + block_num = self.bottleneck_params["middle_flow"][0] + strides = self.bottleneck_params["middle_flow"][1] + chns = self.bottleneck_params["middle_flow"][2] + strides = check_data(strides, block_num) + chns = check_data(chns, block_num) + + # params to control your flow + s = self.stride + block_point = self.block_point + output_stride = self.output_stride + with scope("middle_flow"): + for i in range(block_num): + block_point = block_point + 1 + with scope("block" + str(i + 1)): + stride = strides[i] if check_stride(s*strides[i], output_stride) else 1 + data, short_cuts = self.xception_block(data, chns[i], [1, 1, strides[i]], skip_conv=False) + s = s * stride + if check_points(block_point, self.decode_points): + self.short_cuts[block_point] = short_cuts[1] + + self.stride = s + self.block_point = block_point + return data + + def exit_flow(self, data): + block_num = self.bottleneck_params["exit_flow"][0] + strides = self.bottleneck_params["exit_flow"][1] + chns = self.bottleneck_params["exit_flow"][2] + strides = check_data(strides, block_num) + chns = check_data(chns, block_num) + + assert(block_num==2) + # params to control your flow + s = self.stride + block_point = self.block_point + output_stride = self.output_stride + with scope("exit_flow"): + with scope('block1'): + block_point += 1 + stride = strides[0] if check_stride(s*strides[0], output_stride) else 1 + data, short_cuts = self.xception_block(data, chns[0], [1, 1, stride]) + s = s * stride + if check_points(block_point, self.decode_points): + self.short_cuts[block_point] = short_cuts[1] + with scope('block2'): + block_point += 1 + stride = strides[1] if check_stride(s*strides[1], output_stride) else 1 + data, short_cuts = self.xception_block( + data, chns[1], [1, 1, stride], + dilation=2, + has_skip=False, + activation_fn_in_separable_conv=True) + s = s * stride + if check_points(block_point, self.decode_points): + self.short_cuts[block_point] = short_cuts[1] + + self.stride = s + self.block_point = block_point + return data + + def xception_block(self, + input, + channels, + strides=1, + filters=3, + dilation=1, + skip_conv=True, + has_skip=True, + activation_fn_in_separable_conv=False): + repeat_number = 3 + channels = check_data(channels, repeat_number) + filters = check_data(filters, repeat_number) + strides = check_data(strides, repeat_number) + data 
= input + results = [] + for i in range(repeat_number): + with scope('separable_conv' + str(i + 1)): + if not activation_fn_in_separable_conv: + data = relu(data) + data = seperate_conv( + data, + channels[i], + strides[i], + filters[i], + dilation=dilation) + else: + data = seperate_conv( + data, + channels[i], + strides[i], + filters[i], + dilation=dilation, + act=relu) + results.append(data) + if not has_skip: + return data, results + if skip_conv: + param_attr = fluid.ParamAttr( + name=name_scope + 'weights', + regularizer=None, + initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=0.09)) + with scope('shortcut'): + skip = bn( + conv(input, channels[-1], 1, strides[-1], groups=1, + padding=0, param_attr=param_attr)) + else: + skip = input + return data + skip, results + +def Xception41_deeplab(): + model = Xception("xception_41") + return model + +def Xception65_deeplab(): + model = Xception("xception_65") + return model + +def Xception71_deeplab(): + model = Xception("xception_71") + return model diff --git a/PaddleCV/image_classification/scripts/train/DPN107.sh b/PaddleCV/image_classification/scripts/train/DPN107.sh new file mode 100644 index 00000000..5fdcf18e --- /dev/null +++ b/PaddleCV/image_classification/scripts/train/DPN107.sh @@ -0,0 +1,15 @@ +#DPN107 +python train.py \ + --model=DPN107 \ + --batch_size=256 \ + --total_images=1281167 \ + --image_shape=3,224,224 \ + --class_dim=1000 \ + --lr_strategy=cosine_decay \ + --lr=0.1 \ + --num_epochs=200 \ + --model_save_dir=output/ \ + --l2_decay=1e-4 \ + --use_mixup=True \ + --use_label_smoothing=True \ + --label_smoothing_epsilon=0.1 diff --git a/PaddleCV/image_classification/scripts/train/DPN131.sh b/PaddleCV/image_classification/scripts/train/DPN131.sh new file mode 100644 index 00000000..9ad47671 --- /dev/null +++ b/PaddleCV/image_classification/scripts/train/DPN131.sh @@ -0,0 +1,15 @@ +#DPN131 +python train.py \ + --model=DPN131 \ + --batch_size=256 \ + --total_images=1281167 \ + --image_shape=3,224,224 \ + --class_dim=1000 \ + --lr_strategy=cosine_decay \ + --lr=0.1 \ + --num_epochs=200 \ + --model_save_dir=output/ \ + --l2_decay=1e-4 \ + --use_mixup=True \ + --use_label_smoothing=True \ + --label_smoothing_epsilon=0.1 diff --git a/PaddleCV/image_classification/scripts/train/DPN68.sh b/PaddleCV/image_classification/scripts/train/DPN68.sh new file mode 100644 index 00000000..2397a267 --- /dev/null +++ b/PaddleCV/image_classification/scripts/train/DPN68.sh @@ -0,0 +1,15 @@ +#DPN68 +python train.py \ + --model=DPN68 \ + --batch_size=256 \ + --total_images=1281167 \ + --image_shape=3,224,224 \ + --class_dim=1000 \ + --lr_strategy=cosine_decay \ + --lr=0.1 \ + --num_epochs=200 \ + --model_save_dir=output/ \ + --l2_decay=1e-4 \ + --use_mixup=True \ + --use_label_smoothing=True \ + --label_smoothing_epsilon=0.1 diff --git a/PaddleCV/image_classification/scripts/train/DPN92.sh b/PaddleCV/image_classification/scripts/train/DPN92.sh new file mode 100644 index 00000000..27578a46 --- /dev/null +++ b/PaddleCV/image_classification/scripts/train/DPN92.sh @@ -0,0 +1,15 @@ +#DPN92 +python train.py \ + --model=DPN92 \ + --batch_size=256 \ + --total_images=1281167 \ + --image_shape=3,224,224 \ + --class_dim=1000 \ + --lr_strategy=cosine_decay \ + --lr=0.1 \ + --num_epochs=200 \ + --model_save_dir=output/ \ + --l2_decay=1e-4 \ + --use_mixup=True \ + --use_label_smoothing=True \ + --label_smoothing_epsilon=0.1 diff --git a/PaddleCV/image_classification/scripts/train/DPN98.sh b/PaddleCV/image_classification/scripts/train/DPN98.sh new file 
mode 100644 index 00000000..150f461b --- /dev/null +++ b/PaddleCV/image_classification/scripts/train/DPN98.sh @@ -0,0 +1,15 @@ +#DPN98 +python train.py \ + --model=DPN98 \ + --batch_size=256 \ + --total_images=1281167 \ + --image_shape=3,224,224 \ + --class_dim=1000 \ + --lr_strategy=cosine_decay \ + --lr=0.1 \ + --num_epochs=200 \ + --model_save_dir=output/ \ + --l2_decay=1e-4 \ + --use_mixup=True \ + --use_label_smoothing=True \ + --label_smoothing_epsilon=0.1 diff --git a/PaddleCV/image_classification/scripts/train/MobileNetV1.sh b/PaddleCV/image_classification/scripts/train/MobileNetV1.sh index 8d00ce7c..4f9f72f6 100644 --- a/PaddleCV/image_classification/scripts/train/MobileNetV1.sh +++ b/PaddleCV/image_classification/scripts/train/MobileNetV1.sh @@ -7,7 +7,7 @@ export FLAGS_fraction_of_gpu_memory_to_use=0.98 python train.py \ - --model=MobileNetV1 \ + --model=MobileNetV1_x1_0 \ --batch_size=256 \ --total_images=1281167 \ --class_dim=1000 \ diff --git a/PaddleCV/image_classification/scripts/train/MobileNetV1_x0_25.sh b/PaddleCV/image_classification/scripts/train/MobileNetV1_x0_25.sh new file mode 100644 index 00000000..aa7f74ba --- /dev/null +++ b/PaddleCV/image_classification/scripts/train/MobileNetV1_x0_25.sh @@ -0,0 +1,17 @@ +export CUDA_VISIBLE_DEVICES=0,1,2,3 +export FLAGS_fast_eager_deletion_mode=1 +export FLAGS_eager_delete_tensor_gb=0.0 +export FLAGS_fraction_of_gpu_memory_to_use=0.98 + + +python train.py \ + --model=MobileNetV1_x0_25 \ + --batch_size=256 \ + --total_images=1281167 \ + --class_dim=1000 \ + --image_shape=3,224,224 \ + --model_save_dir=output/ \ + --lr_strategy=piecewise_decay \ + --num_epochs=120 \ + --lr=0.1 \ + --l2_decay=3e-5 \ diff --git a/PaddleCV/image_classification/scripts/train/MobileNetV1_x0_5.sh b/PaddleCV/image_classification/scripts/train/MobileNetV1_x0_5.sh new file mode 100644 index 00000000..85fdbfdc --- /dev/null +++ b/PaddleCV/image_classification/scripts/train/MobileNetV1_x0_5.sh @@ -0,0 +1,17 @@ +export CUDA_VISIBLE_DEVICES=0,1,2,3 +export FLAGS_fast_eager_deletion_mode=1 +export FLAGS_eager_delete_tensor_gb=0.0 +export FLAGS_fraction_of_gpu_memory_to_use=0.98 + + +python train.py \ + --model=MobileNetV1_x0_5 \ + --batch_size=256 \ + --total_images=1281167 \ + --class_dim=1000 \ + --image_shape=3,224,224 \ + --model_save_dir=output/ \ + --lr_strategy=piecewise_decay \ + --num_epochs=120 \ + --lr=0.1 \ + --l2_decay=3e-5 \ diff --git a/PaddleCV/image_classification/scripts/train/MobileNetV1_x0_75.sh b/PaddleCV/image_classification/scripts/train/MobileNetV1_x0_75.sh new file mode 100644 index 00000000..ceeba744 --- /dev/null +++ b/PaddleCV/image_classification/scripts/train/MobileNetV1_x0_75.sh @@ -0,0 +1,17 @@ +export CUDA_VISIBLE_DEVICES=0,1,2,3 +export FLAGS_fast_eager_deletion_mode=1 +export FLAGS_eager_delete_tensor_gb=0.0 +export FLAGS_fraction_of_gpu_memory_to_use=0.98 + + +python train.py \ + --model=MobileNetV1_x0_75 \ + --batch_size=256 \ + --total_images=1281167 \ + --class_dim=1000 \ + --image_shape=3,224,224 \ + --model_save_dir=output/ \ + --lr_strategy=piecewise_decay \ + --num_epochs=120 \ + --lr=0.1 \ + --l2_decay=3e-5 \ diff --git a/PaddleCV/image_classification/scripts/train/MobileNetV2.sh b/PaddleCV/image_classification/scripts/train/MobileNetV2.sh index 7a0ce41c..d8eb1ea7 100644 --- a/PaddleCV/image_classification/scripts/train/MobileNetV2.sh +++ b/PaddleCV/image_classification/scripts/train/MobileNetV2.sh @@ -7,7 +7,7 @@ export FLAGS_fraction_of_gpu_memory_to_use=0.98 python train.py \ - --model=MobileNetV2 \ + 
--model=MobileNetV2_x1_0 \ --batch_size=500 \ --total_images=1281167 \ --class_dim=1000 \ diff --git a/PaddleCV/image_classification/scripts/train/MobileNetV2_x0_75.sh b/PaddleCV/image_classification/scripts/train/MobileNetV2_x0_75.sh new file mode 100644 index 00000000..511cfa71 --- /dev/null +++ b/PaddleCV/image_classification/scripts/train/MobileNetV2_x0_75.sh @@ -0,0 +1,17 @@ +export CUDA_VISIBLE_DEVICES=0,1,2,3 +export FLAGS_fast_eager_deletion_mode=1 +export FLAGS_eager_delete_tensor_gb=0.0 +export FLAGS_fraction_of_gpu_memory_to_use=0.98 + + +python train.py \ + --model=MobileNetV2_x0_75 \ + --batch_size=256 \ + --total_images=1281167 \ + --class_dim=1000 \ + --image_shape=3,224,224 \ + --model_save_dir=output/ \ + --lr_strategy=cosine_decay \ + --num_epochs=240 \ + --lr=0.045 \ + --l2_decay=4e-5 diff --git a/PaddleCV/image_classification/scripts/train/ResNeXt101_32x4d.sh b/PaddleCV/image_classification/scripts/train/ResNeXt101_32x4d.sh index e39008f5..91d8b5bb 100644 --- a/PaddleCV/image_classification/scripts/train/ResNeXt101_32x4d.sh +++ b/PaddleCV/image_classification/scripts/train/ResNeXt101_32x4d.sh @@ -1,14 +1,19 @@ -#ResNeXt101_32x4d #Training details +#Missed +export CUDA_VISIBLE_DEVICES=0,1,2,3 +export FLAGS_fast_eager_deletion_mode=1 +export FLAGS_eager_delete_tensor_gb=0.0 +export FLAGS_fraction_of_gpu_memory_to_use=0.98 +#ResNeXt101_32x4d python train.py \ - --model=ResNeXt101_32x4d \ - --batch_size=256 \ - --total_images=1281167 \ - --image_shape=3,224,224 \ - --class_dim=1000 \ - --lr_strategy=piecewise_decay \ - --lr=0.1 \ - --num_epochs=120 \ - --model_save_dir=output/ \ - --l2_decay=1e-4 + --model=ResNeXt101_32x4d \ + --batch_size=256 \ + --total_images=1281167 \ + --image_shape=3,224,224 \ + --class_dim=1000 \ + --lr_strategy=piecewise_decay \ + --lr=0.1 \ + --num_epochs=120 \ + --model_save_dir=output/ \ + --l2_decay=1e-4 diff --git a/PaddleCV/image_classification/scripts/train/ResNeXt101_vd_32x4d.sh b/PaddleCV/image_classification/scripts/train/ResNeXt101_vd_32x4d.sh new file mode 100644 index 00000000..5e934480 --- /dev/null +++ b/PaddleCV/image_classification/scripts/train/ResNeXt101_vd_32x4d.sh @@ -0,0 +1,22 @@ +#Training details +#Missed +export CUDA_VISIBLE_DEVICES=0,1,2,3 +export FLAGS_fast_eager_deletion_mode=1 +export FLAGS_eager_delete_tensor_gb=0.0 +export FLAGS_fraction_of_gpu_memory_to_use=0.98 + +#ResNeXt101_vd_32x4d +python train.py \ + --model=ResNeXt101_vd_32x4d \ + --batch_size=256 \ + --total_images=1281167 \ + --image_shape=3,224,224 \ + --class_dim=1000 \ + --lr_strategy=cosine_decay \ + --lr=0.1 \ + --num_epochs=200 \ + --model_save_dir=output/ \ + --l2_decay=1e-4 \ + --use_mixup=True \ + --use_label_smoothing=True \ + --label_smoothing_epsilon=0.1 diff --git a/PaddleCV/image_classification/scripts/train/ResNeXt152_vd_64x4d.sh b/PaddleCV/image_classification/scripts/train/ResNeXt152_vd_64x4d.sh new file mode 100644 index 00000000..b9663f27 --- /dev/null +++ b/PaddleCV/image_classification/scripts/train/ResNeXt152_vd_64x4d.sh @@ -0,0 +1,15 @@ +#ResNeXt152_vd_64x4d +python train.py \ + --model=ResNeXt152_vd_64x4d \ + --batch_size=256 \ + --total_images=1281167 \ + --image_shape=3,224,224 \ + --class_dim=1000 \ + --lr_strategy=cosine_decay \ + --lr=0.1 \ + --num_epochs=200 \ + --model_save_dir=output/ \ + --l2_decay=1e-4 \ + --use_mixup=True \ + --use_label_smoothing=True \ + --label_smoothing_epsilon=0.1 \ diff --git a/PaddleCV/image_classification/scripts/train/ResNet18_vd.sh b/PaddleCV/image_classification/scripts/train/ResNet18_vd.sh new 
file mode 100644 index 00000000..c95b9325 --- /dev/null +++ b/PaddleCV/image_classification/scripts/train/ResNet18_vd.sh @@ -0,0 +1,19 @@ +export CUDA_VISIBLE_DEVICES=0,1,2,3 +export FLAGS_fast_eager_deletion_mode=1 +export FLAGS_eager_delete_tensor_gb=0.0 +export FLAGS_fraction_of_gpu_memory_to_use=0.98 + +python train.py \ + --model=ResNet18_vd \ + --batch_size=256 \ + --total_images=1281167 \ + --image_shape=3,224,224 \ + --class_dim=1000 \ + --lr_strategy=cosine_decay \ + --lr=0.1 \ + --num_epochs=200 \ + --model_save_dir=output/ \ + --l2_decay=7e-5 \ + --use_mixup=True \ + --use_label_smoothing=True \ + --label_smoothing_epsilon=0.1 diff --git a/PaddleCV/image_classification/scripts/train/ResNet34_vd.sh b/PaddleCV/image_classification/scripts/train/ResNet34_vd.sh new file mode 100644 index 00000000..56a31b69 --- /dev/null +++ b/PaddleCV/image_classification/scripts/train/ResNet34_vd.sh @@ -0,0 +1,19 @@ +export CUDA_VISIBLE_DEVICES=0,1,2,3 +export FLAGS_fast_eager_deletion_mode=1 +export FLAGS_eager_delete_tensor_gb=0.0 +export FLAGS_fraction_of_gpu_memory_to_use=0.98 + +python train.py \ + --model=ResNet34_vd \ + --batch_size=256 \ + --total_images=1281167 \ + --image_shape=3,224,224 \ + --class_dim=1000 \ + --lr_strategy=cosine_decay \ + --lr=0.1 \ + --num_epochs=200 \ + --model_save_dir=output/ \ + --l2_decay=7e-5 \ + --use_mixup=True \ + --use_label_smoothing=True \ + --label_smoothing_epsilon=0.1 diff --git a/PaddleCV/image_classification/scripts/train/SE_154_vd.sh b/PaddleCV/image_classification/scripts/train/SENet154_vd.sh similarity index 95% rename from PaddleCV/image_classification/scripts/train/SE_154_vd.sh rename to PaddleCV/image_classification/scripts/train/SENet154_vd.sh index e61cc6fd..a363a108 100644 --- a/PaddleCV/image_classification/scripts/train/SE_154_vd.sh +++ b/PaddleCV/image_classification/scripts/train/SENet154_vd.sh @@ -7,7 +7,7 @@ export FLAGS_fraction_of_gpu_memory_to_use=0.98 #SE_154 python train.py \ - --model=SE_154_vd \ + --model=SENet154_vd \ --batch_size=256 \ --total_images=1281167 \ --image_shape=3,224,224 \ diff --git a/PaddleCV/image_classification/scripts/train/SE_ResNet50_vd.sh b/PaddleCV/image_classification/scripts/train/SE_ResNet50_vd.sh new file mode 100644 index 00000000..9bddaf75 --- /dev/null +++ b/PaddleCV/image_classification/scripts/train/SE_ResNet50_vd.sh @@ -0,0 +1,15 @@ +#SE_ResNet50_vd +python train.py \ + --model=SE_ResNet50_vd \ + --batch_size=256 \ + --total_images=1281167 \ + --image_shape=3,224,224 \ + --class_dim=1000 \ + --lr_strategy=cosine_decay \ + --lr=0.1 \ + --num_epochs=200 \ + --model_save_dir=output/ \ + --l2_decay=1e-4 \ + --use_mixup=True \ + --use_label_smoothing=True \ + --label_smoothing_epsilon=0.1 \ diff --git a/PaddleCV/image_classification/scripts/train/Xception_41.sh b/PaddleCV/image_classification/scripts/train/Xception41.sh similarity index 81% rename from PaddleCV/image_classification/scripts/train/Xception_41.sh rename to PaddleCV/image_classification/scripts/train/Xception41.sh index 57ec3910..1be8e5bb 100644 --- a/PaddleCV/image_classification/scripts/train/Xception_41.sh +++ b/PaddleCV/image_classification/scripts/train/Xception41.sh @@ -1,8 +1,8 @@ python train.py \ - --model=Xception_41 \ + --model=Xception41 \ --batch_size=256 \ --total_images=1281167 \ - --image_shape=3,224,224 \ + --image_shape=3,299,299 \ --class_dim=1000 \ --lr_strategy=cosine_decay \ --lr=0.045 \ diff --git a/PaddleCV/image_classification/scripts/train/Xception41_deeplab.sh 
b/PaddleCV/image_classification/scripts/train/Xception41_deeplab.sh new file mode 100644 index 00000000..0ba5fcc2 --- /dev/null +++ b/PaddleCV/image_classification/scripts/train/Xception41_deeplab.sh @@ -0,0 +1,13 @@ +#Xception41_deeplab +python train.py \ + --model=Xception41_deeplab \ + --batch_size=256 \ + --total_images=1281167 \ + --image_shape=3,299,299 \ + --class_dim=1000 \ + --lr_strategy=cosine_decay \ + --lr=0.045 \ + --num_epochs=120 \ + --model_save_dir=output/ \ + --l2_decay=1e-4 \ + --resize_short_size=320 diff --git a/PaddleCV/image_classification/scripts/train/Xception65.sh b/PaddleCV/image_classification/scripts/train/Xception65.sh new file mode 100644 index 00000000..a465194f --- /dev/null +++ b/PaddleCV/image_classification/scripts/train/Xception65.sh @@ -0,0 +1,16 @@ +#Xception65 +python train.py \ + --model=Xception65 \ + --batch_size=256 \ + --total_images=1281167 \ + --image_shape=3,299,299 \ + --class_dim=1000 \ + --lr_strategy=cosine_decay \ + --lr=0.1 \ + --num_epochs=200 \ + --model_save_dir=output/ \ + --l2_decay=1e-4 \ + --use_mixup=True \ + --resize_short_size=320 \ + --use_label_smoothing=True \ + --label_smoothing_epsilon=0.1 \ diff --git a/PaddleCV/image_classification/scripts/train/Xception65_deeplab.sh b/PaddleCV/image_classification/scripts/train/Xception65_deeplab.sh new file mode 100644 index 00000000..6cc49d85 --- /dev/null +++ b/PaddleCV/image_classification/scripts/train/Xception65_deeplab.sh @@ -0,0 +1,13 @@ +#Xception65_deeplab +python train.py \ + --model=Xception65_deeplab \ + --batch_size=256 \ + --total_images=1281167 \ + --image_shape=3,299,299 \ + --class_dim=1000 \ + --lr_strategy=cosine_decay \ + --lr=0.045 \ + --num_epochs=120 \ + --model_save_dir=output/ \ + --l2_decay=1e-4 \ + --resize_short_size=320 diff --git a/PaddleCV/image_classification/scripts/train/Xception71.sh b/PaddleCV/image_classification/scripts/train/Xception71.sh new file mode 100644 index 00000000..8e40eebc --- /dev/null +++ b/PaddleCV/image_classification/scripts/train/Xception71.sh @@ -0,0 +1,16 @@ +#Xception71 +python train.py \ + --model=Xception71 \ + --batch_size=256 \ + --total_images=1281167 \ + --image_shape=3,299,299 \ + --class_dim=1000 \ + --lr_strategy=cosine_decay \ + --lr=0.1 \ + --num_epochs=200 \ + --model_save_dir=output/ \ + --l2_decay=1e-4 \ + --use_mixup=True \ + --resize_short_size=320 \ + --use_label_smoothing=True \ + --label_smoothing_epsilon=0.1 \ diff --git a/PaddleCV/image_classification/utils/utility.py b/PaddleCV/image_classification/utils/utility.py index 42a627f3..fe3beed3 100644 --- a/PaddleCV/image_classification/utils/utility.py +++ b/PaddleCV/image_classification/utils/utility.py @@ -123,7 +123,7 @@ def parse_args(): #add_arg('use_fp16', bool, False, "Whether to enable half precision training with fp16." ) #add_arg('scale_loss', float, 1.0, "The value of scale_loss for fp16." ) add_arg('use_label_smoothing', bool, False, "Whether to use label_smoothing") - add_arg('label_smoothing_epsilon', float, 0.2, "The value of label_smoothing_epsilon parameter") + add_arg('label_smoothing_epsilon', float, 0.1, "The value of label_smoothing_epsilon parameter") #NOTE: (2019/08/08) temporary disable use_distill #add_arg('use_distill', bool, False, "Whether to use distill") add_arg('random_seed', int, None, "random seed") -- GitLab