From 97121cde11307e5cb1aba46a94ecd0b94a015278 Mon Sep 17 00:00:00 2001 From: cuicheng01 <45199522+cuicheng01@users.noreply.github.com> Date: Wed, 3 Jul 2019 11:33:02 +0800 Subject: [PATCH] Update models (#2683) * add mobilenet_v2_x0_5, resnext50_32x4d, resnext50_64x4d, xception41 pretrained models * Update README_cn.md --- PaddleCV/image_classification/README.md | 58 +++- PaddleCV/image_classification/README_cn.md | 54 +++- .../image_classification/models/__init__.py | 6 +- .../models/mobilenet_v2.py | 64 ++++- .../image_classification/models/resnext.py | 37 ++- .../image_classification/models/xception.py | 264 ++++++++++++++++++ 6 files changed, 447 insertions(+), 36 deletions(-) create mode 100644 PaddleCV/image_classification/models/xception.py diff --git a/PaddleCV/image_classification/README.md b/PaddleCV/image_classification/README.md index ada3b92c..4eb33872 100644 --- a/PaddleCV/image_classification/README.md +++ b/PaddleCV/image_classification/README.md @@ -166,20 +166,39 @@ The image classification models currently supported by PaddlePaddle are listed i As the activation function ```swish``` and ```relu6``` which separately used in ShuffleNetV2 and MobileNetV2 net are not supported by Paddle TensorRT, inference acceleration performance of them doesn't significient improve. Pretrained models can be downloaded by clicking related model names. - Note1: ResNet50_vd_v2 is the distilled version of ResNet50_vd. -- Note2: In addition to the image resolution feeded in InceptionV4 net is ```299x299```, others are ```224x224```. +- Note2: In addition to the image resolution feeded in InceptionV4 and Xception net is ```299x299```, others are ```224x224```. 
- Note3: It's necessary to convert the train model to a binary model when appling dynamic link library to infer, One can do it by running following command: ```python infer.py --save_inference=True``` - + +### AlexNet |model | top-1/top-5 accuracy(CV2) | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | |- |:-: |:-: |:-: | |[AlexNet](http://paddle-imagenet-models-name.bj.bcebos.com/AlexNet_pretrained.tar) | 56.72%/79.17% | 3.083 | 2.728 | + +### VGG +|model | top-1/top-5 accuracy(CV2) | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | +|- |:-: |:-: |:-: | |[VGG11](https://paddle-imagenet-models-name.bj.bcebos.com/VGG11_pretrained.tar) | 69.28%/89.09% | 8.223 | 6.821 | |[VGG13](https://paddle-imagenet-models-name.bj.bcebos.com/VGG13_pretrained.tar) | 70.02%/89.42% | 9.512 | 7.783 | |[VGG16](https://paddle-imagenet-models-name.bj.bcebos.com/VGG16_pretrained.tar) | 72.00%/90.69% | 11.315 | 9.067 | |[VGG19](https://paddle-imagenet-models-name.bj.bcebos.com/VGG19_pretrained.tar) | 72.56%/90.93% | 13.096 | 10.388 | -|[MobileNetV1](http://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV1_pretrained.tar) | 70.99%/89.68% | 2.609 | 1.615 | -|[MobileNetV2](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV2_pretrained.tar) | 72.15%/90.65% | 4.546 | 5.278 | + +### MobileNet +|model | top-1/top-5 accuracy(CV2) | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | +|- |:-: |:-: |:-: | +|[MobileNetV1](http://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV1_pretrained.tar) | 70.99%/89.68% | 2.609 |1.615 | +|[MobileNetV2_x0_5](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV2_x0_5_pretrained.tar) | 65.03%/85.72% | 4.514 | 4.150 | +|[MobileNetV2_x1_0](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV2_pretrained.tar) | 72.15%/90.65% | 4.546 | 5.278 | + +### ShuffleNet +|model | top-1/top-5 accuracy(CV2) | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | +|- 
|:-: |:-: |:-: | +|[ShuffleNetV2](https://paddle-imagenet-models-name.bj.bcebos.com/ShuffleNetV2_pretrained.tar) | 70.03%/89.17% | 6.078 | 6.282 | + +### ResNet +|model | top-1/top-5 accuracy(CV2) | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | +|- |:-: |:-: |:-: | |[ResNet18](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet18_pretrained.tar) | 70.98%/89.92% | 3.456 | 2.484 | |[ResNet34](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet34_pretrained.tar) | 74.57%/92.14% | 5.668 | 3.767 | |[ResNet50](http://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_pretrained.tar) | 76.50%/93.00% | 8.787 | 5.434 | @@ -187,19 +206,36 @@ As the activation function ```swish``` and ```relu6``` which separately used in |[ResNet50_vd](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vd_pretrained.tar) | 79.12%/94.44% | 9.058 | 5.510 | |[ResNet50_vd_v2](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vd_v2_pretrained.tar) | 79.84%/94.93% | 9.058 | 5.510 | |[ResNet101](http://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_pretrained.tar) | 77.56%/93.64% | 15.447 | 8.779 | -|[ResNet101_vd](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_vd_pretrained.tar) | 79.44%/94.47% | 15.685 | 8.878 | +|[ResNet101_vd](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_vd_pretrained.tar) | 80.17%/94.97% | 15.685 | 8.878 | |[ResNet152](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet152_pretrained.tar) | 78.26%/93.96% | 21.816 | 12.148 | |[ResNet152_vd](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet152_vd_pretrained.tar) | 80.59%/95.30% | 22.041 | 12.259 | |[ResNet200_vd](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet200_vd_pretrained.tar) | 80.93%/95.33% | 28.015 | 15.278 | -|[ResNeXt101_64x4d](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt101_64x4d_pretrained.tar) | 79.35%/94.52% | 41.073 | 38.736 | + +### ResNeXt +|model | top-1/top-5 accuracy(CV2) | Paddle Fluid 
inference time(ms) | Paddle TensorRT inference time(ms) | +|- |:-: |:-: |:-: | +|[ResNeXt50_32x4d](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt50_32x4d_pretrained.tar) | 77.75%/93.82% | 12.863 | 9.837 | +|[ResNeXt50_64x4d](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt50_64x4d_pretrained.tar) | 78.43%/94.13% | 28.162 | 18.271 | +|[ResNeXt101_64x4d](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt101_64x4d_pretrained.tar) | 79.35%/94.52% | 41.073 | 38.736 | |[ResNeXt101_vd_64x4d](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt101_vd_64x4d_pretrained.tar) | 80.78%/95.20% | 42.277 | 40.929 | + +### SENet +|model | top-1/top-5 accuracy(CV2) | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | +|- |:-: |:-: |:-: | |[SE_ResNeXt50_32x4d](https://paddle-imagenet-models-name.bj.bcebos.com/SE_ResNeXt50_32x4d_pretrained.tar) | 78.44%/93.96% | 14.916 | 12.126 | |[SE_ResNeXt101_32x4d](https://paddle-imagenet-models-name.bj.bcebos.com/SE_ResNeXt101_32x4d_pretrained.tar) | 79.12%/94.20% | 30.085 | 24.110 | -|[SE154_vd](https://paddle-imagenet-models-name.bj.bcebos.com/SE154_vd_pretrained.tar) | 81.40%/95.48% | 71.892 | 64.855 | +|[SENet154_vd](https://paddle-imagenet-models-name.bj.bcebos.com/SE154_vd_pretrained.tar) | 81.40%/95.48% | 71.892 | 64.855 | + +### Inception +|model | top-1/top-5 accuracy(CV2) | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | +|- |:-: |:-: |:-: | |[GoogLeNet](https://paddle-imagenet-models-name.bj.bcebos.com/GoogleNet_pretrained.tar) | 70.70%/89.66% | 6.528 | 3.076 | -|[ShuffleNetV2](https://paddle-imagenet-models-name.bj.bcebos.com/ShuffleNetV2_pretrained.tar) | 70.03%/89.17% | 6.078 | 6.282 | +|[Xception_41](https://paddle-imagenet-models-name.bj.bcebos.com/Xception41_pretrained.tar) | 79.30%/94.53% | 13.757 | 10.831 | |[InceptionV4](https://paddle-imagenet-models-name.bj.bcebos.com/InceptionV4_pretrained.tar) | 80.77%/95.26% | 32.413 | 18.154 | + + + ## FAQ **Q:**
How to solve this problem when I try to train a 6-classes dataset with indicating pretrained_model parameter ? @@ -222,6 +258,7 @@ Enforce failed. Expected x_dims[1] == labels_dims[1], but received x_dims[1]:100 - MobileNetV2: [MobileNetV2: Inverted Residuals and Linear Bottlenecks](https://arxiv.org/pdf/1801.04381v4.pdf), Mark Sandler, Andrew Howard, Menglong Zhu, Andrey Zhmoginov, Liang-Chieh Chen - VGG: [Very Deep Convolutional Networks for Large-scale Image Recognition](https://arxiv.org/pdf/1409.1556), Karen Simonyan, Andrew Zisserman - GoogLeNet: [Going Deeper with Convolutions](https://www.cs.unc.edu/~wliu/papers/GoogLeNet.pdf), Christian Szegedy1, Wei Liu2, Yangqing Jia +- Xception: [Xception: Deep Learning with Depthwise Separable Convolutions](https://arxiv.org/abs/1610.02357), François Chollet - InceptionV4: [Inception-v4, Inception-ResNet and the Impact of Residual Connections on Learning](https://arxiv.org/abs/1602.07261), Christian Szegedy, Sergey Ioffe, Vincent Vanhoucke, Alex Alemi @@ -229,11 +266,12 @@ Enforce failed. Expected x_dims[1] == labels_dims[1], but received x_dims[1]:100 - 2018/12/03 **Stage1**: Update AlexNet, ResNet50, ResNet101, MobileNetV1 - 2018/12/23 **Stage2**: Update VGG Series, SeResNeXt50_32x4d, SeResNeXt101_32x4d, ResNet152 -- 2019/01/31 Update MobileNetV2 +- 2019/01/31 Update MobileNetV2_x1_0 - 2019/04/01 **Stage3**: Update ResNet18, ResNet34, GoogLeNet, ShuffleNetV2 - 2019/06/12 **Stage4**:Update ResNet50_vc, ResNet50_vd, ResNet101_vd, ResNet152_vd, ResNet200_vd, SE154_vd InceptionV4, ResNeXt101_64x4d, ResNeXt101_vd_64x4d - 2019/06/22 Update ResNet50_vd_v2
+If you can fix an issue or add a new feature, please open a PR to us. If your PR is accepted, you can get scores according to the quality and difficulty of your PR(0~5), while you got 10 scores, you can contact us for interview or recommendation letter. \ No newline at end of file diff --git a/PaddleCV/image_classification/README_cn.md b/PaddleCV/image_classification/README_cn.md index 0de76b32..0fc70ba1 100644 --- a/PaddleCV/image_classification/README_cn.md +++ b/PaddleCV/image_classification/README_cn.md @@ -155,20 +155,39 @@ python infer.py \ - 注意 1:ResNet50_vd_v2是ResNet50_vd蒸馏版本。 - 2:除了InceptionV4采用的输入图像的分辨率为299x299,其余模型测试时使用的分辨率均为224x224。 + 2:除了InceptionV4和Xception采用的输入图像的分辨率为299x299,其余模型测试时使用的分辨率均为224x224。 3:调用动态链接库预测时需要将训练模型转换为二进制模型 ```python infer.py --save_inference=True``` +### AlexNet |model | top-1/top-5 accuracy(CV2) | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | |- |:-: |:-: |:-: | |[AlexNet](http://paddle-imagenet-models-name.bj.bcebos.com/AlexNet_pretrained.tar) | 56.72%/79.17% | 3.083 | 2.728 | + +### VGG +|model | top-1/top-5 accuracy(CV2) | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | +|- |:-: |:-: |:-: | |[VGG11](https://paddle-imagenet-models-name.bj.bcebos.com/VGG11_pretrained.tar) | 69.28%/89.09% | 8.223 | 6.821 | |[VGG13](https://paddle-imagenet-models-name.bj.bcebos.com/VGG13_pretrained.tar) | 70.02%/89.42% | 9.512 | 7.783 | |[VGG16](https://paddle-imagenet-models-name.bj.bcebos.com/VGG16_pretrained.tar) | 72.00%/90.69% | 11.315 | 9.067 | |[VGG19](https://paddle-imagenet-models-name.bj.bcebos.com/VGG19_pretrained.tar) | 72.56%/90.93% | 13.096 | 10.388 | -|[MobileNetV1](http://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV1_pretrained.tar) | 70.99%/89.68% | 2.609 | 1.615 | -|[MobileNetV2](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV2_pretrained.tar) | 72.15%/90.65% | 4.546 | 5.278 | + +### MobileNet +|model | top-1/top-5 accuracy(CV2) | Paddle Fluid inference 
time(ms) | Paddle TensorRT inference time(ms) | +|- |:-: |:-: |:-: | +|[MobileNetV1](http://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV1_pretrained.tar) | 70.99%/89.68% | 2.609 |1.615 | +|[MobileNetV2_x0_5](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV2_x0_5_pretrained.tar) | 65.03%/85.72% | 4.514 | 4.150 | +|[MobileNetV2_x1_0](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV2_pretrained.tar) | 72.15%/90.65% | 4.546 | 5.278 | + +### ShuffleNet +|model | top-1/top-5 accuracy(CV2) | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | +|- |:-: |:-: |:-: | +|[ShuffleNetV2](https://paddle-imagenet-models-name.bj.bcebos.com/ShuffleNetV2_pretrained.tar) | 70.03%/89.17% | 6.078 | 6.282 | + +### ResNet +|model | top-1/top-5 accuracy(CV2) | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | +|- |:-: |:-: |:-: | |[ResNet18](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet18_pretrained.tar) | 70.98%/89.92% | 3.456 | 2.484 | |[ResNet34](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet34_pretrained.tar) | 74.57%/92.14% | 5.668 | 3.767 | |[ResNet50](http://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_pretrained.tar) | 76.50%/93.00% | 8.787 | 5.434 | @@ -176,20 +195,36 @@ python infer.py \ |[ResNet50_vd](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vd_pretrained.tar) | 79.12%/94.44% | 9.058 | 5.510 | |[ResNet50_vd_v2](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vd_v2_pretrained.tar) | 79.84%/94.93% | 9.058 | 5.510 | |[ResNet101](http://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_pretrained.tar) | 77.56%/93.64% | 15.447 | 8.779 | -|[ResNet101_vd](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_vd_pretrained.tar) | 79.44%/94.47% | 15.685 | 8.878 | +|[ResNet101_vd](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_vd_pretrained.tar) | 80.17%/94.97% | 15.685 | 8.878 | 
|[ResNet152](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet152_pretrained.tar) | 78.26%/93.96% | 21.816 | 12.148 | |[ResNet152_vd](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet152_vd_pretrained.tar) | 80.59%/95.30% | 22.041 | 12.259 | |[ResNet200_vd](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet200_vd_pretrained.tar) | 80.93%/95.33% | 28.015 | 15.278 | -|[ResNeXt101_64x4d](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt101_64x4d_pretrained.tar) | 79.35%/94.52% | 41.073 | 38.736 | + +### ResNeXt +|model | top-1/top-5 accuracy(CV2) | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | +|- |:-: |:-: |:-: | +|[ResNeXt50_32x4d](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt50_32x4d_pretrained.tar) | 77.75%/93.82% | 12.863 | 9.837 | +|[ResNeXt50_64x4d](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt50_64x4d_pretrained.tar) | 78.43%/94.13% | 28.162 | 18.271 | +|[ResNeXt101_64x4d](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt101_64x4d_pretrained.tar) | 79.35%/94.52% | 41.073 | 38.736 | |[ResNeXt101_vd_64x4d](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt101_vd_64x4d_pretrained.tar) | 80.78%/95.20% | 42.277 | 40.929 | + +### SENet +|model | top-1/top-5 accuracy(CV2) | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | +|- |:-: |:-: |:-: | |[SE_ResNeXt50_32x4d](https://paddle-imagenet-models-name.bj.bcebos.com/SE_ResNeXt50_32x4d_pretrained.tar) | 78.44%/93.96% | 14.916 | 12.126 | |[SE_ResNeXt101_32x4d](https://paddle-imagenet-models-name.bj.bcebos.com/SE_ResNeXt101_32x4d_pretrained.tar) | 79.12%/94.20% | 30.085 | 24.110 | -|[SE154_vd](https://paddle-imagenet-models-name.bj.bcebos.com/SE154_vd_pretrained.tar) | 81.40%/95.48% | 71.892 | 64.855 | +|[SENet154_vd](https://paddle-imagenet-models-name.bj.bcebos.com/SE154_vd_pretrained.tar) | 81.40%/95.48% | 71.892 | 64.855 | + +### Inception +|model | top-1/top-5 accuracy(CV2) | Paddle Fluid inference time(ms) 
| Paddle TensorRT inference time(ms) | +|- |:-: |:-: |:-: | |[GoogLeNet](https://paddle-imagenet-models-name.bj.bcebos.com/GoogleNet_pretrained.tar) | 70.70%/89.66% | 6.528 | 3.076 | -|[ShuffleNetV2](https://paddle-imagenet-models-name.bj.bcebos.com/ShuffleNetV2_pretrained.tar) | 70.03%/89.17% | 6.078 | 6.282 | +|[Xception_41](https://paddle-imagenet-models-name.bj.bcebos.com/Xception41_pretrained.tar) | 79.30%/94.53% | 13.757 | 10.831 | |[InceptionV4](https://paddle-imagenet-models-name.bj.bcebos.com/InceptionV4_pretrained.tar) | 80.77%/95.26% | 32.413 | 18.154 | + + ## FAQ **Q:** 加载预训练模型报错,Enforce failed. Expected x_dims[1] == labels_dims[1], but received x_dims[1]:1000 != labels_dims[1]:6. @@ -207,15 +242,18 @@ python infer.py \ - MobileNetV2: [MobileNetV2: Inverted Residuals and Linear Bottlenecks](https://arxiv.org/pdf/1801.04381v4.pdf), Mark Sandler, Andrew Howard, Menglong Zhu, Andrey Zhmoginov, Liang-Chieh Chen - VGG: [Very Deep Convolutional Networks for Large-scale Image Recognition](https://arxiv.org/pdf/1409.1556), Karen Simonyan, Andrew Zisserman - GoogLeNet: [Going Deeper with Convolutions](https://www.cs.unc.edu/~wliu/papers/GoogLeNet.pdf), Christian Szegedy1, Wei Liu2, Yangqing Jia +- Xception: [Xception: Deep Learning with Depthwise Separable Convolutions](https://arxiv.org/abs/1610.02357), François Chollet - InceptionV4: [Inception-v4, Inception-ResNet and the Impact of Residual Connections on Learning](https://arxiv.org/abs/1602.07261), Christian Szegedy, Sergey Ioffe, Vincent Vanhoucke, Alex Alemi ## 版本更新 - 2018/12/03 **Stage1**: 更新AlexNet,ResNet50,ResNet101,MobileNetV1 - 2018/12/23 **Stage2**: 更新VGG系列 SeResNeXt50_32x4d,SeResNeXt101_32x4d,ResNet152 -- 2019/01/31 更新MobileNetV2 +- 2019/01/31 更新MobileNetV2_x1_0 - 2019/04/01 **Stage3**: 更新ResNet18,ResNet34,GoogLeNet,ShuffleNetV2 - 2019/06/12 **Stage4**: 更新ResNet50_vc,ResNet50_vd,ResNet101_vd,ResNet152_vd,ResNet200_vd,SE154_vd InceptionV4,ResNeXt101_64x4d,ResNeXt101_vd_64x4d - 2019/06/22 
更新ResNet50_vd_v2 +- 2019/07/02 更新MobileNetV2_x0_5,ResNeXt50_32x4d,ResNeXt50_64x4d,Xception_41,ResNet101_vd + ## 如何贡献代码 diff --git a/PaddleCV/image_classification/models/__init__.py b/PaddleCV/image_classification/models/__init__.py index 2661aaa2..814ec125 100644 --- a/PaddleCV/image_classification/models/__init__.py +++ b/PaddleCV/image_classification/models/__init__.py @@ -1,14 +1,13 @@ from .alexnet import AlexNet from .mobilenet import MobileNet -from .mobilenet_v2 import MobileNetV2 +from .mobilenet_v2 import MobileNetV2_x0_25, MobileNetV2_x0_5, MobileNetV2_x1_0, MobileNetV2_x1_5, MobileNetV2_x2_0, MobileNetV2_scale from .googlenet import GoogleNet from .vgg import VGG11, VGG13, VGG16, VGG19 from .resnet import ResNet18, ResNet34, ResNet50, ResNet101, ResNet152 from .resnet_vc import ResNet50_vc, ResNet101_vc, ResNet152_vc from .resnet_vd import ResNet50_vd, ResNet101_vd, ResNet152_vd, ResNet200_vd -from .resnext import ResNeXt50_64x4d, ResNeXt101_64x4d, ResNeXt152_64x4d +from .resnext import ResNeXt50_64x4d, ResNeXt101_64x4d, ResNeXt152_64x4d, ResNeXt50_32x4d, ResNeXt101_32x4d, ResNeXt152_32x4d from .resnext_vd import ResNeXt50_vd_64x4d, ResNeXt101_vd_64x4d, ResNeXt152_vd_64x4d - from .resnet_dist import DistResNet from .inception_v4 import InceptionV4 from .se_resnext import SE_ResNeXt50_32x4d, SE_ResNeXt101_32x4d, SE_ResNeXt152_32x4d @@ -16,3 +15,4 @@ from .se_resnext_vd import SE_ResNeXt50_32x4d_vd, SE_ResNeXt101_32x4d_vd, SE154_ from .dpn import DPN68, DPN92, DPN98, DPN107, DPN131 from .shufflenet_v2 import ShuffleNetV2, ShuffleNetV2_x0_5_swish, ShuffleNetV2_x1_0_swish, ShuffleNetV2_x1_5_swish, ShuffleNetV2_x2_0_swish, ShuffleNetV2_x8_0_swish from .fast_imagenet import FastImageNet +from .xception import Xception_41, Xception_65, Xception_71 \ No newline at end of file diff --git a/PaddleCV/image_classification/models/mobilenet_v2.py b/PaddleCV/image_classification/models/mobilenet_v2.py index 77d88c7d..90e2ff60 100644 --- 
a/PaddleCV/image_classification/models/mobilenet_v2.py +++ b/PaddleCV/image_classification/models/mobilenet_v2.py @@ -1,3 +1,17 @@ +#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. + from __future__ import absolute_import from __future__ import division from __future__ import print_function @@ -5,7 +19,8 @@ import paddle.fluid as fluid from paddle.fluid.initializer import MSRA from paddle.fluid.param_attr import ParamAttr -__all__ = ['MobileNetV2'] +__all__ = ['MobileNetV2', 'MobileNetV2_x0_25', 'MobileNetV2_x0_5', 'MobileNetV2_x1_0', 'MobileNetV2_x1_5', 'MobileNetV2_x2_0', + 'MobileNetV2_scale'] train_parameters = { "input_size": [3, 224, 224], @@ -21,11 +36,16 @@ class MobileNetV2(): - def __init__(self): + def __init__(self, scale=1.0, change_depth=False): self.params = train_parameters - - def net(self, input, class_dim=1000, scale=1.0): - + self.scale = scale + self.change_depth=change_depth + + + def net(self, input, class_dim=1000): + scale = self.scale + change_depth = self.change_depth + #if change_depth is True, the new depth is 1.4 times as deep as before. 
bottleneck_params_list = [ (1, 16, 1, 1), (6, 24, 2, 2), @@ -34,6 +54,14 @@ class MobileNetV2(): (6, 96, 3, 1), (6, 160, 3, 2), (6, 320, 1, 1), + ] if change_depth == False else [ + (1, 16, 1, 1), + (6, 24, 2, 2), + (6, 32, 5, 2), + (6, 64, 7, 2), + (6, 96, 5, 1), + (6, 160, 3, 2), + (6, 320, 1, 1), ] #conv1 @@ -196,3 +224,29 @@ class MobileNetV2(): expansion_factor=t, name=name + '_' + str(i + 1)) return last_residual_block + + + +def MobileNetV2_x0_25(): + model = MobileNetV2(scale=0.25) + return model + +def MobileNetV2_x0_5(): + model = MobileNetV2(scale=0.5) + return model + +def MobileNetV2_x1_0(): + model = MobileNetV2(scale=1.0) + return model + +def MobileNetV2_x1_5(): + model = MobileNetV2(scale=1.5) + return model + +def MobileNetV2_x2_0(): + model = MobileNetV2(scale=2.0) + return model + +def MobileNetV2_scale(): + model = MobileNetV2(scale=1.2, change_depth=True) + return model \ No newline at end of file diff --git a/PaddleCV/image_classification/models/resnext.py b/PaddleCV/image_classification/models/resnext.py index a50db517..c2c94e29 100644 --- a/PaddleCV/image_classification/models/resnext.py +++ b/PaddleCV/image_classification/models/resnext.py @@ -22,7 +22,8 @@ import paddle import paddle.fluid as fluid from paddle.fluid.param_attr import ParamAttr -__all__ = ["ResNeXt", "ResNeXt50_64x4d", "ResNeXt101_64x4d", "ResNeXt152_64x4d"] +__all__ = ["ResNeXt", "ResNeXt50_64x4d", "ResNeXt101_64x4d", "ResNeXt152_64x4d", "ResNeXt50_32x4d", "ResNeXt101_32x4d", + "ResNeXt152_32x4d"] train_parameters = { "input_size": [3, 224, 224], @@ -38,12 +39,14 @@ train_parameters = { class ResNeXt(): - def __init__(self, layers=50): + def __init__(self, layers=50, cardinality=64): self.params = train_parameters self.layers = layers + self.cardinality = cardinality def net(self, input, class_dim=1000): layers = self.layers + cardinality = self.cardinality supported_layers = [50, 101, 152] assert layers in supported_layers, \ "supported layers are {} but input layer is 
{}".format(supported_layers, layers) @@ -54,8 +57,9 @@ class ResNeXt(): depth = [3, 4, 23, 3] elif layers == 152: depth = [3, 8, 36, 3] - num_filters = [256, 512, 1024, 2048] - cardinality = 64 + + num_filters1 = [256, 512, 1024, 2048] + num_filters2 = [128, 256, 512, 1024] conv = self.conv_bn_layer( input=input, @@ -82,7 +86,7 @@ class ResNeXt(): conv_name = "res" + str(block + 2) + chr(97 + i) conv = self.bottleneck_block( input=conv, - num_filters=num_filters[block], + num_filters=num_filters1[block] if cardinality == 64 else num_filters2[block], stride=2 if i == 0 and block != 0 else 1, cardinality=cardinality, name=conv_name) @@ -137,6 +141,7 @@ class ResNeXt(): return input def bottleneck_block(self, input, num_filters, stride, cardinality, name): + cardinality = self.cardinality conv0 = self.conv_bn_layer( input=input, num_filters=num_filters, @@ -153,28 +158,40 @@ class ResNeXt(): name=name + "_branch2b") conv2 = self.conv_bn_layer( input=conv1, - num_filters=num_filters, + num_filters=num_filters if cardinality == 64 else num_filters*2, filter_size=1, act=None, name=name + "_branch2c") short = self.shortcut( - input, num_filters, stride, name=name + "_branch1") + input, num_filters if cardinality == 64 else num_filters*2, stride, name=name + "_branch1") return fluid.layers.elementwise_add( x=short, y=conv2, act='relu', name=name + ".add.output.5") def ResNeXt50_64x4d(): - model = ResNeXt(layers=50) + model = ResNeXt(layers=50, cardinality=64) + return model + +def ResNeXt50_32x4d(): + model = ResNeXt(layers=50, cardinality=32) return model def ResNeXt101_64x4d(): - model = ResNeXt(layers=101) + model = ResNeXt(layers=101, cardinality=64) + return model + +def ResNeXt101_32x4d(): + model = ResNeXt(layers=101, cardinality=32) return model def ResNeXt152_64x4d(): - model = ResNeXt(layers=152) + model = ResNeXt(layers=152, cardinality=64) + return model + +def ResNeXt152_32x4d(): + model = ResNeXt(layers=152, cardinality=32) return model diff --git 
a/PaddleCV/image_classification/models/xception.py b/PaddleCV/image_classification/models/xception.py new file mode 100644 index 00000000..2f14f0f3 --- /dev/null +++ b/PaddleCV/image_classification/models/xception.py @@ -0,0 +1,264 @@ +#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import paddle +import paddle.fluid as fluid +import math +import sys +from paddle.fluid.param_attr import ParamAttr + +__all__ = ['Xception', 'Xception_41', 'Xception_65', 'Xception_71'] + +train_parameters = { + "input_size": [3, 299, 299], + "input_mean": [0.485, 0.456, 0.406], + "input_std": [0.229, 0.224, 0.225], + "learning_strategy": { + "name": "piecewise_decay", + "batch_size": 256, + "epochs": [30, 60, 90], + "steps": [0.1, 0.01, 0.001, 0.0001] + } +} + +class Xception(object): + """Xception""" + def __init__(self, entry_flow_block_num=3, middle_flow_block_num=8): + self.params = train_parameters + self.entry_flow_block_num = entry_flow_block_num + self.middle_flow_block_num = middle_flow_block_num + return + + def net(self, input, class_dim=1000): + conv = self.entry_flow(input, self.entry_flow_block_num) + conv = self.middle_flow(conv, self.middle_flow_block_num) + conv = self.exit_flow(conv, class_dim) + + return conv + + def entry_flow(self, input, block_num=3): + '''xception entry_flow''' + name = 
"entry_flow" + conv = self.conv_bn_layer( + input=input, num_filters=32, filter_size=3, stride=2, act='relu', name=name+"_conv1") + conv = self.conv_bn_layer( + input=conv, num_filters=64, filter_size=3, stride=1, act='relu', name=name+"_conv2") + + + if block_num == 3: + relu_first = [False, True, True] + num_filters = [128, 256, 728] + stride=[2,2,2] + elif block_num == 5: + relu_first = [False, True, True, True, True] + num_filters = [128, 256, 256, 728, 728] + stride=[2,1,2,1,2] + else: + sys.exit(-1) + + for block in range(block_num): + curr_name = "{}_{}".format( name, block ) + conv = self.entry_flow_bottleneck_block(conv, + num_filters=num_filters[block], + name=curr_name, + stride=stride[block], + relu_first=relu_first[block]) + + return conv + + def entry_flow_bottleneck_block(self, input, num_filters, name, stride=2, relu_first=False): + '''entry_flow_bottleneck_block''' + short = fluid.layers.conv2d( + input=input, + num_filters=num_filters, + filter_size=1, + stride=stride, + padding=0, + act=None, + param_attr=ParamAttr(name+"_branch1_weights"), + bias_attr=False + ) + + conv0 = input + if relu_first: + conv0 = fluid.layers.relu( conv0 ) + + conv1 = self.separable_conv( conv0, num_filters, stride=1, name=name+"_branch2a_weights" ) + + conv2 = fluid.layers.relu( conv1 ) + conv2 = self.separable_conv( conv2, num_filters, stride=1, name=name+"_branch2b_weights" ) + + pool = fluid.layers.pool2d( + input=conv2, + pool_size=3, + pool_stride=stride, + pool_padding=1, + pool_type='max') + + return fluid.layers.elementwise_add(x=short, y=pool) + + def middle_flow(self, input, block_num=8): + '''xception middle_flow''' + num_filters=728 + conv = input + for block in range(block_num): + name = "middle_flow_{}".format( block ) + conv =self.middle_flow_bottleneck_block(conv, num_filters, name) + + return conv + + def middle_flow_bottleneck_block(self, input, num_filters, name): + '''middle_flow_bottleneck_block''' + conv0 = fluid.layers.relu( input ) + conv0 = 
self.separable_conv( conv0, num_filters=num_filters, stride=1, name=name+"_branch2a_weights" ) + + conv1 = fluid.layers.relu( conv0 ) + conv1 = self.separable_conv( conv1, num_filters=num_filters, stride=1, name=name+"_branch2b_weights" ) + + conv2 = fluid.layers.relu( conv1 ) + conv2 = self.separable_conv( conv2, num_filters=num_filters, stride=1, name=name+"_branch2c_weights" ) + + return fluid.layers.elementwise_add(x=input, y=conv2) + + + def exit_flow(self, input, class_dim): + '''xception exit flow''' + name = "exit_flow" + num_filters1 = 728 + num_filters2 = 1024 + conv0 = self.exit_flow_bottleneck_block( input, num_filters1, num_filters2, name=name+"_1" ) + + conv1 = self.separable_conv( conv0, num_filters=1536, stride=1, name=name+"_2" ) + conv1 = fluid.layers.relu( conv1 ) + + conv2 = self.separable_conv( conv1, num_filters=2048, stride=1, name=name+"_3" ) + conv2 = fluid.layers.relu( conv2 ) + + pool = fluid.layers.pool2d( + input=conv2, pool_type='avg', global_pooling=True) + + + stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) + out = fluid.layers.fc(input=pool, + size=class_dim, + act='softmax', + param_attr=fluid.param_attr.ParamAttr( + name='fc_weights', + initializer=fluid.initializer.Uniform(-stdv, stdv)), + bias_attr=fluid.param_attr.ParamAttr(name='fc_offset')) + + return out + + def exit_flow_bottleneck_block(self, input, num_filters1, num_filters2, name): + '''entry_flow_bottleneck_block''' + short = fluid.layers.conv2d( + input=input, + num_filters=num_filters2, + filter_size=1, + stride=2, + padding=0, + act=None, + param_attr=ParamAttr(name+"_branch1_weights"), + bias_attr=False + ) + + conv0 = fluid.layers.relu( input ) + conv1 = self.separable_conv( conv0, num_filters1, stride=1, name=name+"_branch2a_weights" ) + + conv2 = fluid.layers.relu( conv1 ) + conv2 = self.separable_conv( conv2, num_filters2, stride=1, name=name+"_branch2b_weights" ) + + pool = fluid.layers.pool2d( + input=conv2, + pool_size=3, + pool_stride=2, + pool_padding=1, + 
pool_type='max') + + return fluid.layers.elementwise_add(x=short, y=pool) + + + def separable_conv(self, input, num_filters, stride=1, name=None): + """separable_conv""" + pointwise_conv = self.conv_bn_layer( + input=input, + filter_size=1, + num_filters=num_filters, + stride=1, + name=name + "_sep") + + depthwise_conv = self.conv_bn_layer( + input=pointwise_conv, + filter_size=3, + num_filters=num_filters, + stride=stride, + groups=num_filters, + use_cudnn=False, + name=name + "_dw") + + return depthwise_conv + + def conv_bn_layer(self, + input, + num_filters, + filter_size, + stride=1, + groups=1, + act=None, + use_cudnn=True, + name=None): + """conv_bn_layer""" + conv = fluid.layers.conv2d( + input=input, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + act=None, + param_attr=ParamAttr(name=name + "_weights"), + bias_attr=False, + use_cudnn=use_cudnn, + name=name + '.conv2d.output.1') + + bn_name = "bn_" + name + + return fluid.layers.batch_norm(input=conv, + act=act, + name=bn_name + '.output.1', + param_attr=ParamAttr(name=bn_name + '_scale'), + bias_attr=ParamAttr(bn_name + '_offset'), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') + + + +def Xception_41(): + """Xception_41""" + model = Xception(entry_flow_block_num=3, middle_flow_block_num=8) + return model + +def Xception_65(): + """Xception_65""" + model = Xception(entry_flow_block_num=3, middle_flow_block_num=16) + return model + +def Xception_71(): + """Xception_71""" + model = Xception(entry_flow_block_num=5, middle_flow_block_num=16) + return model -- GitLab