diff --git a/PaddleCV/image_classification/README.md b/PaddleCV/image_classification/README.md index ada3b92c98bc7c841f7073462fc8455520db2359..4eb3387239617babce07391786c47d22b897722f 100644 --- a/PaddleCV/image_classification/README.md +++ b/PaddleCV/image_classification/README.md @@ -166,20 +166,39 @@ The image classification models currently supported by PaddlePaddle are listed i As the activation function ```swish``` and ```relu6``` which separately used in ShuffleNetV2 and MobileNetV2 net are not supported by Paddle TensorRT, inference acceleration performance of them doesn't significient improve. Pretrained models can be downloaded by clicking related model names. - Note1: ResNet50_vd_v2 is the distilled version of ResNet50_vd. -- Note2: In addition to the image resolution feeded in InceptionV4 net is ```299x299```, others are ```224x224```. +- Note2: In addition to the image resolution feeded in InceptionV4 and Xception net is ```299x299```, others are ```224x224```. - Note3: It's necessary to convert the train model to a binary model when appling dynamic link library to infer, One can do it by running following command: ```python infer.py --save_inference=True``` - + +### AlexNet |model | top-1/top-5 accuracy(CV2) | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | |- |:-: |:-: |:-: | |[AlexNet](http://paddle-imagenet-models-name.bj.bcebos.com/AlexNet_pretrained.tar) | 56.72%/79.17% | 3.083 | 2.728 | + +### VGG +|model | top-1/top-5 accuracy(CV2) | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | +|- |:-: |:-: |:-: | |[VGG11](https://paddle-imagenet-models-name.bj.bcebos.com/VGG11_pretrained.tar) | 69.28%/89.09% | 8.223 | 6.821 | |[VGG13](https://paddle-imagenet-models-name.bj.bcebos.com/VGG13_pretrained.tar) | 70.02%/89.42% | 9.512 | 7.783 | |[VGG16](https://paddle-imagenet-models-name.bj.bcebos.com/VGG16_pretrained.tar) | 72.00%/90.69% | 11.315 | 9.067 | 
|[VGG19](https://paddle-imagenet-models-name.bj.bcebos.com/VGG19_pretrained.tar) | 72.56%/90.93% | 13.096 | 10.388 | -|[MobileNetV1](http://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV1_pretrained.tar) | 70.99%/89.68% | 2.609 | 1.615 | -|[MobileNetV2](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV2_pretrained.tar) | 72.15%/90.65% | 4.546 | 5.278 | + +### MobileNet +|model | top-1/top-5 accuracy(CV2) | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | +|- |:-: |:-: |:-: | +|[MobileNetV1](http://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV1_pretrained.tar) | 70.99%/89.68% | 2.609 |1.615 | +|[MobileNetV2_x0_5](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV2_x0_5_pretrained.tar) | 65.03%/85.72% | 4.514 | 4.150 | +|[MobileNetV2_x1_0](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV2_pretrained.tar) | 72.15%/90.65% | 4.546 | 5.278 | + +### ShuffleNet +|model | top-1/top-5 accuracy(CV2) | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | +|- |:-: |:-: |:-: | +|[ShuffleNetV2](https://paddle-imagenet-models-name.bj.bcebos.com/ShuffleNetV2_pretrained.tar) | 70.03%/89.17% | 6.078 | 6.282 | + +### ResNet +|model | top-1/top-5 accuracy(CV2) | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | +|- |:-: |:-: |:-: | |[ResNet18](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet18_pretrained.tar) | 70.98%/89.92% | 3.456 | 2.484 | |[ResNet34](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet34_pretrained.tar) | 74.57%/92.14% | 5.668 | 3.767 | |[ResNet50](http://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_pretrained.tar) | 76.50%/93.00% | 8.787 | 5.434 | @@ -187,19 +206,36 @@ As the activation function ```swish``` and ```relu6``` which separately used in |[ResNet50_vd](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vd_pretrained.tar) | 79.12%/94.44% | 9.058 | 5.510 | 
|[ResNet50_vd_v2](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vd_v2_pretrained.tar) | 79.84%/94.93% | 9.058 | 5.510 | |[ResNet101](http://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_pretrained.tar) | 77.56%/93.64% | 15.447 | 8.779 | -|[ResNet101_vd](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_vd_pretrained.tar) | 79.44%/94.47% | 15.685 | 8.878 | +|[ResNet101_vd](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_vd_pretrained.tar) | 80.17%/94.97% | 15.685 | 8.878 | |[ResNet152](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet152_pretrained.tar) | 78.26%/93.96% | 21.816 | 12.148 | |[ResNet152_vd](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet152_vd_pretrained.tar) | 80.59%/95.30% | 22.041 | 12.259 | |[ResNet200_vd](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet200_vd_pretrained.tar) | 80.93%/95.33% | 28.015 | 15.278 | -|[ResNeXt101_64x4d](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt101_64x4d_pretrained.tar) | 79.35%/94.52% | 41.073 | 38.736 | + +### ResNeXt +|model | top-1/top-5 accuracy(CV2) | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | +|- |:-: |:-: |:-: | +|[ResNeXt50_32x4d](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt50_32x4d_pretrained.tar) | 77.75%/93.82% | 12.863 | 9.837 | +|[ResNeXt50_64x4d](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt50_64x4d_pretrained.tar) | 78.43%/94.13% | 28.162 | 18.271 | +|[ResNeXt101_64x4d](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt101_64x4d_pretrained.tar) | 79.35%/94.52% | 41.073 | 38.736 | |[ResNeXt101_vd_64x4d](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt101_vd_64x4d_pretrained.tar) | 80.78%/95.20% | 42.277 | 40.929 | + +### SENet +|model | top-1/top-5 accuracy(CV2) | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | +|- |:-: |:-: |:-: | |[SE_ResNeXt50_32x4d](https://paddle-imagenet-models-name.bj.bcebos.com/SE_ResNeXt50_32x4d_pretrained.tar) 
| 78.44%/93.96% | 14.916 | 12.126 | |[SE_ResNeXt101_32x4d](https://paddle-imagenet-models-name.bj.bcebos.com/SE_ResNeXt101_32x4d_pretrained.tar) | 79.12%/94.20% | 30.085 | 24.110 | -|[SE154_vd](https://paddle-imagenet-models-name.bj.bcebos.com/SE154_vd_pretrained.tar) | 81.40%/95.48% | 71.892 | 64.855 | +|[SENet154_vd](https://paddle-imagenet-models-name.bj.bcebos.com/SE154_vd_pretrained.tar) | 81.40%/95.48% | 71.892 | 64.855 | + +### Inception +|model | top-1/top-5 accuracy(CV2) | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | +|- |:-: |:-: |:-: | |[GoogLeNet](https://paddle-imagenet-models-name.bj.bcebos.com/GoogleNet_pretrained.tar) | 70.70%/89.66% | 6.528 | 3.076 | -|[ShuffleNetV2](https://paddle-imagenet-models-name.bj.bcebos.com/ShuffleNetV2_pretrained.tar) | 70.03%/89.17% | 6.078 | 6.282 | +|[Xception_41](https://paddle-imagenet-models-name.bj.bcebos.com/Xception41_pretrained.tar) | 79.30%/94.53% | 13.757 | 10.831 | |[InceptionV4](https://paddle-imagenet-models-name.bj.bcebos.com/InceptionV4_pretrained.tar) | 80.77%/95.26% | 32.413 | 18.154 | + + + ## FAQ **Q:** How to solve this problem when I try to train a 6-classes dataset with indicating pretrained_model parameter ? @@ -222,6 +258,7 @@ Enforce failed. 
Expected x_dims[1] == labels_dims[1], but received x_dims[1]:100 - MobileNetV2: [MobileNetV2: Inverted Residuals and Linear Bottlenecks](https://arxiv.org/pdf/1801.04381v4.pdf), Mark Sandler, Andrew Howard, Menglong Zhu, Andrey Zhmoginov, Liang-Chieh Chen - VGG: [Very Deep Convolutional Networks for Large-scale Image Recognition](https://arxiv.org/pdf/1409.1556), Karen Simonyan, Andrew Zisserman - GoogLeNet: [Going Deeper with Convolutions](https://www.cs.unc.edu/~wliu/papers/GoogLeNet.pdf), Christian Szegedy1, Wei Liu2, Yangqing Jia +- Xception: [Xception: Deep Learning with Depthwise Separable Convolutions](https://arxiv.org/abs/1610.02357), François Chollet - InceptionV4: [Inception-v4, Inception-ResNet and the Impact of Residual Connections on Learning](https://arxiv.org/abs/1602.07261), Christian Szegedy, Sergey Ioffe, Vincent Vanhoucke, Alex Alemi @@ -229,11 +266,12 @@ Enforce failed. Expected x_dims[1] == labels_dims[1], but received x_dims[1]:100 - 2018/12/03 **Stage1**: Update AlexNet, ResNet50, ResNet101, MobileNetV1 - 2018/12/23 **Stage2**: Update VGG Series, SeResNeXt50_32x4d, SeResNeXt101_32x4d, ResNet152 -- 2019/01/31 Update MobileNetV2 +- 2019/01/31 Update MobileNetV2_x1_0 - 2019/04/01 **Stage3**: Update ResNet18, ResNet34, GoogLeNet, ShuffleNetV2 - 2019/06/12 **Stage4**:Update ResNet50_vc, ResNet50_vd, ResNet101_vd, ResNet152_vd, ResNet200_vd, SE154_vd InceptionV4, ResNeXt101_64x4d, ResNeXt101_vd_64x4d - 2019/06/22 Update ResNet50_vd_v2 +- 2019/07/02 Update MobileNetV2_x0_5, ResNeXt50_32x4d, ResNeXt50_64x4d, Xception_41, ResNet101_vd ## Contribute -If you can fix an issue or add a new feature, please open a PR to us. If your PR is accepted, you can get scores according to the quality and difficulty of your PR(0~5), while you got 10 scores, you can contact us for interview or recommendation letter. +If you can fix an issue or add a new feature, please open a PR to us. 
If your PR is accepted, you can get scores according to the quality and difficulty of your PR(0~5), while you got 10 scores, you can contact us for interview or recommendation letter. \ No newline at end of file diff --git a/PaddleCV/image_classification/README_cn.md b/PaddleCV/image_classification/README_cn.md index 0de76b32b1ec4cb5d7b611602fcb9286bc286304..0fc70ba1c30a4b3e8b2681db3f74b7d647596b07 100644 --- a/PaddleCV/image_classification/README_cn.md +++ b/PaddleCV/image_classification/README_cn.md @@ -155,20 +155,39 @@ python infer.py \ - 注意 1:ResNet50_vd_v2是ResNet50_vd蒸馏版本。 - 2:除了InceptionV4采用的输入图像的分辨率为299x299,其余模型测试时使用的分辨率均为224x224。 + 2:除了InceptionV4和Xception采用的输入图像的分辨率为299x299,其余模型测试时使用的分辨率均为224x224。 3:调用动态链接库预测时需要将训练模型转换为二进制模型 ```python infer.py --save_inference=True``` +### AlexNet |model | top-1/top-5 accuracy(CV2) | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | |- |:-: |:-: |:-: | |[AlexNet](http://paddle-imagenet-models-name.bj.bcebos.com/AlexNet_pretrained.tar) | 56.72%/79.17% | 3.083 | 2.728 | + +### VGG +|model | top-1/top-5 accuracy(CV2) | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | +|- |:-: |:-: |:-: | |[VGG11](https://paddle-imagenet-models-name.bj.bcebos.com/VGG11_pretrained.tar) | 69.28%/89.09% | 8.223 | 6.821 | |[VGG13](https://paddle-imagenet-models-name.bj.bcebos.com/VGG13_pretrained.tar) | 70.02%/89.42% | 9.512 | 7.783 | |[VGG16](https://paddle-imagenet-models-name.bj.bcebos.com/VGG16_pretrained.tar) | 72.00%/90.69% | 11.315 | 9.067 | |[VGG19](https://paddle-imagenet-models-name.bj.bcebos.com/VGG19_pretrained.tar) | 72.56%/90.93% | 13.096 | 10.388 | -|[MobileNetV1](http://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV1_pretrained.tar) | 70.99%/89.68% | 2.609 | 1.615 | -|[MobileNetV2](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV2_pretrained.tar) | 72.15%/90.65% | 4.546 | 5.278 | + +### MobileNet +|model | top-1/top-5 accuracy(CV2) | Paddle Fluid inference time(ms) | 
Paddle TensorRT inference time(ms) | +|- |:-: |:-: |:-: | +|[MobileNetV1](http://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV1_pretrained.tar) | 70.99%/89.68% | 2.609 |1.615 | +|[MobileNetV2_x0_5](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV2_x0_5_pretrained.tar) | 65.03%/85.72% | 4.514 | 4.150 | +|[MobileNetV2_x1_0](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV2_pretrained.tar) | 72.15%/90.65% | 4.546 | 5.278 | + +### ShuffleNet +|model | top-1/top-5 accuracy(CV2) | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | +|- |:-: |:-: |:-: | +|[ShuffleNetV2](https://paddle-imagenet-models-name.bj.bcebos.com/ShuffleNetV2_pretrained.tar) | 70.03%/89.17% | 6.078 | 6.282 | + +### ResNet +|model | top-1/top-5 accuracy(CV2) | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | +|- |:-: |:-: |:-: | |[ResNet18](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet18_pretrained.tar) | 70.98%/89.92% | 3.456 | 2.484 | |[ResNet34](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet34_pretrained.tar) | 74.57%/92.14% | 5.668 | 3.767 | |[ResNet50](http://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_pretrained.tar) | 76.50%/93.00% | 8.787 | 5.434 | @@ -176,20 +195,36 @@ python infer.py \ |[ResNet50_vd](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vd_pretrained.tar) | 79.12%/94.44% | 9.058 | 5.510 | |[ResNet50_vd_v2](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vd_v2_pretrained.tar) | 79.84%/94.93% | 9.058 | 5.510 | |[ResNet101](http://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_pretrained.tar) | 77.56%/93.64% | 15.447 | 8.779 | -|[ResNet101_vd](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_vd_pretrained.tar) | 79.44%/94.47% | 15.685 | 8.878 | +|[ResNet101_vd](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_vd_pretrained.tar) | 80.17%/94.97% | 15.685 | 8.878 | 
|[ResNet152](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet152_pretrained.tar) | 78.26%/93.96% | 21.816 | 12.148 | |[ResNet152_vd](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet152_vd_pretrained.tar) | 80.59%/95.30% | 22.041 | 12.259 | |[ResNet200_vd](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet200_vd_pretrained.tar) | 80.93%/95.33% | 28.015 | 15.278 | -|[ResNeXt101_64x4d](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt101_64x4d_pretrained.tar) | 79.35%/94.52% | 41.073 | 38.736 | + +### ResNeXt +|model | top-1/top-5 accuracy(CV2) | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | +|- |:-: |:-: |:-: | +|[ResNeXt50_32x4d](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt50_32x4d_pretrained.tar) | 77.75%/93.82% | 12.863 | 9.837 | +|[ResNeXt50_64x4d](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt50_64x4d_pretrained.tar) | 78.43%/94.13% | 28.162 | 18.271 | +|[ResNeXt101_64x4d](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt101_64x4d_pretrained.tar) | 79.35%/94.52% | 41.073 | 38.736 | |[ResNeXt101_vd_64x4d](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt101_vd_64x4d_pretrained.tar) | 80.78%/95.20% | 42.277 | 40.929 | + +### SENet +|model | top-1/top-5 accuracy(CV2) | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | +|- |:-: |:-: |:-: | |[SE_ResNeXt50_32x4d](https://paddle-imagenet-models-name.bj.bcebos.com/SE_ResNeXt50_32x4d_pretrained.tar) | 78.44%/93.96% | 14.916 | 12.126 | |[SE_ResNeXt101_32x4d](https://paddle-imagenet-models-name.bj.bcebos.com/SE_ResNeXt101_32x4d_pretrained.tar) | 79.12%/94.20% | 30.085 | 24.110 | -|[SE154_vd](https://paddle-imagenet-models-name.bj.bcebos.com/SE154_vd_pretrained.tar) | 81.40%/95.48% | 71.892 | 64.855 | +|[SENet154_vd](https://paddle-imagenet-models-name.bj.bcebos.com/SE154_vd_pretrained.tar) | 81.40%/95.48% | 71.892 | 64.855 | + +### Inception +|model | top-1/top-5 accuracy(CV2) | Paddle Fluid inference time(ms) 
| Paddle TensorRT inference time(ms) | +|- |:-: |:-: |:-: | |[GoogLeNet](https://paddle-imagenet-models-name.bj.bcebos.com/GoogleNet_pretrained.tar) | 70.70%/89.66% | 6.528 | 3.076 | -|[ShuffleNetV2](https://paddle-imagenet-models-name.bj.bcebos.com/ShuffleNetV2_pretrained.tar) | 70.03%/89.17% | 6.078 | 6.282 | +|[Xception_41](https://paddle-imagenet-models-name.bj.bcebos.com/Xception41_pretrained.tar) | 79.30%/94.53% | 13.757 | 10.831 | |[InceptionV4](https://paddle-imagenet-models-name.bj.bcebos.com/InceptionV4_pretrained.tar) | 80.77%/95.26% | 32.413 | 18.154 | + + ## FAQ **Q:** 加载预训练模型报错,Enforce failed. Expected x_dims[1] == labels_dims[1], but received x_dims[1]:1000 != labels_dims[1]:6. @@ -207,15 +242,18 @@ python infer.py \ - MobileNetV2: [MobileNetV2: Inverted Residuals and Linear Bottlenecks](https://arxiv.org/pdf/1801.04381v4.pdf), Mark Sandler, Andrew Howard, Menglong Zhu, Andrey Zhmoginov, Liang-Chieh Chen - VGG: [Very Deep Convolutional Networks for Large-scale Image Recognition](https://arxiv.org/pdf/1409.1556), Karen Simonyan, Andrew Zisserman - GoogLeNet: [Going Deeper with Convolutions](https://www.cs.unc.edu/~wliu/papers/GoogLeNet.pdf), Christian Szegedy1, Wei Liu2, Yangqing Jia +- Xception: [Xception: Deep Learning with Depthwise Separable Convolutions](https://arxiv.org/abs/1610.02357), François Chollet - InceptionV4: [Inception-v4, Inception-ResNet and the Impact of Residual Connections on Learning](https://arxiv.org/abs/1602.07261), Christian Szegedy, Sergey Ioffe, Vincent Vanhoucke, Alex Alemi ## 版本更新 - 2018/12/03 **Stage1**: 更新AlexNet,ResNet50,ResNet101,MobileNetV1 - 2018/12/23 **Stage2**: 更新VGG系列 SeResNeXt50_32x4d,SeResNeXt101_32x4d,ResNet152 -- 2019/01/31 更新MobileNetV2 +- 2019/01/31 更新MobileNetV2_x1_0 - 2019/04/01 **Stage3**: 更新ResNet18,ResNet34,GoogLeNet,ShuffleNetV2 - 2019/06/12 **Stage4**: 更新ResNet50_vc,ResNet50_vd,ResNet101_vd,ResNet152_vd,ResNet200_vd,SE154_vd InceptionV4,ResNeXt101_64x4d,ResNeXt101_vd_64x4d - 2019/06/22 
更新ResNet50_vd_v2 +- 2019/07/02 更新MobileNetV2_x0_5,ResNeXt50_32x4d,ResNeXt50_64x4d,Xception_41,ResNet101_vd + ## 如何贡献代码 diff --git a/PaddleCV/image_classification/models/__init__.py b/PaddleCV/image_classification/models/__init__.py index 2661aaa27a47cb2e743649b0620765d816823e71..814ec125e04cef82ba36f1a391482245fdaecc5e 100644 --- a/PaddleCV/image_classification/models/__init__.py +++ b/PaddleCV/image_classification/models/__init__.py @@ -1,14 +1,13 @@ from .alexnet import AlexNet from .mobilenet import MobileNet -from .mobilenet_v2 import MobileNetV2 +from .mobilenet_v2 import MobileNetV2_x0_25, MobileNetV2_x0_5, MobileNetV2_x1_0, MobileNetV2_x1_5, MobileNetV2_x2_0, MobileNetV2_scale from .googlenet import GoogleNet from .vgg import VGG11, VGG13, VGG16, VGG19 from .resnet import ResNet18, ResNet34, ResNet50, ResNet101, ResNet152 from .resnet_vc import ResNet50_vc, ResNet101_vc, ResNet152_vc from .resnet_vd import ResNet50_vd, ResNet101_vd, ResNet152_vd, ResNet200_vd -from .resnext import ResNeXt50_64x4d, ResNeXt101_64x4d, ResNeXt152_64x4d +from .resnext import ResNeXt50_64x4d, ResNeXt101_64x4d, ResNeXt152_64x4d, ResNeXt50_32x4d, ResNeXt101_32x4d, ResNeXt152_32x4d from .resnext_vd import ResNeXt50_vd_64x4d, ResNeXt101_vd_64x4d, ResNeXt152_vd_64x4d - from .resnet_dist import DistResNet from .inception_v4 import InceptionV4 from .se_resnext import SE_ResNeXt50_32x4d, SE_ResNeXt101_32x4d, SE_ResNeXt152_32x4d @@ -16,3 +15,4 @@ from .se_resnext_vd import SE_ResNeXt50_32x4d_vd, SE_ResNeXt101_32x4d_vd, SE154_ from .dpn import DPN68, DPN92, DPN98, DPN107, DPN131 from .shufflenet_v2 import ShuffleNetV2, ShuffleNetV2_x0_5_swish, ShuffleNetV2_x1_0_swish, ShuffleNetV2_x1_5_swish, ShuffleNetV2_x2_0_swish, ShuffleNetV2_x8_0_swish from .fast_imagenet import FastImageNet +from .xception import Xception_41, Xception_65, Xception_71 \ No newline at end of file diff --git a/PaddleCV/image_classification/models/mobilenet_v2.py b/PaddleCV/image_classification/models/mobilenet_v2.py 
index 77d88c7da625c0c953c75d229148868f0481f2a2..90e2ff6095cd492398c4e60ddee73f740c9e81a6 100644 --- a/PaddleCV/image_classification/models/mobilenet_v2.py +++ b/PaddleCV/image_classification/models/mobilenet_v2.py @@ -1,3 +1,17 @@ +#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. + from __future__ import absolute_import from __future__ import division from __future__ import print_function @@ -5,7 +19,8 @@ import paddle.fluid as fluid from paddle.fluid.initializer import MSRA from paddle.fluid.param_attr import ParamAttr -__all__ = ['MobileNetV2'] +__all__ = ['MobileNetV2', 'MobileNetV2_x0_25, ''MobileNetV2_x0_5', 'MobileNetV2_x1_0', 'MobileNetV2_x1_5', 'MobileNetV2_x2_0', + 'MobileNetV2_scale'] train_parameters = { "input_size": [3, 224, 224], @@ -21,11 +36,16 @@ train_parameters = { class MobileNetV2(): - def __init__(self): + def __init__(self, scale=1.0, change_depth=False): self.params = train_parameters - - def net(self, input, class_dim=1000, scale=1.0): - + self.scale = scale + self.change_depth=change_depth + + + def net(self, input, class_dim=1000): + scale = self.scale + change_depth = self.change_depth + #if change_depth is True, the new depth is 1.4 times as deep as before. 
bottleneck_params_list = [ (1, 16, 1, 1), (6, 24, 2, 2), @@ -34,6 +54,14 @@ class MobileNetV2(): (6, 96, 3, 1), (6, 160, 3, 2), (6, 320, 1, 1), + ] if change_depth == False else [ + (1, 16, 1, 1), + (6, 24, 2, 2), + (6, 32, 5, 2), + (6, 64, 7, 2), + (6, 96, 5, 1), + (6, 160, 3, 2), + (6, 320, 1, 1), ] #conv1 @@ -196,3 +224,29 @@ class MobileNetV2(): expansion_factor=t, name=name + '_' + str(i + 1)) return last_residual_block + + + +def MobileNetV2_x0_25(): + model = MobileNetV2(scale=0.25) + return model + +def MobileNetV2_x0_5(): + model = MobileNetV2(scale=0.5) + return model + +def MobileNetV2_x1_0(): + model = MobileNetV2(scale=1.0) + return model + +def MobileNetV2_x1_5(): + model = MobileNetV2(scale=1.5) + return model + +def MobileNetV2_x2_0(): + model = MobileNetV2(scale=2.0) + return model + +def MobileNetV2_scale(): + model = MobileNetV2(scale=1.2, change_depth=True) + return model \ No newline at end of file diff --git a/PaddleCV/image_classification/models/resnext.py b/PaddleCV/image_classification/models/resnext.py index a50db517af4a3e807fbd41d6729dbbb759bc2dc2..c2c94e2992a78ac840ccbe4a4a578eb41810dc36 100644 --- a/PaddleCV/image_classification/models/resnext.py +++ b/PaddleCV/image_classification/models/resnext.py @@ -22,7 +22,8 @@ import paddle import paddle.fluid as fluid from paddle.fluid.param_attr import ParamAttr -__all__ = ["ResNeXt", "ResNeXt50_64x4d", "ResNeXt101_64x4d", "ResNeXt152_64x4d"] +__all__ = ["ResNeXt", "ResNeXt50_64x4d", "ResNeXt101_64x4d", "ResNeXt152_64x4d", "ResNeXt50_32x4d", "ResNeXt101_32x4d", + "ResNeXt152_32x4d"] train_parameters = { "input_size": [3, 224, 224], @@ -38,12 +39,14 @@ train_parameters = { class ResNeXt(): - def __init__(self, layers=50): + def __init__(self, layers=50, cardinality=64): self.params = train_parameters self.layers = layers + self.cardinality = cardinality def net(self, input, class_dim=1000): layers = self.layers + cardinality = self.cardinality supported_layers = [50, 101, 152] assert layers in 
supported_layers, \ "supported layers are {} but input layer is {}".format(supported_layers, layers) @@ -54,8 +57,9 @@ class ResNeXt(): depth = [3, 4, 23, 3] elif layers == 152: depth = [3, 8, 36, 3] - num_filters = [256, 512, 1024, 2048] - cardinality = 64 + + num_filters1 = [256, 512, 1024, 2048] + num_filters2 = [128, 256, 512, 1024] conv = self.conv_bn_layer( input=input, @@ -82,7 +86,7 @@ class ResNeXt(): conv_name = "res" + str(block + 2) + chr(97 + i) conv = self.bottleneck_block( input=conv, - num_filters=num_filters[block], + num_filters=num_filters1[block] if cardinality == 64 else num_filters2[block], stride=2 if i == 0 and block != 0 else 1, cardinality=cardinality, name=conv_name) @@ -137,6 +141,7 @@ class ResNeXt(): return input def bottleneck_block(self, input, num_filters, stride, cardinality, name): + cardinality = self.cardinality conv0 = self.conv_bn_layer( input=input, num_filters=num_filters, @@ -153,28 +158,40 @@ class ResNeXt(): name=name + "_branch2b") conv2 = self.conv_bn_layer( input=conv1, - num_filters=num_filters, + num_filters=num_filters if cardinality == 64 else num_filters*2, filter_size=1, act=None, name=name + "_branch2c") short = self.shortcut( - input, num_filters, stride, name=name + "_branch1") + input, num_filters if cardinality == 64 else num_filters*2, stride, name=name + "_branch1") return fluid.layers.elementwise_add( x=short, y=conv2, act='relu', name=name + ".add.output.5") def ResNeXt50_64x4d(): - model = ResNeXt(layers=50) + model = ResNeXt(layers=50, cardinality=64) + return model + +def ResNeXt50_32x4d(): + model = ResNeXt(layers=50, cardinality=32) return model def ResNeXt101_64x4d(): - model = ResNeXt(layers=101) + model = ResNeXt(layers=101, cardinality=64) + return model + +def ResNeXt101_32x4d(): + model = ResNeXt(layers=101, cardinality=32) return model def ResNeXt152_64x4d(): - model = ResNeXt(layers=152) + model = ResNeXt(layers=152, cardinality=64) + return model + +def ResNeXt152_32x4d(): + model = 
ResNeXt(layers=152, cardinality=32) return model diff --git a/PaddleCV/image_classification/models/xception.py b/PaddleCV/image_classification/models/xception.py new file mode 100644 index 0000000000000000000000000000000000000000..2f14f0f3beece7757475b58d1760c2624fbf7492 --- /dev/null +++ b/PaddleCV/image_classification/models/xception.py @@ -0,0 +1,264 @@ +#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import paddle +import paddle.fluid as fluid +import math +import sys +from paddle.fluid.param_attr import ParamAttr + +__all__ = ['Xception', 'Xception_41', 'Xception_65', 'Xception_71'] + +train_parameters = { + "input_size": [3, 299, 299], + "input_mean": [0.485, 0.456, 0.406], + "input_std": [0.229, 0.224, 0.225], + "learning_strategy": { + "name": "piecewise_decay", + "batch_size": 256, + "epochs": [30, 60, 90], + "steps": [0.1, 0.01, 0.001, 0.0001] + } +} + +class Xception(object): + """Xception""" + def __init__(self, entry_flow_block_num=3, middle_flow_block_num=8): + self.params = train_parameters + self.entry_flow_block_num = entry_flow_block_num + self.middle_flow_block_num = middle_flow_block_num + return + + def net(self, input, class_dim=1000): + conv = self.entry_flow(input, self.entry_flow_block_num) + conv = self.middle_flow(conv, self.middle_flow_block_num) + conv = 
self.exit_flow(conv, class_dim) + + return conv + + def entry_flow(self, input, block_num=3): + '''xception entry_flow''' + name = "entry_flow" + conv = self.conv_bn_layer( + input=input, num_filters=32, filter_size=3, stride=2, act='relu', name=name+"_conv1") + conv = self.conv_bn_layer( + input=conv, num_filters=64, filter_size=3, stride=1, act='relu', name=name+"_conv2") + + + if block_num == 3: + relu_first = [False, True, True] + num_filters = [128, 256, 728] + stride=[2,2,2] + elif block_num == 5: + relu_first = [False, True, True, True, True] + num_filters = [128, 256, 256, 728, 728] + stride=[2,1,2,1,2] + else: + sys.exit(-1) + + for block in range(block_num): + curr_name = "{}_{}".format( name, block ) + conv = self.entry_flow_bottleneck_block(conv, + num_filters=num_filters[block], + name=curr_name, + stride=stride[block], + relu_first=relu_first[block]) + + return conv + + def entry_flow_bottleneck_block(self, input, num_filters, name, stride=2, relu_first=False): + '''entry_flow_bottleneck_block''' + short = fluid.layers.conv2d( + input=input, + num_filters=num_filters, + filter_size=1, + stride=stride, + padding=0, + act=None, + param_attr=ParamAttr(name+"_branch1_weights"), + bias_attr=False + ) + + conv0 = input + if relu_first: + conv0 = fluid.layers.relu( conv0 ) + + conv1 = self.separable_conv( conv0, num_filters, stride=1, name=name+"_branch2a_weights" ) + + conv2 = fluid.layers.relu( conv1 ) + conv2 = self.separable_conv( conv2, num_filters, stride=1, name=name+"_branch2b_weights" ) + + pool = fluid.layers.pool2d( + input=conv2, + pool_size=3, + pool_stride=stride, + pool_padding=1, + pool_type='max') + + return fluid.layers.elementwise_add(x=short, y=pool) + + def middle_flow(self, input, block_num=8): + '''xception middle_flow''' + num_filters=728 + conv = input + for block in range(block_num): + name = "middle_flow_{}".format( block ) + conv =self.middle_flow_bottleneck_block(conv, num_filters, name) + + return conv + + def 
middle_flow_bottleneck_block(self, input, num_filters, name): + '''middle_flow_bottleneck_block''' + conv0 = fluid.layers.relu( input ) + conv0 = self.separable_conv( conv0, num_filters=num_filters, stride=1, name=name+"_branch2a_weights" ) + + conv1 = fluid.layers.relu( conv0 ) + conv1 = self.separable_conv( conv1, num_filters=num_filters, stride=1, name=name+"_branch2b_weights" ) + + conv2 = fluid.layers.relu( conv1 ) + conv2 = self.separable_conv( conv2, num_filters=num_filters, stride=1, name=name+"_branch2c_weights" ) + + return fluid.layers.elementwise_add(x=input, y=conv2) + + + def exit_flow(self, input, class_dim): + '''xception exit flow''' + name = "exit_flow" + num_filters1 = 728 + num_filters2 = 1024 + conv0 = self.exit_flow_bottleneck_block( input, num_filters1, num_filters2, name=name+"_1" ) + + conv1 = self.separable_conv( conv0, num_filters=1536, stride=1, name=name+"_2" ) + conv1 = fluid.layers.relu( conv1 ) + + conv2 = self.separable_conv( conv1, num_filters=2048, stride=1, name=name+"_3" ) + conv2 = fluid.layers.relu( conv2 ) + + pool = fluid.layers.pool2d( + input=conv2, pool_type='avg', global_pooling=True) + + + stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) + out = fluid.layers.fc(input=pool, + size=class_dim, + act='softmax', + param_attr=fluid.param_attr.ParamAttr( + name='fc_weights', + initializer=fluid.initializer.Uniform(-stdv, stdv)), + bias_attr=fluid.param_attr.ParamAttr(name='fc_offset')) + + return out + + def exit_flow_bottleneck_block(self, input, num_filters1, num_filters2, name): + '''entry_flow_bottleneck_block''' + short = fluid.layers.conv2d( + input=input, + num_filters=num_filters2, + filter_size=1, + stride=2, + padding=0, + act=None, + param_attr=ParamAttr(name+"_branch1_weights"), + bias_attr=False + ) + + conv0 = fluid.layers.relu( input ) + conv1 = self.separable_conv( conv0, num_filters1, stride=1, name=name+"_branch2a_weights" ) + + conv2 = fluid.layers.relu( conv1 ) + conv2 = self.separable_conv( conv2, num_filters2, 
stride=1, name=name+"_branch2b_weights" ) + + pool = fluid.layers.pool2d( + input=conv2, + pool_size=3, + pool_stride=2, + pool_padding=1, + pool_type='max') + + return fluid.layers.elementwise_add(x=short, y=pool) + + + def separable_conv(self, input, num_filters, stride=1, name=None): + """separable_conv""" + pointwise_conv = self.conv_bn_layer( + input=input, + filter_size=1, + num_filters=num_filters, + stride=1, + name=name + "_sep") + + depthwise_conv = self.conv_bn_layer( + input=pointwise_conv, + filter_size=3, + num_filters=num_filters, + stride=stride, + groups=num_filters, + use_cudnn=False, + name=name + "_dw") + + return depthwise_conv + + def conv_bn_layer(self, + input, + num_filters, + filter_size, + stride=1, + groups=1, + act=None, + use_cudnn=True, + name=None): + """conv_bn_layer""" + conv = fluid.layers.conv2d( + input=input, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=(filter_size - 1) / 2, + groups=groups, + act=None, + param_attr=ParamAttr(name=name + "_weights"), + bias_attr=False, + use_cudnn=use_cudnn, + name=name + '.conv2d.output.1') + + bn_name = "bn_" + name + + return fluid.layers.batch_norm(input=conv, + act=act, + name=bn_name + '.output.1', + param_attr=ParamAttr(name=bn_name + '_scale'), + bias_attr=ParamAttr(bn_name + '_offset'), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') + + + +def Xception_41(): + """Xception_41""" + model = Xception(entry_flow_block_num=3, middle_flow_block_num=8) + return model + +def Xception_65(): + """Xception_65""" + model = Xception(entry_flow_block_num=3, middle_flow_block_num=16) + return model + +def Xception_71(): + """Xception_71""" + model = Xception(entry_flow_block_num=5, middle_flow_block_num=16) + return model