Commit 9eed230f authored by: S shippingwang

fix

......@@ -100,10 +100,6 @@ PaddleClas installation instructions, model training, prediction, evaluation, and model fine-tuning (f
In recent years, object detection in images has drawn broad attention from both academia and industry, and the backbone network structure and pretrained models used for image classification directly affect detection quality. Using PaddleClas's 82.39% ResNet50_vd pretrained model together with its own rich set of detection operators, PaddleDetection provides PSS-DET (Practical Server Side Detection), an object detection solution aimed at server-side applications. The solution combines several strategies that add only a little computation but effectively improve two-stage Faster RCNN detection, including detection model pruning, pretrained models with higher classification accuracy, DCNv2, Cascade RCNN, AutoAugment, Libra sampling, and multi-scale training. Compared with the 79.12% R50_vd pretrained model, the 82.39% R50_vd_ssld pretrained model improves detection accuracy by 1.5%. Evaluated on the COCO object detection dataset, PSS-DET reaches 41.6% mAP at 61 FPS and 47.8% mAP at 20 FPS on a single V100 GPU. For details, please refer to the [**General Object Detection chapter**](https://paddleclas.readthedocs.io/zh_CN/latest/application/object_detection.html).
- TODO
- [ ] Applying PaddleClas to OCR tasks
- [ ] Applying PaddleClas to face detection and recognition
## Industrial-grade deployment tools
PaddlePaddle provides a series of practical tools that make it easy to deploy PaddleClas in industrial applications. For details, please refer to the [**Extension Tools chapter**](https://paddleclas.readthedocs.io/zh_CN/latest/extension/index.html) of the documentation.
......
mode: 'train'
ARCHITECTURE:
name: "EfficientNetLite0"
params:
is_test: False
padding_type : "SAME"
override_params:
drop_connect_rate: 0.1
fix_head_stem: True
relu_fn: True
pretrained_model: ""
model_save_dir: "./output/"
classes_num: 1000
total_images: 1281167
save_interval: 1
validate: True
valid_interval: 1
epochs: 360
topk: 5
image_shape: [3, 224, 224]
use_ema: True
ema_decay: 0.9999
use_aa: True
ls_epsilon: 0.1
LEARNING_RATE:
function: 'ExponentialWarmup'
params:
lr: 0.032
OPTIMIZER:
function: 'RMSProp'
params:
momentum: 0.9
rho: 0.9
epsilon: 0.001
regularizer:
function: 'L2'
factor: 0.00001
TRAIN:
batch_size: 512
num_workers: 4
file_list: "./dataset/ILSVRC2012/train_list.txt"
data_dir: "./dataset/ILSVRC2012/"
shuffle_seed: 0
transforms:
- DecodeImage:
to_rgb: True
to_np: False
channel_first: False
- RandCropImage:
size: 224
interpolation: 1
- RandFlipImage:
flip_code: 1
- AutoAugment:
- NormalizeImage:
scale: 1./255.
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
order: ''
- ToCHWImage:
VALID:
batch_size: 128
num_workers: 4
file_list: "./dataset/ILSVRC2012/val_list.txt"
data_dir: "./dataset/ILSVRC2012/"
shuffle_seed: 0
transforms:
- DecodeImage:
to_rgb: True
to_np: False
channel_first: False
- ResizeImage:
interpolation: 1
resize_short: 256
- CropImage:
size: 224
- NormalizeImage:
scale: 1./255.
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
order: ''
- ToCHWImage:
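As a minimal, illustrative sketch (not part of this commit), the training config above can be loaded with PyYAML to inspect its hyperparameters; the file path below is an assumption about where the YAML is saved.

```python
# Illustrative only: load a PaddleClas-style training config and print a few fields.
# The path is assumed; adjust it to wherever the YAML above is stored.
import yaml

with open("configs/EfficientNetLite/EfficientNetLite0.yaml") as f:
    cfg = yaml.safe_load(f)

print(cfg["ARCHITECTURE"]["name"])            # EfficientNetLite0
print(cfg["LEARNING_RATE"]["params"]["lr"])   # 0.032
print(cfg["OPTIMIZER"]["function"])           # RMSProp
print(len(cfg["TRAIN"]["transforms"]))        # number of training transforms
```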
......@@ -19,7 +19,6 @@ topk: 5
image_shape: [3, 224, 224]
use_ema: True
ema_decay: 0.9999
use_aa: True
ls_epsilon: 0.1
LEARNING_RATE:
......@@ -46,7 +45,7 @@ TRAIN:
transforms:
- DecodeImage:
to_rgb: True
to_np: Fals
to_np: False
channel_first: False
- RandCropImage:
size: 224
......@@ -85,5 +84,3 @@ VALID:
std: [0.229, 0.224, 0.225]
order: ''
- ToCHWImage:
mode: 'train'
ARCHITECTURE:
name: 'RegNetX_4GF'
pretrained_model: ""
model_save_dir: "./output/"
classes_num: 1000
total_images: 1281167
save_interval: 1
validate: True
valid_interval: 1
epochs: 100
topk: 5
image_shape: [3, 224, 224]
use_mix: False
ls_epsilon: -1
LEARNING_RATE:
function: 'CosineWarmup'
params:
lr: 0.4
warmup_epoch: 5
OPTIMIZER:
function: 'Momentum'
params:
momentum: 0.9
regularizer:
function: 'L2'
factor: 0.000050
TRAIN:
batch_size: 512
num_workers: 4
file_list: "./dataset/ILSVRC2012/train_list.txt"
data_dir: "./dataset/ILSVRC2012/"
shuffle_seed: 0
transforms:
- DecodeImage:
to_rgb: True
to_np: False
channel_first: False
- RandCropImage:
size: 224
- RandFlipImage:
flip_code: 1
- NormalizeImage:
scale: 1./255.
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
order: ''
- ToCHWImage:
VALID:
batch_size: 256
num_workers: 4
file_list: "./dataset/ILSVRC2012/val_list.txt"
data_dir: "./dataset/ILSVRC2012/"
shuffle_seed: 0
transforms:
- DecodeImage:
to_rgb: True
to_np: False
channel_first: False
- ResizeImage:
resize_short: 256
- CropImage:
size: 224
- NormalizeImage:
scale: 1.0/255.0
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
order: ''
- ToCHWImage:
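The config above pairs `CosineWarmup` with `lr: 0.4` and `warmup_epoch: 5` over 100 epochs. The sketch below is an illustrative reimplementation of that kind of schedule (linear warmup followed by cosine decay), not PaddleClas's own implementation.

```python
import math

def cosine_warmup_lr(epoch, base_lr=0.4, warmup_epoch=5, total_epoch=100):
    """Illustrative LR schedule: linear warmup, then cosine decay toward zero."""
    if epoch < warmup_epoch:
        # linear ramp from 0 to base_lr over the warmup epochs
        return base_lr * (epoch + 1) / warmup_epoch
    # cosine decay over the remaining epochs
    progress = (epoch - warmup_epoch) / (total_epoch - warmup_epoch)
    return 0.5 * base_lr * (1 + math.cos(math.pi * progress))

for e in (0, 4, 5, 50, 99):
    print(e, round(cosine_warmup_lr(e), 4))
```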
......@@ -100,7 +100,7 @@ python tools/export_model.py \
The complete example is provided in `tools/infer/predict.py`; just execute the following command to complete the prediction:
```
python ./predict.py \
python ./tools/infer/predict.py \
-i=./test.jpeg \
-m=./resnet50-vd/model \
-p=./resnet50-vd/params \
......
......@@ -10,6 +10,7 @@ The ShuffleNet series network is the lightweight network structure proposed by M
MobileNetV3 is a new NAS-based lightweight network proposed by Google in 2019. To further improve accuracy, the relu and sigmoid activation functions were replaced with hard_swish and hard_sigmoid respectively, and several strategies aimed specifically at reducing the amount of computation were introduced.
GhostNet is a brand-new lightweight network structure proposed by Huawei in 2020. By introducing the ghost module, it greatly alleviates the redundant computation of features in traditional deep networks, substantially reducing both the parameter count and the amount of computation.
![](../../images/models/mobile_arm_top1.png)
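As a rough illustration of the ghost module idea described above (and not the exact GhostNet implementation in this repository), a ghost block first produces a small set of "intrinsic" feature maps with an ordinary convolution and then derives additional "ghost" maps from them with cheap depthwise operations; the helper name and parameters below are assumptions.

```python
import paddle.fluid as fluid

def ghost_module_sketch(x, out_channels, dw_size=3, name="ghost"):
    # intrinsic features: an ordinary 1x1 convolution producing half of the output channels
    init_channels = out_channels // 2
    primary = fluid.layers.conv2d(
        x, num_filters=init_channels, filter_size=1, act='relu',
        name=name + '_primary')
    # ghost features: a cheap depthwise convolution applied to the intrinsic maps
    cheap = fluid.layers.conv2d(
        primary, num_filters=init_channels, filter_size=dw_size,
        padding=dw_size // 2, groups=init_channels, act='relu',
        name=name + '_cheap')
    # concatenate intrinsic and ghost maps to form the full output
    return fluid.layers.concat([primary, cheap], axis=1)
```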
......@@ -57,6 +58,9 @@ Currently there are 32 pretrained models of the mobile series open source by Pad
| ShuffleNetV2_x1_5 | 0.716 | 0.902 | 0.726 | | 0.580 | 3.470 |
| ShuffleNetV2_x2_0 | 0.732 | 0.912 | 0.749 | | 1.120 | 7.320 |
| ShuffleNetV2_swish | 0.700 | 0.892 | | | 0.290 | 2.260 |
| GhostNet_x0_5 | 0.668 | 0.869 | 0.662 | 0.866 | 0.041 | 2.600 |
| GhostNet_x1_0 | 0.740 | 0.916 | 0.739 | 0.914 | 0.147 | 5.200 |
| GhostNet_x1_3 | 0.757 | 0.925 | 0.757 | 0.927 | 0.220 | 7.300 |
## Inference speed and storage size based on SD855
......
......@@ -93,6 +93,10 @@ python tools/infer/predict.py \
- [ShuffleNetV2_x1_5](https://paddle-imagenet-models-name.bj.bcebos.com/ShuffleNetV2_x1_5_pretrained.tar)
- [ShuffleNetV2_x2_0](https://paddle-imagenet-models-name.bj.bcebos.com/ShuffleNetV2_x2_0_pretrained.tar)
- [ShuffleNetV2_swish](https://paddle-imagenet-models-name.bj.bcebos.com/ShuffleNetV2_swish_pretrained.tar)
- GhostNet series<sup>[[23](#ref23)]</sup>([paper link](https://arxiv.org/pdf/1911.11907.pdf))
- [GhostNet_x0_5](https://paddle-imagenet-models-name.bj.bcebos.com/GhostNet_x0_5_pretrained.pdparams)
- [GhostNet_x1_0](https://paddle-imagenet-models-name.bj.bcebos.com/GhostNet_x1_0_pretrained.pdparams)
- [GhostNet_x1_3](https://paddle-imagenet-models-name.bj.bcebos.com/GhostNet_x1_3_pretrained.pdparams)
- SEResNeXt and Res2Net series
......@@ -254,3 +258,5 @@ python tools/infer/predict.py \
<a name="ref21">[21]</a> Redmon J, Divvala S, Girshick R, et al. You only look once: Unified, real-time object detection[C]//Proceedings of the IEEE conference on computer vision and pattern recognition. 2016: 779-788.
<a name="ref22">[22]</a> Ding X, Guo Y, Ding G, et al. Acnet: Strengthening the kernel skeletons for powerful cnn via asymmetric convolution blocks[C]//Proceedings of the IEEE International Conference on Computer Vision. 2019: 1911-1920.
<a name="ref23">[23]</a> Han K, Wang Y, Tian Q, et al. GhostNet: More features from cheap operations[C]//Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition. 2020: 1580-1589.
\ No newline at end of file
......@@ -100,7 +100,7 @@ python tools/export_model.py \
A complete example is provided in `tools/infer/predict.py` in the model library; simply run the following command to complete the prediction:
```
python ./predict.py \
python ./tools/infer/predict.py \
-i=./test.jpeg \
-m=./resnet50-vd/model \
-p=./resnet50-vd/params \
......@@ -122,7 +122,7 @@ python ./predict.py \
Note:
When benchmark is enabled, TensorRT is used for prediction by default.
Build the inference engine:
......@@ -259,4 +259,3 @@ outputs = exe.run(compiled_program,
```
For descriptions of the parameters used in the prediction above, please refer to the official [fluid.Executor](https://www.paddlepaddle.org.cn/documentation/docs/zh/api_cn/executor_cn/Executor_cn.html) documentation.
......@@ -9,6 +9,8 @@ The ShuffleNet series is a lightweight network structure proposed by MEGVII; so far
MobileNetV3 is a new NAS-based lightweight network proposed by Google in 2019. To further improve accuracy, the relu and sigmoid activation functions are replaced with hard_swish and hard_sigmoid respectively, and several strategies aimed specifically at reducing the amount of computation are introduced.
GhostNet is a brand-new lightweight network structure proposed by Huawei in 2020. By introducing the ghost module, it greatly alleviates the redundant computation of features in traditional deep networks, substantially reducing both the parameter count and the amount of computation.
![](../../images/models/mobile_arm_top1.png)
![](../../images/models/mobile_arm_storage.png)
......@@ -18,7 +20,7 @@ MobileNetV3 is a new NAS-based lightweight network proposed by Google in 2019
![](../../images/models/T4_benchmark/t4.fp32.bs4.mobile_trt.params.png)
PaddleClas currently open-sources 32 pretrained models of the mobile series, whose metrics are shown in the figure. As the figure shows, newer lightweight models tend to perform better, and MobileNetV3 represents the latest lightweight network structure. In MobileNetV3, the authors apply a 1x1 convolution after global-avg-pooling to obtain higher accuracy; this greatly increases the parameter count but has little impact on computation, so MobileNetV3 does not stand out much when models are judged by storage size, yet its smaller computation gives it faster inference. In addition, the ssld distilled models in our model zoo perform excellently and refresh the accuracy of current lightweight models from every perspective. Because MobileNetV3 has a complex structure with many branches, it is not GPU-friendly, and its GPU inference speed is lower than that of MobileNetV1.
PaddleClas currently open-sources 35 pretrained models of the mobile series, whose metrics are shown in the figure. As the figure shows, newer lightweight models tend to perform better, and MobileNetV3 represents the current mainstream lightweight network structure. In MobileNetV3, the authors apply a 1x1 convolution after global-avg-pooling to obtain higher accuracy; this greatly increases the parameter count but has little impact on computation, so MobileNetV3 does not stand out much when models are judged by storage size, yet its smaller computation gives it faster inference. In addition, the ssld distilled models in our model zoo perform excellently and refresh the accuracy of current lightweight models from every perspective. Because MobileNetV3 has a complex structure with many branches, it is not GPU-friendly, and its GPU inference speed is lower than that of MobileNetV1. GhostNet, proposed in 2020, introduces the ghost design concept into the network, greatly reducing computation and parameter count while also surpassing MobileNetV3, previously the most accurate, in accuracy.
## Accuracy, FLOPS and Parameters
......@@ -57,6 +59,9 @@ MobileNetV3 is a new NAS-based lightweight network proposed by Google in 2019
| ShuffleNetV2_x1_5 | 0.716 | 0.902 | 0.726 | | 0.580 | 3.470 |
| ShuffleNetV2_x2_0 | 0.732 | 0.912 | 0.749 | | 1.120 | 7.320 |
| ShuffleNetV2_swish | 0.700 | 0.892 | | | 0.290 | 2.260 |
| GhostNet_x0_5 | 0.668 | 0.869 | 0.662 | 0.866 | 0.041 | 2.600 |
| GhostNet_x1_0 | 0.740 | 0.916 | 0.739 | 0.914 | 0.147 | 5.200 |
| GhostNet_x1_3 | 0.757 | 0.925 | 0.757 | 0.927 | 0.220 | 7.300 |
## Inference speed and storage size based on SD855
......
......@@ -93,6 +93,10 @@ python tools/infer/predict.py \
- [ShuffleNetV2_x1_5](https://paddle-imagenet-models-name.bj.bcebos.com/ShuffleNetV2_x1_5_pretrained.tar)
- [ShuffleNetV2_x2_0](https://paddle-imagenet-models-name.bj.bcebos.com/ShuffleNetV2_x2_0_pretrained.tar)
- [ShuffleNetV2_swish](https://paddle-imagenet-models-name.bj.bcebos.com/ShuffleNetV2_swish_pretrained.tar)
- GhostNet series<sup>[[23](#ref23)]</sup>([paper link](https://arxiv.org/pdf/1911.11907.pdf))
- [GhostNet_x0_5](https://paddle-imagenet-models-name.bj.bcebos.com/GhostNet_x0_5_pretrained.pdparams)
- [GhostNet_x1_0](https://paddle-imagenet-models-name.bj.bcebos.com/GhostNet_x1_0_pretrained.pdparams)
- [GhostNet_x1_3](https://paddle-imagenet-models-name.bj.bcebos.com/GhostNet_x1_3_pretrained.pdparams)
- SEResNeXt and Res2Net series
......@@ -254,3 +258,5 @@ python tools/infer/predict.py \
<a name="ref21">[21]</a> Redmon J, Divvala S, Girshick R, et al. You only look once: Unified, real-time object detection[C]//Proceedings of the IEEE conference on computer vision and pattern recognition. 2016: 779-788.
<a name="ref22">[22]</a> Ding X, Guo Y, Ding G, et al. Acnet: Strengthening the kernel skeletons for powerful cnn via asymmetric convolution blocks[C]//Proceedings of the IEEE International Conference on Computer Vision. 2019: 1911-1920.
<a name="ref23">[23]</a> Han K, Wang Y, Tian Q, et al. GhostNet: More features from cheap operations[C]//Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition. 2020: 1580-1589.
......@@ -18,6 +18,7 @@ from .mobilenet_v2 import MobileNetV2_x0_25, MobileNetV2_x0_5, MobileNetV2_x0_75
from .mobilenet_v3 import MobileNetV3_small_x0_35, MobileNetV3_small_x0_5, MobileNetV3_small_x0_75, MobileNetV3_small_x1_0, MobileNetV3_small_x1_25, MobileNetV3_large_x0_35, MobileNetV3_large_x0_5, MobileNetV3_large_x0_75, MobileNetV3_large_x1_0, MobileNetV3_large_x1_25
from .googlenet import GoogLeNet
from .vgg import VGG11, VGG13, VGG16, VGG19
from .regnet import RegNetX_200MF, RegNetX_4GF, RegNetX_32GF, RegNetY_200MF, RegNetY_4GF, RegNetY_32GF
from .resnet import ResNet18, ResNet34, ResNet50, ResNet101, ResNet152
from .resnet_vc import ResNet50_vc, ResNet101_vc, ResNet152_vc
from .resnet_vd import ResNet18_vd, ResNet34_vd, ResNet50_vd, ResNet101_vd, ResNet152_vd, ResNet200_vd
......@@ -37,6 +38,9 @@ from .squeezenet import SqueezeNet1_0, SqueezeNet1_1
from .darknet import DarkNet53
from .resnext101_wsl import ResNeXt101_32x8d_wsl, ResNeXt101_32x16d_wsl, ResNeXt101_32x32d_wsl, ResNeXt101_32x48d_wsl, Fix_ResNeXt101_32x48d_wsl
from .efficientnet import EfficientNet, EfficientNetB0, EfficientNetB0_small, EfficientNetB1, EfficientNetB2, EfficientNetB3, EfficientNetB4, EfficientNetB5, EfficientNetB6, EfficientNetB7
from .efficientnetlite import EfficientNetLite, EfficientNetLite0, EfficientNetLite1, EfficientNetLite2, EfficientNetLite4
from .res2net import Res2Net50_48w_2s, Res2Net50_26w_4s, Res2Net50_14w_8s, Res2Net50_26w_6s, Res2Net50_26w_8s, Res2Net101_26w_4s, Res2Net152_26w_4s
from .res2net_vd import Res2Net50_vd_48w_2s, Res2Net50_vd_26w_4s, Res2Net50_vd_14w_8s, Res2Net50_vd_26w_6s, Res2Net50_vd_26w_8s, Res2Net101_vd_26w_4s, Res2Net152_vd_26w_4s, Res2Net200_vd_26w_4s
from .hrnet import HRNet_W18_C, HRNet_W30_C, HRNet_W32_C, HRNet_W40_C, HRNet_W44_C, HRNet_W48_C, HRNet_W60_C, HRNet_W64_C, SE_HRNet_W18_C, SE_HRNet_W30_C, SE_HRNet_W32_C, SE_HRNet_W40_C, SE_HRNet_W44_C, SE_HRNet_W48_C, SE_HRNet_W60_C, SE_HRNet_W64_C
......
This diff is collapsed.
#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
......@@ -152,7 +166,7 @@ class GhostNet():
initializer=fluid.initializer.Uniform(-stdv, stdv),
name=name + '_2_weights'),
bias_attr=ParamAttr(name=name + '_2_offset'))
#excitation = fluid.layers.clip(x=excitation, min=0, max=1)
excitation = fluid.layers.clip(x=excitation, min=0, max=1)
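# clipping to [0, 1] turns the excitation into a hard channel-wise gate applied by the multiply below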
se_scale = fluid.layers.elementwise_mul(x=input, y=excitation, axis=0)
return se_scale
......
#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
......@@ -242,6 +242,8 @@ def conv2d(input,
conv = fluid.layers.sigmoid(conv, name=name + '_sigmoid')
elif act == 'swish':
conv = fluid.layers.swish(conv, name=name + '_swish')
elif act == 'relu6':
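# relu6 (min(max(x, 0), 6)) is the activation the EfficientNet-Lite variants use in place of swish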
conv = fluid.layers.relu6(conv, name=name + '_relu6')
elif act == None:
conv = conv
else:
......
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
import numpy as np
import paddle
import paddle.fluid as fluid
from paddle.fluid.param_attr import ParamAttr
__all__ = [
"RegNetX_200MF", "RegNetX_4GF", "RegNetX_32GF", "RegNetY_200MF",
"RegNetY_4GF", "RegNetY_32GF"
]
class RegNet():
def __init__(self, w_a, w_0, w_m, d, group_w, bot_mul, q=8, se_on=False):
self.w_a = w_a
self.w_0 = w_0
self.w_m = w_m
self.d = d
self.q = q
self.group_w = group_w
self.bot_mul = bot_mul
# Stem type
self.stem_type = "simple_stem_in"
# Stem width
self.stem_w = 32
# Block type
self.block_type = "res_bottleneck_block"
# Stride of each stage
self.stride = 2
# Squeeze-and-Excitation (RegNetY)
self.se_on = se_on
self.se_r = 0.25
def quantize_float(self, f, q):
"""Converts a float to closest non-zero int divisible by q."""
return int(round(f / q) * q)
def adjust_ws_gs_comp(self, ws, bms, gs):
"""Adjusts the compatibility of widths and groups."""
ws_bot = [int(w * b) for w, b in zip(ws, bms)]
gs = [min(g, w_bot) for g, w_bot in zip(gs, ws_bot)]
ws_bot = [
self.quantize_float(w_bot, g) for w_bot, g in zip(ws_bot, gs)
]
ws = [int(w_bot / b) for w_bot, b in zip(ws_bot, bms)]
return ws, gs
def get_stages_from_blocks(self, ws, rs):
"""Gets ws/ds of network at each stage from per block values."""
ts = [
w != wp or r != rp
for w, wp, r, rp in zip(ws + [0], [0] + ws, rs + [0], [0] + rs)
]
s_ws = [w for w, t in zip(ws, ts[:-1]) if t]
s_ds = np.diff([d for d, t in zip(range(len(ts)), ts) if t]).tolist()
return s_ws, s_ds
def generate_regnet(self, w_a, w_0, w_m, d, q=8):
"""Generates per block ws from RegNet parameters."""
assert w_a >= 0 and w_0 > 0 and w_m > 1 and w_0 % q == 0
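# RegNet design rule: continuous widths u_j = w_0 + w_a * j are snapped to w_0 * w_m^k
# (the nearest power of w_m), then quantized to multiples of q (the channel divisor)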
ws_cont = np.arange(d) * w_a + w_0
ks = np.round(np.log(ws_cont / w_0) / np.log(w_m))
ws = w_0 * np.power(w_m, ks)
ws = np.round(np.divide(ws, q)) * q
num_stages, max_stage = len(np.unique(ws)), ks.max() + 1
ws, ws_cont = ws.astype(int).tolist(), ws_cont.tolist()
return ws, num_stages, max_stage, ws_cont
def init_weights(self, op_type, filter_size=0, num_channels=0, name=None):
if op_type == 'conv':
fan_out = num_channels * filter_size * filter_size
param_attr = ParamAttr(
name=name + "_weights",
initializer=fluid.initializer.NormalInitializer(
loc=0.0, scale=math.sqrt(2.0 / fan_out)))
bias_attr = False
elif op_type == 'bn':
param_attr = ParamAttr(
name=name + "_scale",
initializer=fluid.initializer.Constant(0.0))
bias_attr = ParamAttr(
name=name + "_offset",
initializer=fluid.initializer.Constant(0.0))
elif op_type == 'final_bn':
param_attr = ParamAttr(
name=name + "_scale",
initializer=fluid.initializer.Constant(1.0))
bias_attr = ParamAttr(
name=name + "_offset",
initializer=fluid.initializer.Constant(0.0))
return param_attr, bias_attr
def net(self, input, class_dim=1000):
# Generate RegNet ws per block
b_ws, num_s, max_s, ws_cont = self.generate_regnet(
self.w_a, self.w_0, self.w_m, self.d, self.q)
# Convert to per stage format
ws, ds = self.get_stages_from_blocks(b_ws, b_ws)
# Generate group widths and bot muls
gws = [self.group_w for _ in range(num_s)]
bms = [self.bot_mul for _ in range(num_s)]
# Adjust the compatibility of ws and gws
ws, gws = self.adjust_ws_gs_comp(ws, bms, gws)
# Use the same stride for each stage
ss = [self.stride for _ in range(num_s)]
# Use SE for RegNetY
se_r = self.se_r
# Construct the model
# Group params by stage
stage_params = list(zip(ds, ws, ss, bms, gws))
# Construct the stem
conv = self.conv_bn_layer(
input=input,
num_filters=self.stem_w,
filter_size=3,
stride=2,
padding=1,
act='relu',
name="stem_conv")
# Construct the stages
for block, (d, w_out, stride, bm, gw) in enumerate(stage_params):
for i in range(d):
# Stride apply to the first block of the stage
b_stride = stride if i == 0 else 1
conv_name = 's' + str(block + 1) + '_b' + str(i +
1) # chr(97 + i)
conv = self.bottleneck_block(
input=conv,
num_filters=w_out,
stride=b_stride,
bm=bm,
gw=gw,
se_r=self.se_r,
name=conv_name)
pool = fluid.layers.pool2d(
input=conv, pool_type='avg', global_pooling=True)
out = fluid.layers.fc(
input=pool,
size=class_dim,
param_attr=ParamAttr(
name="fc_0.w_0",
initializer=fluid.initializer.NormalInitializer(
loc=0.0, scale=0.01)),
bias_attr=ParamAttr(
name="fc_0.b_0", initializer=fluid.initializer.Constant(0.0)))
return out
def conv_bn_layer(self,
input,
num_filters,
filter_size,
stride=1,
groups=1,
padding=0,
act=None,
name=None,
final_bn=False):
param_attr, bias_attr = self.init_weights(
op_type='conv',
filter_size=filter_size,
num_channels=num_filters,
name=name)
conv = fluid.layers.conv2d(
input=input,
num_filters=num_filters,
filter_size=filter_size,
stride=stride,
padding=padding,
groups=groups,
act=None,
param_attr=param_attr,
bias_attr=bias_attr,
name=name + '.conv2d.output.1')
bn_name = name + '_bn'
if final_bn:
param_attr, bias_attr = self.init_weights(
op_type='final_bn', name=bn_name)
else:
param_attr, bias_attr = self.init_weights(
op_type='bn', name=bn_name)
return fluid.layers.batch_norm(
input=conv,
act=act,
name=bn_name + '.output.1',
param_attr=param_attr,
bias_attr=bias_attr,
moving_mean_name=bn_name + '_mean',
moving_variance_name=bn_name + '_variance', )
def shortcut(self, input, ch_out, stride, name):
ch_in = input.shape[1]
if ch_in != ch_out or stride != 1:
return self.conv_bn_layer(
input=input,
num_filters=ch_out,
filter_size=1,
stride=stride,
padding=0,
act=None,
name=name)
else:
return input
def bottleneck_block(self, input, num_filters, stride, bm, gw, se_r, name):
# Compute the bottleneck width
w_b = int(round(num_filters * bm))
# Compute the number of groups
num_gs = w_b // gw
conv0 = self.conv_bn_layer(
input=input,
num_filters=w_b,
filter_size=1,
stride=1,
padding=0,
act='relu',
name=name + "_branch2a")
conv1 = self.conv_bn_layer(
input=conv0,
num_filters=w_b,
filter_size=3,
stride=stride,
padding=1,
groups=num_gs,
act='relu',
name=name + "_branch2b")
# Squeeze-and-Excitation (SE)
if self.se_on:
w_se = int(round(input.shape[1] * se_r))
conv1 = self.squeeze_excitation(
input=conv1,
num_channels=w_b,
reduction_channels=w_se,
name=name + "_branch2se")
conv2 = self.conv_bn_layer(
input=conv1,
num_filters=num_filters,
filter_size=1,
stride=1,
padding=0,
act=None,
name=name + "_branch2c",
final_bn=True)
short = self.shortcut(
input, num_filters, stride, name=name + "_branch1")
return fluid.layers.elementwise_add(x=short, y=conv2, act='relu')
def squeeze_excitation(self,
input,
num_channels,
reduction_channels,
name=None):
pool = fluid.layers.pool2d(
input=input, pool_size=0, pool_type='avg', global_pooling=True)
fan_out = num_channels
squeeze = fluid.layers.conv2d(
input=pool,
num_filters=reduction_channels,
filter_size=1,
act='relu',
param_attr=ParamAttr(
initializer=fluid.initializer.NormalInitializer(
loc=0.0, scale=math.sqrt(2.0 / fan_out)),
name=name + '_sqz_weights'),
bias_attr=ParamAttr(name=name + '_sqz_offset'))
excitation = fluid.layers.conv2d(
input=squeeze,
num_filters=num_channels,
filter_size=1,
act='sigmoid',
param_attr=ParamAttr(
initializer=fluid.initializer.NormalInitializer(
loc=0.0, scale=math.sqrt(2.0 / fan_out)),
name=name + '_exc_weights'),
bias_attr=ParamAttr(name=name + '_exc_offset'))
scale = fluid.layers.elementwise_mul(x=input, y=excitation, axis=0)
return scale
def RegNetX_200MF():
model = RegNet(
w_a=36.44, w_0=24, w_m=2.49, d=13, group_w=8, bot_mul=1.0, q=8)
return model
def RegNetX_4GF():
model = RegNet(
w_a=38.65, w_0=96, w_m=2.43, d=23, group_w=40, bot_mul=1.0, q=8)
return model
def RegNetX_32GF():
model = RegNet(
w_a=69.86, w_0=320, w_m=2.0, d=23, group_w=168, bot_mul=1.0, q=8)
return model
def RegNetY_200MF():
model = RegNet(
w_a=36.44,
w_0=24,
w_m=2.49,
d=13,
group_w=8,
bot_mul=1.0,
q=8,
se_on=True)
return model
def RegNetY_4GF():
model = RegNet(
w_a=31.41,
w_0=96,
w_m=2.24,
d=22,
group_w=64,
bot_mul=1.0,
q=8,
se_on=True)
return model
def RegNetY_32GF():
model = RegNet(
w_a=115.89,
w_0=232,
w_m=2.53,
d=20,
group_w=232,
bot_mul=1.0,
q=8,
se_on=True)
return model
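A minimal usage sketch (not part of the commit; the import path is an assumption based on the package layout) showing how one of these constructors plugs into the static-graph fluid API used elsewhere in the repo:

```python
import numpy as np
import paddle.fluid as fluid

# Import path is assumed; adjust to the actual module location.
from ppcls.modeling.architectures.regnet import RegNetX_4GF

main_prog, startup_prog = fluid.Program(), fluid.Program()
with fluid.program_guard(main_prog, startup_prog):
    image = fluid.data(name='image', shape=[None, 3, 224, 224], dtype='float32')
    logits = RegNetX_4GF().net(image, class_dim=1000)

exe = fluid.Executor(fluid.CPUPlace())
exe.run(startup_prog)

batch = np.random.rand(1, 3, 224, 224).astype('float32')
out, = exe.run(main_prog, feed={'image': batch}, fetch_list=[logits])
print(out.shape)  # expected (1, 1000)
```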
......@@ -144,9 +144,14 @@ def override(dl, ks, v):
override(dl[k], ks[1:], v)
else:
if len(ks) == 1:
assert ks[0] in dl, ('{} is not exist in {}'.format(ks[0], dl))
#assert ks[0] in dl, ('{} is not exist in {}'.format(ks[0], dl))
if not ks[0] in dl:
logger.warning('A new field ({}) detected!'.format(ks[0]))
dl[ks[0]] = str2num(v)
else:
assert ks[0] in dl, (
'({}) doesn\'t exist in {}, a new dict field is invalid'.
format(ks[0], dl))
override(dl[ks[0]], ks[1:], v)
......
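A hedged illustration (not from the commit) of how the dot-separated `-o key=value` overrides map onto `override(dl, ks, v)`: with this change, an unknown leaf key now logs a warning and is inserted as a new field instead of tripping the old assert. The config dictionary below is made up for the example, and `override`/`str2num`/`logger` are assumed to be importable from this module.

```python
config = {'OPTIMIZER': {'function': 'Momentum', 'params': {'momentum': 0.9}}}

# equivalent of passing "-o OPTIMIZER.params.momentum=0.95" on the command line
override(config, 'OPTIMIZER.params.momentum'.split('.'), '0.95')

# a leaf key that does not exist yet: previously an AssertionError,
# now a warning plus a newly inserted field
override(config, 'OPTIMIZER.params.use_nesterov'.split('.'), 'True')

print(config['OPTIMIZER']['params'])
```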
......@@ -119,3 +119,6 @@ VGG19
DarkNet53_ImageNet1k
ResNet50_ACNet_deploy
CSPResNet50_leaky
GhostNet_x0_5
GhostNet_x1_0
GhostNet_x1_3
......@@ -151,10 +151,15 @@ def build(settings, mode='train'):
file_root = settings.TRAIN.data_dir
bs = settings.TRAIN.batch_size if mode == 'train' else settings.VALID.batch_size
print(bs, paddle.fluid.core.get_cuda_device_count())
assert bs % paddle.fluid.core.get_cuda_device_count() == 0, \
gpu_num = paddle.fluid.core.get_cuda_device_count() if (
'PADDLE_TRAINERS_NUM' not in env
or 'PADDLE_TRAINER_ID' not in env) else int(
env.get('PADDLE_TRAINERS_NUM', 0))
assert bs % gpu_num == 0, \
"batch size must be multiple of number of devices"
batch_size = bs // paddle.fluid.core.get_cuda_device_count()
batch_size = bs // gpu_num
image_mean = [0.485, 0.456, 0.406]
image_std = [0.229, 0.224, 0.225]
......
......@@ -456,16 +456,29 @@ def run(dataloader,
logger.scaler('loss', metrics[0][0], total_step, vdl_writer)
total_step += 1
if mode == 'eval':
logger.info("{:s} step:{:<4d} {:s}s".format(mode, idx, fetchs_str))
if idx % config.get('print_interval', 10) == 0:
logger.info("{:s} step:{:<4d} {:s}".format(mode, idx,
fetchs_str))
else:
epoch_str = "epoch:{:<3d}".format(epoch)
step_str = "{:s} step:{:<4d}".format(mode, idx)
logger.info("{:s} {:s} {:s}".format(
logger.coloring(epoch_str, "HEADER")
if idx == 0 else epoch_str,
logger.coloring(step_str, "PURPLE"),
logger.coloring(fetchs_str, 'OKGREEN')))
# Keep statistics for the first 10 batches; they are important during development
if epoch == 0 and idx < 10:
logger.info("{:s} {:s} {:s}".format(
logger.coloring(epoch_str, "HEADER")
if idx == 0 else epoch_str,
logger.coloring(step_str, "PURPLE"),
logger.coloring(fetchs_str, 'OKGREEN')))
else:
if idx % config.get('print_interval', 10) == 0:
logger.info("{:s} {:s} {:s}".format(
logger.coloring(epoch_str, "HEADER")
if idx == 0 else epoch_str,
logger.coloring(step_str, "PURPLE"),
logger.coloring(fetchs_str, 'OKGREEN')))
if config.get('use_dali'):
dataloader.reset()
......
......@@ -5,4 +5,5 @@ export PYTHONPATH=$PWD:$PYTHONPATH
python -m paddle.distributed.launch \
--selected_gpus="0,1,2,3" \
tools/train.py \
-c ./configs/ResNet/ResNet50.yaml
-c ./configs/ResNet/ResNet50.yaml \
-o print_interval=10