From 8406236778268e2398ad3e20a35731e3d3f9f43b Mon Sep 17 00:00:00 2001 From: Guanghua Yu <742925032@qq.com> Date: Tue, 30 Jun 2020 16:03:46 +0800 Subject: [PATCH] Update ssdlite-mbv3 modelzoo (#992) * update mbv3 ssdlite * fix ssdlite link --- configs/mobile/README.md | 31 +++- configs/mobile/README_en.md | 27 ++- configs/ssd/ssdlite_mobilenet_v3_large.yml | 4 +- .../ssd/ssdlite_mobilenet_v3_large_fpn.yml | 170 ++++++++++++++++++ configs/ssd/ssdlite_mobilenet_v3_small.yml | 6 +- .../ssd/ssdlite_mobilenet_v3_small_fpn.yml | 170 ++++++++++++++++++ docs/MODEL_ZOO.md | 8 +- docs/MODEL_ZOO_cn.md | 8 +- ppdet/modeling/architectures/ssd.py | 7 +- ppdet/modeling/backbones/fpn.py | 24 ++- ppdet/modeling/backbones/mobilenet_v3.py | 47 +++-- 11 files changed, 461 insertions(+), 41 deletions(-) mode change 100755 => 100644 configs/mobile/README_en.md create mode 100644 configs/ssd/ssdlite_mobilenet_v3_large_fpn.yml create mode 100644 configs/ssd/ssdlite_mobilenet_v3_small_fpn.yml diff --git a/configs/mobile/README.md b/configs/mobile/README.md index bc155b781..24db1c98c 100755 --- a/configs/mobile/README.md +++ b/configs/mobile/README.md @@ -7,19 +7,22 @@ PaddleDetection目前提供一系列针对移动应用进行优化的模型,主要支持以下结构: -| 骨干网络 | 结构 | 输入大小 | 图片/gpu 1 | 学习率策略 | Box AP | 下载 | PaddleLite模型下载 | +| 骨干网络 | 结构 | 输入大小 | 图片/gpu [1](#gpu) | 学习率策略 | Box AP | 下载 | PaddleLite模型下载 | | :----------------------- | :------------------------ | :---: | :--------------------: | :------------ | :----: | :--- | :----------------- | -| MobileNetV3 Small | SSDLite | 320 | 64 | 400K (cosine) | 16.6 | [链接](https://paddlemodels.bj.bcebos.com/object_detection/mobile_models/ssdlite_mobilenet_v3_small.tar) | [链接](https://paddlemodels.bj.bcebos.com/object_detection/mobile_models/lite/ssdlite_mobilenet_v3_small.tar) | -| MobileNetV3 Large | SSDLite | 320 | 64 | 400K (cosine) | 22.8 | [链接](https://paddlemodels.bj.bcebos.com/object_detection/mobile_models/ssdlite_mobilenet_v3_large.tar) | [链接](https://paddlemodels.bj.bcebos.com/object_detection/mobile_models/lite/ssdlite_mobilenet_v3_large.tar) | +| MobileNetV3 Small | SSDLite | 320 | 64 | 400K (cosine) | 16.2 | [链接](https://paddlemodels.bj.bcebos.com/object_detection/mobile_models/ssdlite_mobilenet_v3_small.pdparams) | [链接](https://paddlemodels.bj.bcebos.com/object_detection/mobile_models/lite/ssdlite_mobilenet_v3_small.tar) | +| MobileNetV3 Small | SSDLite Quant [2](#quant) | 320 | 64 | 400K (cosine) | 15.4 | [链接](https://paddlemodels.bj.bcebos.com/object_detection/mobile_models/ssdlite_mobilenet_v3_small_quant.tar) | [链接](https://paddlemodels.bj.bcebos.com/object_detection/mobile_models/lite/ssdlite_mobilenet_v3_small_quant.tar) | +| MobileNetV3 Large | SSDLite | 320 | 64 | 400K (cosine) | 23.3 | [链接](https://paddlemodels.bj.bcebos.com/object_detection/mobile_models/ssdlite_mobilenet_v3_large.pdparams) | [链接](https://paddlemodels.bj.bcebos.com/object_detection/mobile_models/lite/ssdlite_mobilenet_v3_large.tar) | +| MobileNetV3 Large | SSDLite Quant [2](#quant) | 320 | 64 | 400K (cosine) | 22.6 | [链接](https://paddlemodels.bj.bcebos.com/object_detection/mobile_models/ssdlite_mobilenet_v3_large_quant.tar) | [链接](https://paddlemodels.bj.bcebos.com/object_detection/mobile_models/lite/ssdlite_mobilenet_v3_large_quant.tar) | | MobileNetV3 Large w/ FPN | Cascade RCNN | 320 | 2 | 500k (cosine) | 25.0 | [链接](https://paddlemodels.bj.bcebos.com/object_detection/mobile_models/cascade_rcnn_mobilenetv3_fpn_320.tar) | 
[链接](https://paddlemodels.bj.bcebos.com/object_detection/mobile_models/lite/cascade_rcnn_mobilenetv3_fpn_320.tar) | | MobileNetV3 Large w/ FPN | Cascade RCNN | 640 | 2 | 500k (cosine) | 30.2 | [链接](https://paddlemodels.bj.bcebos.com/object_detection/mobile_models/cascade_rcnn_mobilenetv3_fpn_640.tar) | [链接](https://paddlemodels.bj.bcebos.com/object_detection/mobile_models/lite/cascade_rcnn_mobilenetv3_fpn_640.tar) | | MobileNetV3 Large | YOLOv3 | 320 | 8 | 500K | 27.1 | [链接](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_mobilenet_v3.pdparams) | [链接](https://paddlemodels.bj.bcebos.com/object_detection/mobile_models/lite/yolov3_mobilenet_v3.tar) | -| MobileNetV3 Large | YOLOv3 Prune 2 | 320 | 8 | - | 24.6 | [链接](https://paddlemodels.bj.bcebos.com/object_detection/mobile_models/yolov3_mobilenet_v3_prune75875_FPGM_distillby_r34.pdparams) | [链接](https://paddlemodels.bj.bcebos.com/object_detection/mobile_models/lite/yolov3_mobilenet_v3_prune86_FPGM_320.tar) | +| MobileNetV3 Large | YOLOv3 Prune [3](#prune) | 320 | 8 | - | 24.6 | [链接](https://paddlemodels.bj.bcebos.com/object_detection/mobile_models/yolov3_mobilenet_v3_prune75875_FPGM_distillby_r34.pdparams) | [链接](https://paddlemodels.bj.bcebos.com/object_detection/mobile_models/lite/yolov3_mobilenet_v3_prune86_FPGM_320.tar) | **注意**: -- [1] 模型统一使用8卡训练. -- [2] 参考下面关于YOLO剪裁的说明 +- [1] 模型统一使用8卡训练。 +- [2] 参考下面关于[SSDLite量化的说明](#SSDLite量化说明)。 +- [3] 参考下面关于[YOLO剪裁的说明](#YOLOv3剪裁说明)。 ## 评测结果 @@ -37,7 +40,9 @@ PaddleDetection目前提供一系列针对移动应用进行优化的模型, | | SD625 | SD835 | SD845 | SD855 | Kirin 970 | Kirin 980 | |------------------|---------|---------|---------|---------|-----------|-----------| | SSDLite Large | 289.071 | 134.408 | 91.933 | 48.2206 | 144.914 | 55.1186 | +| SSDLite Large Quant | | | | | | | | SSDLite Small | 122.932 | 57.1914 | 41.003 | 22.0694 | 61.5468 | 25.2106 | +| SSDLite Small Quant | | | | | | | | YOLOv3 baseline | 1082.5 | 435.77 | 317.189 | 155.948 | 536.987 | 178.999 | | YOLOv3 prune | 253.98 | 131.279 | 89.4124 | 48.2856 | 122.732 | 55.8626 | | Cascade RCNN 320 | 286.526 | 125.635 | 87.404 | 46.184 | 149.179 | 52.9994 | @@ -48,16 +53,28 @@ PaddleDetection目前提供一系列针对移动应用进行优化的模型, | | SD625 | SD835 | SD845 | SD855 | Kirin 970 | Kirin 980 | |------------------|---------|---------|---------|---------|-----------|-----------| | SSDLite Large | 107.535 | 51.1382 | 34.6392 | 20.4978 | 50.5598 | 24.5318 | +| SSDLite Large Quant | | | | | | | | SSDLite Small | 51.5704 | 24.5156 | 18.5486 | 11.4218 | 24.9946 | 16.7158 | +| SSDLite Small Quant | | | | | | | | YOLOv3 baseline | 413.486 | 184.248 | 133.624 | 75.7354 | 202.263 | 126.435 | | YOLOv3 prune | 98.5472 | 53.6228 | 34.4306 | 21.3112 | 44.0722 | 31.201 | | Cascade RCNN 320 | 131.515 | 59.6026 | 39.4338 | 23.5802 | 58.5046 | 36.9486 | | Cascade RCNN 640 | 473.083 | 224.543 | 156.205 | 100.686 | 231.108 | 138.391 | +## SSDLite量化说明 + +在SSDLite模型中我们采用完整量化训练的方式对模型进行训练,在8卡GPU下共训练40万轮,训练中将`res_conv1`与`se_block`固定不训练,执行指令为: + +```shell +python slim/quantization/train.py --not_quant_pattern res_conv1 se_block \ + -c configs/ssd/ssdlite_mobilenet_v3_large.yml \ + --eval +``` +更多量化教程请参考[模型量化压缩教程](../../docs/advanced_tutorials/slim/quantization/QUANTIZATION.md) ## YOLOv3剪裁说明 -首先对YOLO检测头进行剪裁,然后再使用 YOLOv3-ResNet34 作为teacher网络对剪裁后的模型进行蒸馏, teacher网络在COCO的mAP为31.4 (输入大小320\*320). +首先对YOLO检测头进行剪裁,然后再使用 YOLOv3-ResNet34 作为teacher网络对剪裁后的模型进行蒸馏, teacher网络在COCO上的mAP为31.4 (输入大小320\*320). 
可以使用如下两种方式进行剪裁:
diff --git a/configs/mobile/README_en.md b/configs/mobile/README_en.md
old mode 100755
new mode 100644
index 2d6d3a1b8..133afffe9
--- a/configs/mobile/README_en.md
+++ b/configs/mobile/README_en.md
@@ -7,10 +7,12 @@ English | [简体中文](README.md)
 This directory contains models optimized for mobile applications, at present the following models included:
-| Backbone | Architecture | Input | Image/gpu 1 | Lr schd | Box AP | Download | PaddleLite Model Download |
+| Backbone | Architecture | Input | Image/gpu [1](#gpu) | Lr schd | Box AP | Download | PaddleLite Model Download |
 | :----------------------- | :------------------------ | :---: | :--------------------: | :------------ | :----: | :------- | :------------------------ |
-| MobileNetV3 Small | SSDLite | 320 | 64 | 400K (cosine) | 16.6 | [Link](https://paddlemodels.bj.bcebos.com/object_detection/mobile_models/ssdlite_mobilenet_v3_small.tar) | [Link](https://paddlemodels.bj.bcebos.com/object_detection/mobile_models/lite/ssdlite_mobilenet_v3_small.tar) |
-| MobileNetV3 Large | SSDLite | 320 | 64 | 400K (cosine) | 22.8 | [Link](https://paddlemodels.bj.bcebos.com/object_detection/mobile_models/ssdlite_mobilenet_v3_large.tar) | [Link](https://paddlemodels.bj.bcebos.com/object_detection/mobile_models/lite/ssdlite_mobilenet_v3_large.tar) |
+| MobileNetV3 Small | SSDLite | 320 | 64 | 400K (cosine) | 16.2 | [Link](https://paddlemodels.bj.bcebos.com/object_detection/mobile_models/ssdlite_mobilenet_v3_small.pdparams) | [Link](https://paddlemodels.bj.bcebos.com/object_detection/mobile_models/lite/ssdlite_mobilenet_v3_small.tar) |
+| MobileNetV3 Small | SSDLite Quant [2](#quant) | 320 | 64 | 400K (cosine) | 15.4 | [Link](https://paddlemodels.bj.bcebos.com/object_detection/mobile_models/ssdlite_mobilenet_v3_small_quant.tar) | [Link](https://paddlemodels.bj.bcebos.com/object_detection/mobile_models/lite/ssdlite_mobilenet_v3_small_quant.tar) |
+| MobileNetV3 Large | SSDLite | 320 | 64 | 400K (cosine) | 23.3 | [Link](https://paddlemodels.bj.bcebos.com/object_detection/mobile_models/ssdlite_mobilenet_v3_large.pdparams) | [Link](https://paddlemodels.bj.bcebos.com/object_detection/mobile_models/lite/ssdlite_mobilenet_v3_large.tar) |
+| MobileNetV3 Large | SSDLite Quant [2](#quant) | 320 | 64 | 400K (cosine) | 22.6 | [Link](https://paddlemodels.bj.bcebos.com/object_detection/mobile_models/ssdlite_mobilenet_v3_large_quant.tar) | [Link](https://paddlemodels.bj.bcebos.com/object_detection/mobile_models/lite/ssdlite_mobilenet_v3_large_quant.tar) |
 | MobileNetV3 Large w/ FPN | Cascade RCNN | 320 | 2 | 500k (cosine) | 25.0 | [Link](https://paddlemodels.bj.bcebos.com/object_detection/mobile_models/cascade_rcnn_mobilenetv3_fpn_320.tar) | [Link](https://paddlemodels.bj.bcebos.com/object_detection/mobile_models/lite/cascade_rcnn_mobilenetv3_fpn_320.tar) |
 | MobileNetV3 Large w/ FPN | Cascade RCNN | 640 | 2 | 500k (cosine) | 30.2 | [Link](https://paddlemodels.bj.bcebos.com/object_detection/mobile_models/cascade_rcnn_mobilenetv3_fpn_640.tar) | [Link](https://paddlemodels.bj.bcebos.com/object_detection/mobile_models/lite/cascade_rcnn_mobilenetv3_fpn_640.tar) |
 | MobileNetV3 Large | YOLOv3 | 320 | 8 | 500K | 27.1 | [Link](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_mobilenet_v3.pdparams) | [Link](https://paddlemodels.bj.bcebos.com/object_detection/mobile_models/lite/yolov3_mobilenet_v3.tar) |
@@ -19,7 +21,8 @@ This directory contains models optimized for mobile applications, at present the
 **Notes**:
 - [1] All models are trained on 8 GPUs.
-- [2] See the note section on how YOLO head is pruned
+- [2] See the note section on [SSDLite quantization](#notes-on-ssdlite-quantization).
+- [3] See the note section on [how YOLO head is pruned](#notes-on-yolov3-pruning).
 ## Benchmarks Results
@@ -37,7 +40,9 @@ This directory contains models optimized for mobile applications, at present the
 | | SD625 | SD835 | SD845 | SD855 | Kirin 970 | Kirin 980 |
 |------------------|---------|---------|---------|---------|-----------|-----------|
 | SSDLite Large | 289.071 | 134.408 | 91.933 | 48.2206 | 144.914 | 55.1186 |
+| SSDLite Large Quant | | | | | | |
 | SSDLite Small | 122.932 | 57.1914 | 41.003 | 22.0694 | 61.5468 | 25.2106 |
+| SSDLite Small Quant | | | | | | |
 | YOLOv3 baseline | 1082.5 | 435.77 | 317.189 | 155.948 | 536.987 | 178.999 |
 | YOLOv3 prune | 253.98 | 131.279 | 89.4124 | 48.2856 | 122.732 | 55.8626 |
 | Cascade RCNN 320 | 286.526 | 125.635 | 87.404 | 46.184 | 149.179 | 52.9994 |
@@ -48,13 +53,27 @@ This directory contains models optimized for mobile applications, at present the
 | | SD625 | SD835 | SD845 | SD855 | Kirin 970 | Kirin 980 |
 |------------------|---------|---------|---------|---------|-----------|-----------|
 | SSDLite Large | 107.535 | 51.1382 | 34.6392 | 20.4978 | 50.5598 | 24.5318 |
+| SSDLite Large Quant | | | | | | |
 | SSDLite Small | 51.5704 | 24.5156 | 18.5486 | 11.4218 | 24.9946 | 16.7158 |
+| SSDLite Small Quant | | | | | | |
 | YOLOv3 baseline | 413.486 | 184.248 | 133.624 | 75.7354 | 202.263 | 126.435 |
 | YOLOv3 prune | 98.5472 | 53.6228 | 34.4306 | 21.3112 | 44.0722 | 31.201 |
 | Cascade RCNN 320 | 131.515 | 59.6026 | 39.4338 | 23.5802 | 58.5046 | 36.9486 |
 | Cascade RCNN 640 | 473.083 | 224.543 | 156.205 | 100.686 | 231.108 | 138.391 |
+## Notes on SSDLite quantization
+
+The SSDLite models are trained with full quantization-aware training for a total of 400,000 iterations on 8 GPUs, with `res_conv1` and `se_block` kept fixed (excluded from quantization). The command used is listed below:
+
+```shell
+python slim/quantization/train.py --not_quant_pattern res_conv1 se_block \
+    -c configs/ssd/ssdlite_mobilenet_v3_large.yml \
+    --eval
+```
+
+For more quantization tutorials, please refer to the [Model Quantization Compression Tutorial](../../docs/advanced_tutorials/slim/quantization/QUANTIZATION.md).
+
 ## Notes on YOLOv3 pruning
 
 We pruned the YOLO-head and distill the pruned model with YOLOv3-ResNet34 as the teacher, which has a higher mAP on COCO (31.4 with 320\*320 input).
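The quantization command above starts a single process, while the note states the models are trained on 8 GPUs. A multi-GPU launch would look roughly like the sketch below; it is only a sketch, assuming the `--selected_gpus` form of `paddle.distributed.launch` available in the PaddlePaddle 1.x releases this patch targets and that `slim/quantization/train.py` can be started through that launcher, neither of which is shown in the patch itself.

```shell
# Hypothetical 8-GPU launch of the quantization-aware training command above (not part of this patch).
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
python -m paddle.distributed.launch --selected_gpus 0,1,2,3,4,5,6,7 \
    slim/quantization/train.py --not_quant_pattern res_conv1 se_block \
    -c configs/ssd/ssdlite_mobilenet_v3_large.yml \
    --eval
```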
diff --git a/configs/ssd/ssdlite_mobilenet_v3_large.yml b/configs/ssd/ssdlite_mobilenet_v3_large.yml index 43d08600e..85cda27f6 100644 --- a/configs/ssd/ssdlite_mobilenet_v3_large.yml +++ b/configs/ssd/ssdlite_mobilenet_v3_large.yml @@ -26,8 +26,10 @@ MobileNetV3: scale: 1.0 model_name: large extra_block_filters: [[256, 512], [128, 256], [128, 256], [64, 128]] - conv_decay: 0.00004 feature_maps: [5, 7, 8, 9, 10, 11] + lr_mult_list: [0.25, 0.25, 0.5, 0.5, 0.75] + conv_decay: 0.00004 + multiplier: 0.5 SSDLiteMultiBoxHead: aspect_ratios: [[2.], [2., 3.], [2., 3.], [2., 3.], [2., 3.], [2., 3.]] diff --git a/configs/ssd/ssdlite_mobilenet_v3_large_fpn.yml b/configs/ssd/ssdlite_mobilenet_v3_large_fpn.yml new file mode 100644 index 000000000..f168785dd --- /dev/null +++ b/configs/ssd/ssdlite_mobilenet_v3_large_fpn.yml @@ -0,0 +1,170 @@ +architecture: SSD +use_gpu: true +max_iters: 400000 +snapshot_iter: 20000 +log_smooth_window: 20 +log_iter: 20 +metric: COCO +pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV3_large_x1_0_ssld_pretrained.tar +save_dir: output +weights: output/ssdlite_mobilenet_v3_large_fpn/model_final +# 80(label_class) + 1(background) +num_classes: 81 + +SSD: + backbone: MobileNetV3 + fpn: FPN + multi_box_head: SSDLiteMultiBoxHead + output_decoder: + background_label: 0 + keep_top_k: 200 + nms_eta: 1.0 + nms_threshold: 0.45 + nms_top_k: 400 + score_threshold: 0.01 + +FPN: + num_chan: 256 + max_level: 7 + norm_type: bn + norm_decay: 0.00004 + reverse_out: true + +MobileNetV3: + scale: 1.0 + model_name: large + extra_block_filters: [[256, 512], [128, 256], [128, 256], [64, 128]] + feature_maps: [5, 7, 8, 9, 10, 11] + lr_mult_list: [0.25, 0.25, 0.5, 0.5, 0.75] + conv_decay: 0.00004 + +SSDLiteMultiBoxHead: + aspect_ratios: [[2.], [2., 3.], [2., 3.], [2., 3.], [2., 3.], [2., 3.]] + base_size: 320 + steps: [16, 32, 64, 107, 160, 320] + flip: true + clip: true + max_ratio: 95 + min_ratio: 20 + offset: 0.5 + conv_decay: 0.00004 + +LearningRate: + base_lr: 0.4 + schedulers: + - !CosineDecay + max_iters: 400000 + - !LinearWarmup + start_factor: 0.33333 + steps: 2000 + +OptimizerBuilder: + optimizer: + momentum: 0.9 + type: Momentum + regularizer: + factor: 0.0005 + type: L2 + +TrainReader: + inputs_def: + image_shape: [3, 320, 320] + fields: ['image', 'gt_bbox', 'gt_class'] + dataset: + !COCODataSet + dataset_dir: dataset/coco + anno_path: annotations/instances_train2017.json + image_dir: train2017 + sample_transforms: + - !DecodeImage + to_rgb: true + - !RandomDistort + brightness_lower: 0.875 + brightness_upper: 1.125 + is_order: true + - !RandomExpand + fill_value: [123.675, 116.28, 103.53] + - !RandomCrop + allow_no_crop: false + - !NormalizeBox {} + - !ResizeImage + interp: 1 + target_size: 320 + use_cv2: false + - !RandomFlipImage + is_normalized: false + - !NormalizeImage + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + is_scale: true + is_channel_first: false + - !Permute + to_bgr: false + channel_first: true + batch_size: 64 + shuffle: true + drop_last: true + # Number of working threads/processes. To speed up, can be set to 16 or 32 etc. + worker_num: 8 + # Size of shared memory used in result queue. After increasing `worker_num`, need expand `memsize`. + memsize: 8G + # Buffer size for multi threads/processes.one instance in buffer is one batch data. + # To speed up, can be set to 64 or 128 etc. 
+ bufsize: 32 + use_process: true + + +EvalReader: + inputs_def: + image_shape: [3, 320, 320] + fields: ['image', 'gt_bbox', 'gt_class', 'im_shape', 'im_id'] + dataset: + !COCODataSet + dataset_dir: dataset/coco + anno_path: annotations/instances_val2017.json + image_dir: val2017 + sample_transforms: + - !DecodeImage + to_rgb: true + - !NormalizeBox {} + - !ResizeImage + interp: 1 + target_size: 320 + use_cv2: false + - !NormalizeImage + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + is_scale: true + is_channel_first: false + - !Permute + to_bgr: false + channel_first: True + batch_size: 8 + worker_num: 8 + bufsize: 32 + use_process: false + +TestReader: + inputs_def: + image_shape: [3,320,320] + fields: ['image', 'im_id', 'im_shape'] + dataset: + !ImageFolder + anno_path: annotations/instances_val2017.json + sample_transforms: + - !DecodeImage + to_rgb: true + - !ResizeImage + interp: 1 + max_size: 0 + target_size: 320 + use_cv2: false + - !NormalizeImage + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + is_scale: true + is_channel_first: false + - !Permute + to_bgr: false + channel_first: True + batch_size: 1 diff --git a/configs/ssd/ssdlite_mobilenet_v3_small.yml b/configs/ssd/ssdlite_mobilenet_v3_small.yml index 528ac6d3f..d09f96702 100644 --- a/configs/ssd/ssdlite_mobilenet_v3_small.yml +++ b/configs/ssd/ssdlite_mobilenet_v3_small.yml @@ -5,7 +5,7 @@ snapshot_iter: 20000 log_smooth_window: 20 log_iter: 20 metric: COCO -pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV3_small_x1_0_pretrained.tar +pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV3_small_x1_0_ssld_pretrained.tar save_dir: output weights: output/ssd_mobilenet_v3_small/model_final # 80(label_class) + 1(background) @@ -26,8 +26,10 @@ MobileNetV3: scale: 1.0 model_name: small extra_block_filters: [[256, 512], [128, 256], [128, 256], [64, 128]] - conv_decay: 0.00004 feature_maps: [5, 7, 8, 9, 10, 11] + lr_mult_list: [0.25, 0.25, 0.5, 0.5, 0.75] + conv_decay: 0.00004 + multiplier: 0.5 SSDLiteMultiBoxHead: aspect_ratios: [[2.], [2., 3.], [2., 3.], [2., 3.], [2., 3.], [2., 3.]] diff --git a/configs/ssd/ssdlite_mobilenet_v3_small_fpn.yml b/configs/ssd/ssdlite_mobilenet_v3_small_fpn.yml new file mode 100644 index 000000000..1d8182aa2 --- /dev/null +++ b/configs/ssd/ssdlite_mobilenet_v3_small_fpn.yml @@ -0,0 +1,170 @@ +architecture: SSD +use_gpu: true +max_iters: 400000 +snapshot_iter: 20000 +log_smooth_window: 20 +log_iter: 20 +metric: COCO +pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV3_small_x1_0_ssld_pretrained.tar +save_dir: output +weights: output/ssdlite_mobilenet_v3_small_fpn/model_final +# 80(label_class) + 1(background) +num_classes: 81 + +SSD: + backbone: MobileNetV3 + fpn: FPN + multi_box_head: SSDLiteMultiBoxHead + output_decoder: + background_label: 0 + keep_top_k: 200 + nms_eta: 1.0 + nms_threshold: 0.45 + nms_top_k: 400 + score_threshold: 0.01 + +FPN: + num_chan: 256 + max_level: 7 + norm_type: bn + norm_decay: 0.00004 + reverse_out: true + +MobileNetV3: + scale: 1.0 + model_name: small + extra_block_filters: [[256, 512], [128, 256], [128, 256], [64, 128]] + feature_maps: [5, 7, 8, 9, 10, 11] + lr_mult_list: [0.25, 0.25, 0.5, 0.5, 0.75] + conv_decay: 0.00004 + +SSDLiteMultiBoxHead: + aspect_ratios: [[2.], [2., 3.], [2., 3.], [2., 3.], [2., 3.], [2., 3.]] + base_size: 320 + steps: [16, 32, 64, 107, 160, 320] + flip: true + clip: true + max_ratio: 95 + min_ratio: 20 + offset: 0.5 + conv_decay: 0.00004 + 
+LearningRate: + base_lr: 0.4 + schedulers: + - !CosineDecay + max_iters: 400000 + - !LinearWarmup + start_factor: 0.33333 + steps: 2000 + +OptimizerBuilder: + optimizer: + momentum: 0.9 + type: Momentum + regularizer: + factor: 0.0005 + type: L2 + +TrainReader: + inputs_def: + image_shape: [3, 320, 320] + fields: ['image', 'gt_bbox', 'gt_class'] + dataset: + !COCODataSet + dataset_dir: dataset/coco + anno_path: annotations/instances_train2017.json + image_dir: train2017 + sample_transforms: + - !DecodeImage + to_rgb: true + - !RandomDistort + brightness_lower: 0.875 + brightness_upper: 1.125 + is_order: true + - !RandomExpand + fill_value: [123.675, 116.28, 103.53] + - !RandomCrop + allow_no_crop: false + - !NormalizeBox {} + - !ResizeImage + interp: 1 + target_size: 320 + use_cv2: false + - !RandomFlipImage + is_normalized: false + - !NormalizeImage + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + is_scale: true + is_channel_first: false + - !Permute + to_bgr: false + channel_first: true + batch_size: 64 + shuffle: true + drop_last: true + # Number of working threads/processes. To speed up, can be set to 16 or 32 etc. + worker_num: 8 + # Size of shared memory used in result queue. After increasing `worker_num`, need expand `memsize`. + memsize: 8G + # Buffer size for multi threads/processes.one instance in buffer is one batch data. + # To speed up, can be set to 64 or 128 etc. + bufsize: 32 + use_process: true + + +EvalReader: + inputs_def: + image_shape: [3, 320, 320] + fields: ['image', 'gt_bbox', 'gt_class', 'im_shape', 'im_id'] + dataset: + !COCODataSet + dataset_dir: dataset/coco + anno_path: annotations/instances_val2017.json + image_dir: val2017 + sample_transforms: + - !DecodeImage + to_rgb: true + - !NormalizeBox {} + - !ResizeImage + interp: 1 + target_size: 320 + use_cv2: false + - !NormalizeImage + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + is_scale: true + is_channel_first: false + - !Permute + to_bgr: false + channel_first: True + batch_size: 8 + worker_num: 8 + bufsize: 32 + use_process: false + +TestReader: + inputs_def: + image_shape: [3,320,320] + fields: ['image', 'im_id', 'im_shape'] + dataset: + !ImageFolder + anno_path: annotations/instances_val2017.json + sample_transforms: + - !DecodeImage + to_rgb: true + - !ResizeImage + interp: 1 + max_size: 0 + target_size: 320 + use_cv2: false + - !NormalizeImage + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + is_scale: true + is_channel_first: false + - !Permute + to_bgr: false + channel_first: True + batch_size: 1 diff --git a/docs/MODEL_ZOO.md b/docs/MODEL_ZOO.md index a996271b4..f0a4b07af 100644 --- a/docs/MODEL_ZOO.md +++ b/docs/MODEL_ZOO.md @@ -193,9 +193,11 @@ results of image size 608/416/320 above. 
Deformable conv is added on stage 5 of
 | Backbone | Size | Image/gpu | Lr schd | Inf time (fps) | Box AP | Download | Configs |
 | :------: | :--: | :-------: | :-----: | :------------: | :----: | :----------------------------------------------------------: | :----: |
-| MobileNet_v1 | 300 | 64 | 40w | - | 23.6 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ssdlite_mobilenet_v1.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ssd/ssdlite_mobilenet_v1.yml) |
-| MobileNet_v3 small | 320 | 64 | 40w | - | 16.6 | [model](https://paddlemodels.bj.bcebos.com/object_detection/mobilenet_v3_ssdlite_small.tar) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ssd/ssdlite_mobilenet_v3_small.yml) |
-| MobileNet_v3 large | 320 | 64 | 40w | - | 22.8 | [model](https://paddlemodels.bj.bcebos.com/object_detection/mobilenet_v3_ssdlite_large.tar) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ssd/ssdlite_mobilenet_v3_large.yml) |
+| MobileNet_v1 | 300 | 64 | Cosine decay(40w) | - | 23.6 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ssdlite_mobilenet_v1.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ssd/ssdlite_mobilenet_v1.yml) |
+| MobileNet_v3 small | 320 | 64 | Cosine decay(40w) | - | 16.2 | [model](https://paddlemodels.bj.bcebos.com/object_detection/mobile_models/ssdlite_mobilenet_v3_small.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ssd/ssdlite_mobilenet_v3_small.yml) |
+| MobileNet_v3 large | 320 | 64 | Cosine decay(40w) | - | 23.3 | [model](https://paddlemodels.bj.bcebos.com/object_detection/mobile_models/ssdlite_mobilenet_v3_large.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ssd/ssdlite_mobilenet_v3_large.yml) |
+| MobileNet_v3 small w/ FPN | 320 | 64 | Cosine decay(40w) | - | 18.9 | [model](https://paddlemodels.bj.bcebos.com/object_detection/mobile_models/ssdlite_mobilenet_v3_small_fpn.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ssd/ssdlite_mobilenet_v3_small_fpn.yml) |
+| MobileNet_v3 large w/ FPN | 320 | 64 | Cosine decay(40w) | - | 24.3 | [model](https://paddlemodels.bj.bcebos.com/object_detection/mobile_models/ssdlite_mobilenet_v3_large_fpn.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ssd/ssdlite_mobilenet_v3_large_fpn.yml) |
 **Notes:** `SSDLite` is trained in 8 GPU with total batch size as 512 and uses cosine decay strategy to train.
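The notes above state that the SSDLite models are trained on 8 GPUs with a total batch size of 512 (64 per card, matching `batch_size: 64` in the new configs) under cosine decay. As a rough usage sketch, the new FPN config could be trained and evaluated through PaddleDetection's standard entry points; the launcher flag and the `-o weights=` override are assumed to match the repository version this patch targets.

```shell
# Sketch: train the new SSDLite-MobileNetV3-Large FPN config on 8 GPUs, evaluating during training.
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
python -m paddle.distributed.launch --selected_gpus 0,1,2,3,4,5,6,7 \
    tools/train.py -c configs/ssd/ssdlite_mobilenet_v3_large_fpn.yml --eval

# Sketch: evaluate the final snapshot written to the path set by `weights:` in the config.
python tools/eval.py -c configs/ssd/ssdlite_mobilenet_v3_large_fpn.yml \
    -o weights=output/ssdlite_mobilenet_v3_large_fpn/model_final
```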
diff --git a/docs/MODEL_ZOO_cn.md b/docs/MODEL_ZOO_cn.md
index dc315ff21..0398bbada 100644
--- a/docs/MODEL_ZOO_cn.md
+++ b/docs/MODEL_ZOO_cn.md
@@ -185,9 +185,11 @@ Paddle提供基于ImageNet的骨架网络预训练模型。所有预训练模型
 | 骨架网络 | 输入尺寸 | 每张GPU图片个数 | 学习率策略|推理时间(fps) | Box AP | 下载 | 配置文件 |
 | :----------: | :--: | :-----: | :-----: |:------------: |:----: | :-------: | :----: |
-| MobileNet_v1 | 300 | 64 | 40w | - | 23.6 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/ssdlite_mobilenet_v1.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ssd/ssdlite_mobilenet_v1.yml) |
-| MobileNet_v3 small | 320 | 64 | 40w | - | 16.6 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/mobilenet_v3_ssdlite_small.tar) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ssd/ssdlite_mobilenet_v3_small.yml) |
-| MobileNet_v3 large | 320 | 64 | 40w | - | 22.8 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/mobilenet_v3_ssdlite_large.tar) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ssd/ssdlite_mobilenet_v3_large.yml) |
+| MobileNet_v1 | 300 | 64 | Cosine decay(40w) | - | 23.6 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/ssdlite_mobilenet_v1.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ssd/ssdlite_mobilenet_v1.yml) |
+| MobileNet_v3 small | 320 | 64 | Cosine decay(40w) | - | 16.2 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/mobile_models/ssdlite_mobilenet_v3_small.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ssd/ssdlite_mobilenet_v3_small.yml) |
+| MobileNet_v3 large | 320 | 64 | Cosine decay(40w) | - | 23.3 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/mobile_models/ssdlite_mobilenet_v3_large.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ssd/ssdlite_mobilenet_v3_large.yml) |
+| MobileNet_v3 small w/ FPN | 320 | 64 | Cosine decay(40w) | - | 18.9 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/mobile_models/ssdlite_mobilenet_v3_small_fpn.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ssd/ssdlite_mobilenet_v3_small_fpn.yml) |
+| MobileNet_v3 large w/ FPN | 320 | 64 | Cosine decay(40w) | - | 24.3 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/mobile_models/ssdlite_mobilenet_v3_large_fpn.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ssd/ssdlite_mobilenet_v3_large_fpn.yml) |
 **注意事项:** SSDLite模型使用学习率余弦衰减策略在8卡GPU下总batch size为512。
diff --git a/ppdet/modeling/architectures/ssd.py b/ppdet/modeling/architectures/ssd.py
index 5e082479d..78bd1ca4d 100644
--- a/ppdet/modeling/architectures/ssd.py
+++ b/ppdet/modeling/architectures/ssd.py
@@ -40,16 +40,18 @@ class SSD(object):
     """
     __category__ = 'architecture'
-    __inject__ = ['backbone', 'multi_box_head', 'output_decoder']
+    __inject__ = ['backbone', 'multi_box_head', 'output_decoder', 'fpn']
     __shared__ = ['num_classes']

     def __init__(self,
                  backbone,
+                 fpn=None,
                  multi_box_head='MultiBoxHead',
                  output_decoder=SSDOutputDecoder().__dict__,
                  num_classes=21):
         super(SSD, self).__init__()
         self.backbone = backbone
+        self.fpn = fpn
         self.multi_box_head = multi_box_head
         self.num_classes = num_classes
         self.output_decoder = output_decoder
@@ -70,6 +72,9 @@ class SSD(object):
         # backbone
         body_feats = self.backbone(im)
+        if self.fpn is not None:
+            body_feats, spatial_scale = self.fpn.get_output(body_feats)
+
         if
isinstance(body_feats, OrderedDict): body_feat_names = list(body_feats.keys()) body_feats = [body_feats[name] for name in body_feat_names] diff --git a/ppdet/modeling/backbones/fpn.py b/ppdet/modeling/backbones/fpn.py index 2eefd3158..a89730f4b 100644 --- a/ppdet/modeling/backbones/fpn.py +++ b/ppdet/modeling/backbones/fpn.py @@ -41,6 +41,8 @@ class FPN(object): spatial_scale (list): feature map scaling factor has_extra_convs (bool): whether has extral convolutions in higher levels norm_type (str|None): normalization type, 'bn'/'sync_bn'/'affine_channel' + norm_decay (float): weight decay for normalization layer weights. + reverse_out (bool): whether to flip the output. """ __shared__ = ['norm_type', 'freeze_norm'] @@ -51,8 +53,10 @@ class FPN(object): spatial_scale=[1. / 32., 1. / 16., 1. / 8., 1. / 4.], has_extra_convs=False, norm_type=None, + norm_decay=0., freeze_norm=False, - use_c5=True): + use_c5=True, + reverse_out=False): self.freeze_norm = freeze_norm self.num_chan = num_chan self.min_level = min_level @@ -60,7 +64,9 @@ class FPN(object): self.spatial_scale = spatial_scale self.has_extra_convs = has_extra_convs self.norm_type = norm_type + self.norm_decay = norm_decay self.use_c5 = use_c5 + self.reverse_out = reverse_out def _add_topdown_lateral(self, body_name, body_input, upper_output): lateral_name = 'fpn_inner_' + body_name + '_lateral' @@ -74,6 +80,7 @@ class FPN(object): 1, initializer=initializer, norm_type=self.norm_type, + norm_decay=self.norm_decay, freeze_norm=self.freeze_norm, name=lateral_name, norm_name=lateral_name) @@ -89,8 +96,14 @@ class FPN(object): learning_rate=2., regularizer=L2Decay(0.)), name=lateral_name) - topdown = fluid.layers.resize_nearest( - upper_output, scale=2., name=topdown_name) + if body_input.shape[2] == -1 and body_input.shape[3] == -1: + topdown = fluid.layers.resize_nearest( + upper_output, scale=2., name=topdown_name) + else: + topdown = fluid.layers.resize_nearest( + upper_output, + out_shape=[body_input.shape[2], body_input.shape[3]], + name=topdown_name) return lateral + topdown @@ -122,6 +135,7 @@ class FPN(object): 1, initializer=initializer, norm_type=self.norm_type, + norm_decay=self.norm_decay, freeze_norm=self.freeze_norm, name=fpn_inner_name, norm_name=fpn_inner_name) @@ -158,6 +172,7 @@ class FPN(object): 3, initializer=initializer, norm_type=self.norm_type, + norm_decay=self.norm_decay, freeze_norm=self.freeze_norm, name=fpn_name, norm_name=fpn_name) @@ -217,5 +232,8 @@ class FPN(object): fpn_dict[fpn_name] = fpn_blob fpn_name_list.insert(0, fpn_name) spatial_scale.insert(0, spatial_scale[0] * 0.5) + + if self.reverse_out: + fpn_name_list = fpn_name_list[::-1] res_dict = OrderedDict([(k, fpn_dict[k]) for k in fpn_name_list]) return res_dict, spatial_scale diff --git a/ppdet/modeling/backbones/mobilenet_v3.py b/ppdet/modeling/backbones/mobilenet_v3.py index a65f23753..2b0b309ff 100644 --- a/ppdet/modeling/backbones/mobilenet_v3.py +++ b/ppdet/modeling/backbones/mobilenet_v3.py @@ -45,10 +45,11 @@ class MobileNetV3(object): feature_maps (list): index of stages whose feature maps are returned. extra_block_filters (list): number of filter for each extra block. lr_mult_list (list): learning rate ratio of different blocks, lower learning rate ratio - is need for pretrained model got using distillation(default as + is need for pretrained model got using distillation(default as [1.0, 1.0, 1.0, 1.0, 1.0]). 
- freeze_norm (bool): freeze normalization layers - feature_maps (list): feature maps used in two-stage rcnn models(default as None). + freeze_norm (bool): freeze normalization layers. + multiplier (float): The multiplier by which to reduce the convolution expansion and + number of channels. """ __shared__ = ['norm_type'] @@ -62,7 +63,8 @@ class MobileNetV3(object): norm_decay=0.0, extra_block_filters=[[256, 512], [128, 256], [128, 256], [64, 128]], lr_mult_list=[1.0, 1.0, 1.0, 1.0, 1.0], - freeze_norm=False, ): + freeze_norm=False, + multiplier=1.0): if isinstance(feature_maps, Integral): feature_maps = [feature_maps] @@ -122,6 +124,13 @@ class MobileNetV3(object): else: raise NotImplementedError + if multiplier != 1.0: + self.cfg[-3][2] = int(self.cfg[-3][2] * multiplier) + self.cfg[-2][1] = int(self.cfg[-2][1] * multiplier) + self.cfg[-2][2] = int(self.cfg[-2][2] * multiplier) + self.cfg[-1][1] = int(self.cfg[-1][1] * multiplier) + self.cfg[-1][2] = int(self.cfg[-1][2] * multiplier) + def _conv_bn_layer(self, input, filter_size, @@ -279,21 +288,25 @@ class MobileNetV3(object): if self.block_stride in self.feature_maps: self.end_points.append(conv0) - conv1 = self._conv_bn_layer( - input=conv0, - filter_size=filter_size, - num_filters=num_mid_filter, - stride=stride, - padding=int((filter_size - 1) // 2), - if_act=True, - act=act, - num_groups=num_mid_filter, - use_cudnn=False, - name=name + '_depthwise') + with fluid.name_scope('res_conv1'): + conv1 = self._conv_bn_layer( + input=conv0, + filter_size=filter_size, + num_filters=num_mid_filter, + stride=stride, + padding=int((filter_size - 1) // 2), + if_act=True, + act=act, + num_groups=num_mid_filter, + use_cudnn=False, + name=name + '_depthwise') if use_se: - conv1 = self._se_block( - input=conv1, num_out_filter=num_mid_filter, name=name + '_se') + with fluid.name_scope('se_block'): + conv1 = self._se_block( + input=conv1, + num_out_filter=num_mid_filter, + name=name + '_se') conv2 = self._conv_bn_layer( input=conv1, -- GitLab
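As a quick way to exercise the new configs with the released weights from the model zoo tables above, the standard inference script can be pointed at the published `.pdparams` file. This is only a sketch: `tools/infer.py`, its `--infer_img` flag, the demo image path, and URL support in the `-o weights=` override are assumed to behave as in the PaddleDetection version this patch targets.

```shell
# Sketch: run single-image inference with the released SSDLite-MobileNetV3-Large FPN weights.
python tools/infer.py -c configs/ssd/ssdlite_mobilenet_v3_large_fpn.yml \
    -o weights=https://paddlemodels.bj.bcebos.com/object_detection/mobile_models/ssdlite_mobilenet_v3_large_fpn.pdparams \
    --infer_img=demo/000000570688.jpg
```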