From fb38470f1cb032da4786fa35959b1b4969514a77 Mon Sep 17 00:00:00 2001 From: wangxinxin08 <69842442+wangxinxin08@users.noreply.github.com> Date: Thu, 15 Apr 2021 09:54:04 +0800 Subject: [PATCH] [cherry-pick] add ppyolov2 (#2628) * add ppyolov2 * fix bugs and modify docs * modify code and doc according to review * fix bugs while resolving conflicts --- configs/ppyolo/README.md | 56 ++- configs/ppyolo/README_cn.md | 55 ++- configs/ppyolo/_base_/optimizer_365e.yml | 21 ++ configs/ppyolo/_base_/ppyolov2_r50vd_dcn.yml | 65 ++++ configs/ppyolo/_base_/ppyolov2_reader.yml | 43 +++ .../ppyolo/ppyolov2_r101vd_dcn_365e_coco.yml | 20 + .../ppyolo/ppyolov2_r50vd_dcn_365e_coco.yml | 10 + ppdet/modeling/backbones/darknet.py | 4 +- ppdet/modeling/losses/iou_aware_loss.py | 4 +- ppdet/modeling/necks/yolo_fpn.py | 350 ++++++++++++++++-- ppdet/modeling/ops.py | 5 + static/configs/ppyolo/README.md | 45 +-- static/configs/ppyolo/README_cn.md | 44 +-- static/configs/ppyolo/ppyolov2_r101vd_dcn.yml | 89 +++++ static/configs/ppyolo/ppyolov2_r50vd_dcn.yml | 89 +++++ static/configs/ppyolo/ppyolov2_reader.yml | 111 ++++++ .../ppdet/modeling/anchor_heads/yolo_head.py | 234 +++++++++++- .../ppdet/modeling/losses/iou_aware_loss.py | 3 +- static/ppdet/modeling/losses/yolo_loss.py | 1 - 19 files changed, 1129 insertions(+), 120 deletions(-) create mode 100644 configs/ppyolo/_base_/optimizer_365e.yml create mode 100644 configs/ppyolo/_base_/ppyolov2_r50vd_dcn.yml create mode 100644 configs/ppyolo/_base_/ppyolov2_reader.yml create mode 100644 configs/ppyolo/ppyolov2_r101vd_dcn_365e_coco.yml create mode 100644 configs/ppyolo/ppyolov2_r50vd_dcn_365e_coco.yml create mode 100644 static/configs/ppyolo/ppyolov2_r101vd_dcn.yml create mode 100644 static/configs/ppyolo/ppyolov2_r50vd_dcn.yml create mode 100644 static/configs/ppyolo/ppyolov2_reader.yml diff --git a/configs/ppyolo/README.md b/configs/ppyolo/README.md index 91a1f4ec0..ba73b470a 100644 --- a/configs/ppyolo/README.md +++ 
b/configs/ppyolo/README.md @@ -38,17 +38,20 @@ PP-YOLO improved performance and speed of YOLOv3 with following methods: | Model | GPU number | images/GPU | backbone | input shape | Box APval | Box APtest | V100 FP32(FPS) | V100 TensorRT FP16(FPS) | download | config | |:------------------------:|:-------:|:-------------:|:----------:| :-------:| :------------------: | :-------------------: | :------------: | :---------------------: | :------: | :------: | -| PP-YOLO | 8 | 24 | ResNet50vd | 608 | 44.8 | 45.2 | 72.9 | 155.6 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_1x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_r50vd_dcn_1x_coco.yml) | -| PP-YOLO | 8 | 24 | ResNet50vd | 512 | 43.9 | 44.4 | 89.9 | 188.4 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_1x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_r50vd_dcn_1x_coco.yml) | -| PP-YOLO | 8 | 24 | ResNet50vd | 416 | 42.1 | 42.5 | 109.1 | 215.4 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_1x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_r50vd_dcn_1x_coco.yml) | -| PP-YOLO | 8 | 24 | ResNet50vd | 320 | 38.9 | 39.3 | 132.2 | 242.2 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_1x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_r50vd_dcn_1x_coco.yml) | -| PP-YOLO_2x | 8 | 24 | ResNet50vd | 608 | 45.3 | 45.9 | 72.9 | 155.6 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_2x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_r50vd_dcn_2x_coco.yml) | -| PP-YOLO_2x | 8 | 24 | ResNet50vd | 512 | 44.4 | 45.0 | 89.9 | 188.4 | 
[model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_2x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_r50vd_dcn_2x_coco.yml) | -| PP-YOLO_2x | 8 | 24 | ResNet50vd | 416 | 42.7 | 43.2 | 109.1 | 215.4 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_2x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_r50vd_dcn_2x_coco.yml) | -| PP-YOLO_2x | 8 | 24 | ResNet50vd | 320 | 39.5 | 40.1 | 132.2 | 242.2 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_2x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_r50vd_dcn_2x_coco.yml) | -| PP-YOLO_ResNet18vd | 4 | 32 | ResNet18vd | 512 | 29.2 | 29.5 | 357.1 | 657.9 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r18vd_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_r18vd_coco.yml) | -| PP-YOLO_ResNet18vd | 4 | 32 | ResNet18vd | 416 | 28.6 | 28.9 | 409.8 | 719.4 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r18vd_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_r18vd_coco.yml) | -| PP-YOLO_ResNet18vd | 4 | 32 | ResNet18vd | 320 | 26.2 | 26.4 | 480.7 | 763.4 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r18vd_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_r18vd_coco.yml) | +| PP-YOLO | 8 | 24 | ResNet50vd | 608 | 44.8 | 45.2 | 72.9 | 155.6 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_1x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/ppyolo/ppyolo_r50vd_dcn_1x_coco.yml) | +| PP-YOLO | 8 | 24 | ResNet50vd | 512 | 43.9 | 44.4 | 89.9 | 188.4 | 
[model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_1x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/ppyolo/ppyolo_r50vd_dcn_1x_coco.yml) | +| PP-YOLO | 8 | 24 | ResNet50vd | 416 | 42.1 | 42.5 | 109.1 | 215.4 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_1x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/ppyolo/ppyolo_r50vd_dcn_1x_coco.yml) | +| PP-YOLO | 8 | 24 | ResNet50vd | 320 | 38.9 | 39.3 | 132.2 | 242.2 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_1x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/ppyolo/ppyolo_r50vd_dcn_1x_coco.yml) | +| PP-YOLO_2x | 8 | 24 | ResNet50vd | 608 | 45.3 | 45.9 | 72.9 | 155.6 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_2x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/ppyolo/ppyolo_r50vd_dcn_2x_coco.yml) | +| PP-YOLO_2x | 8 | 24 | ResNet50vd | 512 | 44.4 | 45.0 | 89.9 | 188.4 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_2x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/ppyolo/ppyolo_r50vd_dcn_2x_coco.yml) | +| PP-YOLO_2x | 8 | 24 | ResNet50vd | 416 | 42.7 | 43.2 | 109.1 | 215.4 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_2x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/ppyolo/ppyolo_r50vd_dcn_2x_coco.yml) | +| PP-YOLO_2x | 8 | 24 | ResNet50vd | 320 | 39.5 | 40.1 | 132.2 | 242.2 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_2x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/ppyolo/ppyolo_r50vd_dcn_2x_coco.yml) | +| PP-YOLO | 4 | 32 | ResNet18vd | 512 | 29.2 | 29.5 | 357.1 | 657.9 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r18vd_coco.pdparams) | 
[config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/ppyolo/ppyolo_r18vd_coco.yml) | +| PP-YOLO | 4 | 32 | ResNet18vd | 416 | 28.6 | 28.9 | 409.8 | 719.4 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r18vd_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/ppyolo/ppyolo_r18vd_coco.yml) | +| PP-YOLO | 4 | 32 | ResNet18vd | 320 | 26.2 | 26.4 | 480.7 | 763.4 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r18vd_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/ppyolo/ppyolo_r18vd_coco.yml) | +| PP-YOLOv2 | 8 | 12 | ResNet50vd | 640 | 49.1 | 49.5 | - | - | [model](https://paddledet.bj.bcebos.com/models/ppyolov2_r50vd_dcn_365e_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/ppyolo/ppyolov2_r50vd_dcn_365e_coco.yml) | +| PP-YOLOv2 | 8 | 12 | ResNet101vd | 640 | 49.7 | 50.1 | - | - | [model](https://paddledet.bj.bcebos.com/models/ppyolov2_r101vd_dcn_365e_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/ppyolo/ppyolov2_r101vd_dcn_365e_coco.yml) | + **Notes:** @@ -62,8 +65,8 @@ PP-YOLO improved performance and speed of YOLOv3 with following methods: | Model | GPU number | images/GPU | Model Size | input shape | Box APval | Box AP50val | Kirin 990 1xCore(FPS) | download | config | |:----------------------------:|:-------:|:-------------:|:----------:| :-------:| :------------------: | :--------------------: | :--------------------: | :------: | :------: | -| PP-YOLO_MobileNetV3_large | 4 | 32 | 28MB | 320 | 23.2 | 42.6 | 14.1 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_mbv3_large_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_mbv3_large_coco.yml) | -| PP-YOLO_MobileNetV3_small | 4 | 32 | 16MB | 320 | 17.2 | 33.8 | 21.5 | 
[model](https://paddledet.bj.bcebos.com/models/ppyolo_mbv3_small_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_mbv3_small_coco.yml) | +| PP-YOLO_MobileNetV3_large | 4 | 32 | 28MB | 320 | 23.2 | 42.6 | 14.1 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_mbv3_large_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/ppyolo/ppyolo_mbv3_large_coco.yml) | +| PP-YOLO_MobileNetV3_small | 4 | 32 | 16MB | 320 | 17.2 | 33.8 | 21.5 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_mbv3_small_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/ppyolo/ppyolo_mbv3_small_coco.yml) | **Notes:** @@ -91,9 +94,9 @@ PP-YOLO trained on Pascal VOC dataset as follows: | Model | GPU number | images/GPU | backbone | input shape | Box AP50val | download | config | |:------------------:|:----------:|:----------:|:----------:| :----------:| :--------------------: | :------: | :-----: | -| PP-YOLO | 8 | 12 | ResNet50vd | 608 | 84.9 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_voc.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_r50vd_dcn_voc.yml) | -| PP-YOLO | 8 | 12 | ResNet50vd | 416 | 84.3 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_voc.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_r50vd_dcn_voc.yml) | -| PP-YOLO | 8 | 12 | ResNet50vd | 320 | 82.2 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_voc.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_r50vd_dcn_voc.yml) | +| PP-YOLO | 8 | 12 | ResNet50vd | 608 | 84.9 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_voc.pdparams) | 
[config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/ppyolo/ppyolo_r50vd_dcn_voc.yml) | +| PP-YOLO | 8 | 12 | ResNet50vd | 416 | 84.3 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_voc.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/ppyolo/ppyolo_r50vd_dcn_voc.yml) | +| PP-YOLO | 8 | 12 | ResNet50vd | 320 | 82.2 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_voc.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/ppyolo/ppyolo_r50vd_dcn_voc.yml) | ## Getting Start @@ -184,4 +187,23 @@ Optimizing method and ablation experiments of PP-YOLO compared with YOLOv3. - Performance and inference spedd are measure with input shape as 608 - All models are trained on COCO train2017 datast and evaluated on val2017 & test-dev2017 dataset,`Box AP` is evaluation results as `mAP(IoU=0.5:0.95)`. - Inference speed is tested on single Tesla V100 with batch size as 1 following test method and environment configuration in benchmark above. -- [YOLOv3-DarkNet53](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/yolov3/yolov3_darknet53_270e_coco.yml) with mAP as 39.0 is optimized YOLOv3 model in PaddleDetection,see [Model Zoo](https://github.com/PaddlePaddle/PaddleDetection/blob/master/docs/MODEL_ZOO.md) for details. +- [YOLOv3-DarkNet53](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/yolov3/yolov3_darknet53_270e_coco.yml) with mAP as 39.0 is optimized YOLOv3 model in PaddleDetection,see [Model Zoo](https://github.com/PaddlePaddle/PaddleDetection/blob/master/docs/MODEL_ZOO.md) for details. 
+ +## Citation + +``` +@misc{long2020ppyolo, +title={PP-YOLO: An Effective and Efficient Implementation of Object Detector}, +author={Xiang Long and Kaipeng Deng and Guanzhong Wang and Yang Zhang and Qingqing Dang and Yuan Gao and Hui Shen and Jianguo Ren and Shumin Han and Errui Ding and Shilei Wen}, +year={2020}, +eprint={2007.12099}, +archivePrefix={arXiv}, +primaryClass={cs.CV} +} +@misc{ppdet2019, +title={PaddleDetection, Object detection and instance segmentation toolkit based on PaddlePaddle.}, +author={PaddlePaddle Authors}, +howpublished = {\url{https://github.com/PaddlePaddle/PaddleDetection}}, +year={2019} +} +``` diff --git a/configs/ppyolo/README_cn.md b/configs/ppyolo/README_cn.md index 648d15fe4..9de54d118 100644 --- a/configs/ppyolo/README_cn.md +++ b/configs/ppyolo/README_cn.md @@ -38,17 +38,19 @@ PP-YOLO从如下方面优化和提升YOLOv3模型的精度和速度: | 模型 | GPU个数 | 每GPU图片个数 | 骨干网络 | 输入尺寸 | Box APval | Box APtest | V100 FP32(FPS) | V100 TensorRT FP16(FPS) | 模型下载 | 配置文件 | |:------------------------:|:-------:|:-------------:|:----------:| :-------:| :------------------: | :-------------------: | :------------: | :---------------------: | :------: | :------: | -| PP-YOLO | 8 | 24 | ResNet50vd | 608 | 44.8 | 45.2 | 72.9 | 155.6 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_1x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_r50vd_dcn_1x_coco.yml) | -| PP-YOLO | 8 | 24 | ResNet50vd | 512 | 43.9 | 44.4 | 89.9 | 188.4 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_1x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_r50vd_dcn_1x_coco.yml) | -| PP-YOLO | 8 | 24 | ResNet50vd | 416 | 42.1 | 42.5 | 109.1 | 215.4 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_1x_coco.pdparams) | 
[config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_r50vd_dcn_1x_coco.yml) | -| PP-YOLO | 8 | 24 | ResNet50vd | 320 | 38.9 | 39.3 | 132.2 | 242.2 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_1x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_r50vd_dcn_1x_coco.yml) | -| PP-YOLO_2x | 8 | 24 | ResNet50vd | 608 | 45.3 | 45.9 | 72.9 | 155.6 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_2x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_r50vd_dcn_2x_coco.yml) | -| PP-YOLO_2x | 8 | 24 | ResNet50vd | 512 | 44.4 | 45.0 | 89.9 | 188.4 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_2x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_r50vd_dcn_2x_coco.yml) | -| PP-YOLO_2x | 8 | 24 | ResNet50vd | 416 | 42.7 | 43.2 | 109.1 | 215.4 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_2x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_r50vd_dcn_2x_coco.yml) | -| PP-YOLO_2x | 8 | 24 | ResNet50vd | 320 | 39.5 | 40.1 | 132.2 | 242.2 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_2x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_r50vd_dcn_2x_coco.yml) | -| PP-YOLO_ResNet18vd | 4 | 32 | ResNet18vd | 512 | 29.2 | 29.5 | 357.1 | 657.9 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r18vd_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_r18vd_coco.yml) | -| PP-YOLO_ResNet18vd | 4 | 32 | ResNet18vd | 416 | 28.6 | 28.9 | 409.8 | 719.4 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r18vd_coco.pdparams) | 
[config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_r18vd_coco.yml) | -| PP-YOLO_ResNet18vd | 4 | 32 | ResNet18vd | 320 | 26.2 | 26.4 | 480.7 | 763.4 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r18vd_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_r18vd_coco.yml) | +| PP-YOLO | 8 | 24 | ResNet50vd | 608 | 44.8 | 45.2 | 72.9 | 155.6 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_1x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/ppyolo/ppyolo_r50vd_dcn_1x_coco.yml) | +| PP-YOLO | 8 | 24 | ResNet50vd | 512 | 43.9 | 44.4 | 89.9 | 188.4 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_1x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/ppyolo/ppyolo_r50vd_dcn_1x_coco.yml) | +| PP-YOLO | 8 | 24 | ResNet50vd | 416 | 42.1 | 42.5 | 109.1 | 215.4 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_1x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/ppyolo/ppyolo_r50vd_dcn_1x_coco.yml) | +| PP-YOLO | 8 | 24 | ResNet50vd | 320 | 38.9 | 39.3 | 132.2 | 242.2 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_1x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/ppyolo/ppyolo_r50vd_dcn_1x_coco.yml) | +| PP-YOLO_2x | 8 | 24 | ResNet50vd | 608 | 45.3 | 45.9 | 72.9 | 155.6 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_2x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/ppyolo/ppyolo_r50vd_dcn_2x_coco.yml) | +| PP-YOLO_2x | 8 | 24 | ResNet50vd | 512 | 44.4 | 45.0 | 89.9 | 188.4 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_2x_coco.pdparams) | 
[config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/ppyolo/ppyolo_r50vd_dcn_2x_coco.yml) | +| PP-YOLO_2x | 8 | 24 | ResNet50vd | 416 | 42.7 | 43.2 | 109.1 | 215.4 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_2x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/ppyolo/ppyolo_r50vd_dcn_2x_coco.yml) | +| PP-YOLO_2x | 8 | 24 | ResNet50vd | 320 | 39.5 | 40.1 | 132.2 | 242.2 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_2x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/ppyolo/ppyolo_r50vd_dcn_2x_coco.yml) | +| PP-YOLO | 4 | 32 | ResNet18vd | 512 | 29.2 | 29.5 | 357.1 | 657.9 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r18vd_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/ppyolo/ppyolo_r18vd_coco.yml) | +| PP-YOLO | 4 | 32 | ResNet18vd | 416 | 28.6 | 28.9 | 409.8 | 719.4 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r18vd_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/ppyolo/ppyolo_r18vd_coco.yml) | +| PP-YOLO | 4 | 32 | ResNet18vd | 320 | 26.2 | 26.4 | 480.7 | 763.4 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r18vd_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/ppyolo/ppyolo_r18vd_coco.yml) | +| PP-YOLOv2 | 8 | 12 | ResNet50vd | 640 | 49.1 | 49.5 | - | - | [model](https://paddledet.bj.bcebos.com/models/ppyolov2_r50vd_dcn_365e_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/ppyolo/ppyolov2_r50vd_dcn_365e_coco.yml) | +| PP-YOLOv2 | 8 | 12 | ResNet101vd | 640 | 49.7 | 50.1 | - | - | [model](https://paddledet.bj.bcebos.com/models/ppyolov2_r101vd_dcn_365e_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/ppyolo/ppyolov2_r101vd_dcn_365e_coco.yml) | **注意:** 
@@ -63,8 +65,8 @@ PP-YOLO从如下方面优化和提升YOLOv3模型的精度和速度: | 模型 | GPU个数 | 每GPU图片个数 | 模型体积 | 输入尺寸 | Box APval | Box AP50val | Kirin 990 1xCore (FPS) | 模型下载 | 配置文件 | |:----------------------------:|:-------:|:-------------:|:----------:| :-------:| :------------------: | :--------------------: | :--------------------: | :------: | :------: | -| PP-YOLO_MobileNetV3_large | 4 | 32 | 28MB | 320 | 23.2 | 42.6 | 14.1 | [下载链接](https://paddledet.bj.bcebos.com/models/ppyolo_mbv3_large_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_mbv3_large_coco.yml) | -| PP-YOLO_MobileNetV3_small | 4 | 32 | 16MB | 320 | 17.2 | 33.8 | 21.5 | [下载链接](https://paddledet.bj.bcebos.com/models/ppyolo_mbv3_small_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_mbv3_small_coco.yml) | +| PP-YOLO_MobileNetV3_large | 4 | 32 | 28MB | 320 | 23.2 | 42.6 | 14.1 | [下载链接](https://paddledet.bj.bcebos.com/models/ppyolo_mbv3_large_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/ppyolo/ppyolo_mbv3_large_coco.yml) | +| PP-YOLO_MobileNetV3_small | 4 | 32 | 16MB | 320 | 17.2 | 33.8 | 21.5 | [下载链接](https://paddledet.bj.bcebos.com/models/ppyolo_mbv3_small_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/ppyolo/ppyolo_mbv3_small_coco.yml) | - PP-YOLO_MobileNetV3 模型使用COCO数据集中train2017作为训练集,使用val2017作为测试集,Box APval为`mAP(IoU=0.5:0.95)`评估结果, Box AP50val为`mAP(IoU=0.5)`评估结果。 - PP-YOLO_MobileNetV3 模型训练过程中使用4GPU,每GPU batch size为32进行训练,如训练GPU数和batch size不使用上述配置,须参考[FAQ](https://github.com/PaddlePaddle/PaddleDetection/blob/master/docs/FAQ.md)调整学习率和迭代次数。 @@ -76,9 +78,9 @@ PP-YOLO在Pascal VOC数据集上训练模型如下: | 模型 | GPU个数 | 每GPU图片个数 | 骨干网络 | 输入尺寸 | Box AP50val | 模型下载 | 配置文件 | |:------------------:|:-------:|:-------------:|:----------:| :----------:| :--------------------: | :------: | :-----: | -| PP-YOLO | 8 | 12 | ResNet50vd 
| 608 | 84.9 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_voc.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_r50vd_dcn_voc.yml) | -| PP-YOLO | 8 | 12 | ResNet50vd | 416 | 84.3 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_voc.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_r50vd_dcn_voc.yml) | -| PP-YOLO | 8 | 12 | ResNet50vd | 320 | 82.2 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_voc.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_r50vd_dcn_voc.yml) | +| PP-YOLO | 8 | 12 | ResNet50vd | 608 | 84.9 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_voc.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/ppyolo/ppyolo_r50vd_dcn_voc.yml) | +| PP-YOLO | 8 | 12 | ResNet50vd | 416 | 84.3 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_voc.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/ppyolo/ppyolo_r50vd_dcn_voc.yml) | +| PP-YOLO | 8 | 12 | ResNet50vd | 320 | 82.2 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_voc.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/ppyolo/ppyolo_r50vd_dcn_voc.yml) | ## 使用说明 @@ -169,4 +171,23 @@ PP-YOLO模型相对于YOLOv3模型优化项消融实验数据如下表所示。 - 精度与推理速度数据均为使用输入图像尺寸为608的测试结果 - Box AP为在COCO train2017数据集训练,val2017和test-dev2017数据集上评估`mAP(IoU=0.5:0.95)`数据 - 推理速度为单卡V100上,batch size=1, 使用上述benchmark测试方法的测试结果,测试环境配置为CUDA 10.2,CUDNN 7.5.1 -- [YOLOv3-DarkNet53](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/yolov3/yolov3_darknet53_270e_coco.yml)精度38.9为PaddleDetection优化后的YOLOv3模型,可参见[模型库](https://github.com/PaddlePaddle/PaddleDetection/blob/master/docs/MODEL_ZOO.md) +- 
[YOLOv3-DarkNet53](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/yolov3/yolov3_darknet53_270e_coco.yml)精度38.9为PaddleDetection优化后的YOLOv3模型,可参见[模型库](https://github.com/PaddlePaddle/PaddleDetection/blob/master/docs/MODEL_ZOO.md) + +## 引用 + +``` +@misc{long2020ppyolo, +title={PP-YOLO: An Effective and Efficient Implementation of Object Detector}, +author={Xiang Long and Kaipeng Deng and Guanzhong Wang and Yang Zhang and Qingqing Dang and Yuan Gao and Hui Shen and Jianguo Ren and Shumin Han and Errui Ding and Shilei Wen}, +year={2020}, +eprint={2007.12099}, +archivePrefix={arXiv}, +primaryClass={cs.CV} +} +@misc{ppdet2019, +title={PaddleDetection, Object detection and instance segmentation toolkit based on PaddlePaddle.}, +author={PaddlePaddle Authors}, +howpublished = {\url{https://github.com/PaddlePaddle/PaddleDetection}}, +year={2019} +} +``` diff --git a/configs/ppyolo/_base_/optimizer_365e.yml b/configs/ppyolo/_base_/optimizer_365e.yml new file mode 100644 index 000000000..d834a4ce0 --- /dev/null +++ b/configs/ppyolo/_base_/optimizer_365e.yml @@ -0,0 +1,21 @@ +epoch: 365 + +LearningRate: + base_lr: 0.005 + schedulers: + - !PiecewiseDecay + gamma: 0.1 + milestones: + - 243 + - !LinearWarmup + start_factor: 0. + steps: 4000 + +OptimizerBuilder: + clip_grad_by_norm: 35. 
+ optimizer: + momentum: 0.9 + type: Momentum + regularizer: + factor: 0.0005 + type: L2 diff --git a/configs/ppyolo/_base_/ppyolov2_r50vd_dcn.yml b/configs/ppyolo/_base_/ppyolov2_r50vd_dcn.yml new file mode 100644 index 000000000..6288adeed --- /dev/null +++ b/configs/ppyolo/_base_/ppyolov2_r50vd_dcn.yml @@ -0,0 +1,65 @@ +architecture: YOLOv3 +pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_pretrained.pdparams +norm_type: sync_bn +use_ema: true +ema_decay: 0.9998 + +YOLOv3: + backbone: ResNet + neck: PPYOLOPAN + yolo_head: YOLOv3Head + post_process: BBoxPostProcess + +ResNet: + depth: 50 + variant: d + return_idx: [1, 2, 3] + dcn_v2_stages: [3] + freeze_at: -1 + freeze_norm: false + norm_decay: 0. + +PPYOLOPAN: + drop_block: true + block_size: 3 + keep_prob: 0.9 + spp: true + +YOLOv3Head: + anchors: [[10, 13], [16, 30], [33, 23], + [30, 61], [62, 45], [59, 119], + [116, 90], [156, 198], [373, 326]] + anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]] + loss: YOLOv3Loss + iou_aware: true + iou_aware_factor: 0.5 + +YOLOv3Loss: + ignore_thresh: 0.7 + downsample: [32, 16, 8] + label_smooth: false + scale_x_y: 1.05 + iou_loss: IouLoss + iou_aware_loss: IouAwareLoss + +IouLoss: + loss_weight: 2.5 + loss_square: true + +IouAwareLoss: + loss_weight: 1.0 + +BBoxPostProcess: + decode: + name: YOLOBox + conf_thresh: 0.01 + downsample_ratio: 32 + clip_bbox: true + scale_x_y: 1.05 + nms: + name: MatrixNMS + keep_top_k: 100 + score_threshold: 0.01 + post_threshold: 0.01 + nms_top_k: -1 + background_label: -1 diff --git a/configs/ppyolo/_base_/ppyolov2_reader.yml b/configs/ppyolo/_base_/ppyolov2_reader.yml new file mode 100644 index 000000000..747253131 --- /dev/null +++ b/configs/ppyolo/_base_/ppyolov2_reader.yml @@ -0,0 +1,43 @@ +worker_num: 8 +TrainReader: + inputs_def: + num_max_boxes: 100 + sample_transforms: + - Decode: {} + - Mixup: {alpha: 1.5, beta: 1.5} + - RandomDistort: {} + - RandomExpand: {fill_value: [123.675, 116.28, 103.53]} + - 
RandomCrop: {} + - RandomFlip: {} + batch_transforms: + - BatchRandomResize: {target_size: [320, 352, 384, 416, 448, 480, 512, 544, 576, 608, 640, 672, 704, 736, 768], random_size: True, random_interp: True, keep_ratio: False} + - NormalizeBox: {} + - PadBox: {num_max_boxes: 100} + - BboxXYXY2XYWH: {} + - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True} + - Permute: {} + - Gt2YoloTarget: {anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]], anchors: [[10, 13], [16, 30], [33, 23], [30, 61], [62, 45], [59, 119], [116, 90], [156, 198], [373, 326]], downsample_ratios: [32, 16, 8]} + batch_size: 12 + shuffle: true + drop_last: true + mixup_epoch: 25000 + use_shared_memory: true + +EvalReader: + sample_transforms: + - Decode: {} + - Resize: {target_size: [640, 640], keep_ratio: False, interp: 2} + - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True} + - Permute: {} + batch_size: 8 + drop_empty: false + +TestReader: + inputs_def: + image_shape: [3, 640, 640] + sample_transforms: + - Decode: {} + - Resize: {target_size: [640, 640], keep_ratio: False, interp: 2} + - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True} + - Permute: {} + batch_size: 1 diff --git a/configs/ppyolo/ppyolov2_r101vd_dcn_365e_coco.yml b/configs/ppyolo/ppyolov2_r101vd_dcn_365e_coco.yml new file mode 100644 index 000000000..0f1aee746 --- /dev/null +++ b/configs/ppyolo/ppyolov2_r101vd_dcn_365e_coco.yml @@ -0,0 +1,20 @@ +_BASE_: [ + '../datasets/coco_detection.yml', + '../runtime.yml', + './_base_/ppyolov2_r50vd_dcn.yml', + './_base_/optimizer_365e.yml', + './_base_/ppyolov2_reader.yml', +] + +snapshot_epoch: 8 +weights: output/ppyolov2_r101vd_dcn_365e_coco/model_final +pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet101_vd_ssld_pretrained.pdparams + +ResNet: + depth: 101 + variant: d + return_idx: [1, 2, 3] + dcn_v2_stages: [3] + freeze_at: -1 + freeze_norm: false 
+ norm_decay: 0. diff --git a/configs/ppyolo/ppyolov2_r50vd_dcn_365e_coco.yml b/configs/ppyolo/ppyolov2_r50vd_dcn_365e_coco.yml new file mode 100644 index 000000000..a5e1bc335 --- /dev/null +++ b/configs/ppyolo/ppyolov2_r50vd_dcn_365e_coco.yml @@ -0,0 +1,10 @@ +_BASE_: [ + '../datasets/coco_detection.yml', + '../runtime.yml', + './_base_/ppyolov2_r50vd_dcn.yml', + './_base_/optimizer_365e.yml', + './_base_/ppyolov2_reader.yml', +] + +snapshot_epoch: 8 +weights: output/ppyolov2_r50vd_dcn_365e_coco/model_final diff --git a/ppdet/modeling/backbones/darknet.py b/ppdet/modeling/backbones/darknet.py index 9bf0cdaa9..13af903d4 100755 --- a/ppdet/modeling/backbones/darknet.py +++ b/ppdet/modeling/backbones/darknet.py @@ -18,7 +18,7 @@ import paddle.nn.functional as F from paddle import ParamAttr from paddle.regularizer import L2Decay from ppdet.core.workspace import register, serializable -from ppdet.modeling.ops import batch_norm +from ppdet.modeling.ops import batch_norm, mish from ..shape_spec import ShapeSpec __all__ = ['DarkNet', 'ConvBNLayer'] @@ -77,6 +77,8 @@ class ConvBNLayer(nn.Layer): out = self.batch_norm(out) if self.act == 'leaky': out = F.leaky_relu(out, 0.1) + elif self.act == 'mish': + out = mish(out) return out diff --git a/ppdet/modeling/losses/iou_aware_loss.py b/ppdet/modeling/losses/iou_aware_loss.py index f5599588c..1e6aa8bf0 100644 --- a/ppdet/modeling/losses/iou_aware_loss.py +++ b/ppdet/modeling/losses/iou_aware_loss.py @@ -42,7 +42,7 @@ class IouAwareLoss(IouLoss): iou = bbox_iou( pbox, gbox, giou=self.giou, diou=self.diou, ciou=self.ciou) iou.stop_gradient = True - ioup = F.sigmoid(ioup) - loss_iou_aware = (-iou * paddle.log(ioup)).sum(-2, keepdim=True) + loss_iou_aware = F.binary_cross_entropy_with_logits( + ioup, iou, reduction='none') loss_iou_aware = loss_iou_aware * self.loss_weight return loss_iou_aware diff --git a/ppdet/modeling/necks/yolo_fpn.py b/ppdet/modeling/necks/yolo_fpn.py index 15309eac6..873e43f0e 100644 --- 
a/ppdet/modeling/necks/yolo_fpn.py +++ b/ppdet/modeling/necks/yolo_fpn.py @@ -25,6 +25,32 @@ from ..shape_spec import ShapeSpec __all__ = ['YOLOv3FPN', 'PPYOLOFPN'] +def add_coord(x, data_format='NCHW'): + b = x.shape[0] + if data_format == 'NCHW': + h = x.shape[2] + w = x.shape[3] + else: + h = x.shape[1] + w = x.shape[2] + + gx = paddle.arange(w, dtype='float32') / (w - 1.) * 2.0 - 1. + if data_format == 'NCHW': + gx = gx.reshape([1, 1, 1, w]).expand([b, 1, h, w]) + else: + gx = gx.reshape([1, 1, w, 1]).expand([b, h, w, 1]) + gx.stop_gradient = True + + gy = paddle.arange(h, dtype='float32') / (h - 1.) * 2.0 - 1. + if data_format == 'NCHW': + gy = gy.reshape([1, 1, h, 1]).expand([b, 1, h, w]) + else: + gy = gy.reshape([1, h, 1, 1]).expand([b, h, w, 1]) + gy.stop_gradient = True + + return gx, gy + + class YoloDetBlock(nn.Layer): def __init__(self, ch_in, channel, norm_type, name, data_format='NCHW'): """ @@ -87,6 +113,7 @@ class SPP(nn.Layer): pool_size, norm_type, name, + act='leaky', data_format='NCHW'): """ SPP layer, which consist of four pooling layer follwed by conv layer @@ -101,6 +128,7 @@ class SPP(nn.Layer): """ super(SPP, self).__init__() self.pool = [] + self.data_format = data_format for size in pool_size: pool = self.add_sublayer( '{}.pool1'.format(name), @@ -118,13 +146,18 @@ class SPP(nn.Layer): padding=k // 2, norm_type=norm_type, name=name, + act=act, data_format=data_format) def forward(self, x): outs = [x] for pool in self.pool: outs.append(pool(x)) - y = paddle.concat(outs, axis=1) + if self.data_format == "NCHW": + y = paddle.concat(outs, axis=1) + else: + y = paddle.concat(outs, axis=-1) + y = self.conv(y) return y @@ -204,28 +237,7 @@ class CoordConv(nn.Layer): self.data_format = data_format def forward(self, x): - b = x.shape[0] - if self.data_format == 'NCHW': - h = x.shape[2] - w = x.shape[3] - else: - h = x.shape[1] - w = x.shape[2] - - gx = paddle.arange(w, dtype='float32') / (w - 1.) * 2.0 - 1. 
- if self.data_format == 'NCHW': - gx = gx.reshape([1, 1, 1, w]).expand([b, 1, h, w]) - else: - gx = gx.reshape([1, 1, w, 1]).expand([b, h, w, 1]) - gx.stop_gradient = True - - gy = paddle.arange(h, dtype='float32') / (h - 1.) * 2.0 - 1. - if self.data_format == 'NCHW': - gy = gy.reshape([1, 1, h, 1]).expand([b, 1, h, w]) - else: - gy = gy.reshape([1, h, 1, 1]).expand([b, h, w, 1]) - gy.stop_gradient = True - + gx, gy = add_coord(x, self.data_format) if self.data_format == 'NCHW': y = paddle.concat([x, gx, gy], axis=1) else: @@ -273,7 +285,6 @@ class PPYOLOTinyDetBlock(nn.Layer): data_format='NCHW'): """ PPYOLO Tiny DetBlock layer - Args: ch_in (list): input channel number ch_out (list): output channel number @@ -333,6 +344,73 @@ class PPYOLOTinyDetBlock(nn.Layer): return route, tip +class PPYOLODetBlockCSP(nn.Layer): + def __init__(self, + cfg, + ch_in, + ch_out, + act, + norm_type, + name, + data_format='NCHW'): + """ + PPYOLODetBlockCSP layer + + Args: + cfg (list): layer configs for this block + ch_in (int): input channel + ch_out (int): output channel + act (str): default mish + name (str): block name + data_format (str): data format, NCHW or NHWC + """ + super(PPYOLODetBlockCSP, self).__init__() + self.data_format = data_format + self.conv1 = ConvBNLayer( + ch_in, + ch_out, + 1, + padding=0, + act=act, + norm_type=norm_type, + name=name + '.left', + data_format=data_format) + self.conv2 = ConvBNLayer( + ch_in, + ch_out, + 1, + padding=0, + act=act, + norm_type=norm_type, + name=name + '.right', + data_format=data_format) + self.conv3 = ConvBNLayer( + ch_out * 2, + ch_out * 2, + 1, + padding=0, + act=act, + norm_type=norm_type, + name=name, + data_format=data_format) + self.conv_module = nn.Sequential() + for idx, (layer_name, layer, args, kwargs) in enumerate(cfg): + kwargs.update(name=name + layer_name, data_format=data_format) + self.conv_module.add_sublayer(layer_name, layer(*args, **kwargs)) + + def forward(self, inputs): + conv_left = self.conv1(inputs) + conv_right = 
self.conv2(inputs) + conv_left = self.conv_module(conv_left) + if self.data_format == 'NCHW': + conv = paddle.concat([conv_left, conv_right], axis=1) + else: + conv = paddle.concat([conv_left, conv_right], axis=-1) + + conv = self.conv3(conv) + return conv, conv + + @register @serializable class YOLOv3FPN(nn.Layer): @@ -430,7 +508,12 @@ class PPYOLOFPN(nn.Layer): in_channels=[512, 1024, 2048], norm_type='bn', data_format='NCHW', - **kwargs): + coord_conv=False, + conv_block_num=3, + drop_block=False, + block_size=3, + keep_prob=0.9, + spp=False): """ PPYOLOFPN layer @@ -438,7 +521,12 @@ class PPYOLOFPN(nn.Layer): in_channels (list): input channels for fpn norm_type (str): batch norm type, default bn data_format (str): data format, NCHW or NHWC - kwargs: extra key-value pairs, such as parameter of DropBlock and spp + coord_conv (bool): whether use CoordConv or not + conv_block_num (int): conv block num of each pan block + drop_block (bool): whether use DropBlock or not + block_size (int): block size of DropBlock + keep_prob (float): keep probability of DropBlock + spp (bool): whether use spp or not """ super(PPYOLOFPN, self).__init__() @@ -446,14 +534,12 @@ class PPYOLOFPN(nn.Layer): self.in_channels = in_channels self.num_blocks = len(in_channels) # parse kwargs - self.coord_conv = kwargs.get('coord_conv', False) - self.drop_block = kwargs.get('drop_block', False) - if self.drop_block: - self.block_size = kwargs.get('block_size', 3) - self.keep_prob = kwargs.get('keep_prob', 0.9) - - self.spp = kwargs.get('spp', False) - self.conv_block_num = kwargs.get('conv_block_num', 2) + self.coord_conv = coord_conv + self.drop_block = drop_block + self.block_size = block_size + self.keep_prob = keep_prob + self.spp = spp + self.conv_block_num = conv_block_num self.data_format = data_format if self.coord_conv: ConvLayer = CoordConv @@ -583,14 +669,12 @@ class PPYOLOTinyFPN(nn.Layer): **kwargs): """ PPYOLO Tiny FPN layer - Args: in_channels (list): input channels for fpn 
detection_block_channels (list): channels in fpn norm_type (str): batch norm type, default bn data_format (str): data format, NCHW or NHWC kwargs: extra key-value pairs, such as parameter of DropBlock and spp - """ super(PPYOLOTinyFPN, self).__init__() assert len(in_channels) > 0, "in_channels length should > 0" @@ -681,3 +765,197 @@ class PPYOLOTinyFPN(nn.Layer): @property def out_shape(self): return [ShapeSpec(channels=c) for c in self._out_channels] + + +@register +@serializable +class PPYOLOPAN(nn.Layer): + __shared__ = ['norm_type', 'data_format'] + + def __init__(self, + in_channels=[512, 1024, 2048], + norm_type='bn', + data_format='NCHW', + act='mish', + conv_block_num=3, + drop_block=False, + block_size=3, + keep_prob=0.9, + spp=False): + """ + PPYOLOPAN layer with SPP, DropBlock and CSP connection. + + Args: + in_channels (list): input channels for fpn + norm_type (str): batch norm type, default bn + data_format (str): data format, NCHW or NHWC + act (str): activation function, default mish + conv_block_num (int): conv block num of each pan block + drop_block (bool): whether use DropBlock or not + block_size (int): block size of DropBlock + keep_prob (float): keep probability of DropBlock + spp (bool): whether use spp or not + + """ + super(PPYOLOPAN, self).__init__() + assert len(in_channels) > 0, "in_channels length should > 0" + self.in_channels = in_channels + self.num_blocks = len(in_channels) + # parse kwargs + self.drop_block = drop_block + self.block_size = block_size + self.keep_prob = keep_prob + self.spp = spp + self.conv_block_num = conv_block_num + self.data_format = data_format + if self.drop_block: + dropblock_cfg = [[ + 'dropblock', DropBlock, [self.block_size, self.keep_prob], + dict() + ]] + else: + dropblock_cfg = [] + + # fpn + self.fpn_blocks = [] + self.fpn_routes = [] + fpn_channels = [] + for i, ch_in in enumerate(self.in_channels[::-1]): + if i > 0: + ch_in += 512 // (2**(i - 1)) + channel = 512 // (2**i) + base_cfg = [] + for j 
in range(self.conv_block_num): + base_cfg += [ + # name, layer, args + [ + '{}.0'.format(j), ConvBNLayer, [channel, channel, 1], + dict( + padding=0, act=act, norm_type=norm_type) + ], + [ + '{}.1'.format(j), ConvBNLayer, [channel, channel, 3], + dict( + padding=1, act=act, norm_type=norm_type) + ] + ] + + if i == 0 and self.spp: + base_cfg[3] = [ + 'spp', SPP, [channel * 4, channel, 1], dict( + pool_size=[5, 9, 13], act=act, norm_type=norm_type) + ] + + cfg = base_cfg[:4] + dropblock_cfg + base_cfg[4:] + name = 'fpn.{}'.format(i) + fpn_block = self.add_sublayer( + name, + PPYOLODetBlockCSP(cfg, ch_in, channel, act, norm_type, name, + data_format)) + self.fpn_blocks.append(fpn_block) + fpn_channels.append(channel * 2) + if i < self.num_blocks - 1: + name = 'fpn_transition.{}'.format(i) + route = self.add_sublayer( + name, + ConvBNLayer( + ch_in=channel * 2, + ch_out=channel, + filter_size=1, + stride=1, + padding=0, + act=act, + norm_type=norm_type, + data_format=data_format, + name=name)) + self.fpn_routes.append(route) + # pan + self.pan_blocks = [] + self.pan_routes = [] + self._out_channels = [512 // (2**(self.num_blocks - 2)), ] + for i in reversed(range(self.num_blocks - 1)): + name = 'pan_transition.{}'.format(i) + route = self.add_sublayer( + name, + ConvBNLayer( + ch_in=fpn_channels[i + 1], + ch_out=fpn_channels[i + 1], + filter_size=3, + stride=2, + padding=1, + act=act, + norm_type=norm_type, + data_format=data_format, + name=name)) + self.pan_routes = [route, ] + self.pan_routes + base_cfg = [] + ch_in = fpn_channels[i] + fpn_channels[i + 1] + channel = 512 // (2**i) + for j in range(self.conv_block_num): + base_cfg += [ + # name, layer, args + [ + '{}.0'.format(j), ConvBNLayer, [channel, channel, 1], + dict( + padding=0, act=act, norm_type=norm_type) + ], + [ + '{}.1'.format(j), ConvBNLayer, [channel, channel, 3], + dict( + padding=1, act=act, norm_type=norm_type) + ] + ] + + cfg = base_cfg[:4] + dropblock_cfg + base_cfg[4:] + name = 'pan.{}'.format(i) 
+ pan_block = self.add_sublayer( + name, + PPYOLODetBlockCSP(cfg, ch_in, channel, act, norm_type, name, + data_format)) + + self.pan_blocks = [pan_block, ] + self.pan_blocks + self._out_channels.append(channel * 2) + + self._out_channels = self._out_channels[::-1] + + def forward(self, blocks): + assert len(blocks) == self.num_blocks + blocks = blocks[::-1] + # fpn + fpn_feats = [] + for i, block in enumerate(blocks): + if i > 0: + if self.data_format == 'NCHW': + block = paddle.concat([route, block], axis=1) + else: + block = paddle.concat([route, block], axis=-1) + route, tip = self.fpn_blocks[i](block) + fpn_feats.append(tip) + + if i < self.num_blocks - 1: + route = self.fpn_routes[i](route) + route = F.interpolate( + route, scale_factor=2., data_format=self.data_format) + + pan_feats = [fpn_feats[-1], ] + route = fpn_feats[self.num_blocks - 1] + for i in reversed(range(self.num_blocks - 1)): + block = fpn_feats[i] + route = self.pan_routes[i](route) + if self.data_format == 'NCHW': + block = paddle.concat([route, block], axis=1) + else: + block = paddle.concat([route, block], axis=-1) + + route, tip = self.pan_blocks[i](block) + pan_feats.append(tip) + + return pan_feats[::-1] + + @classmethod + def from_config(cls, cfg, input_shape): + return {'in_channels': [i.channels for i in input_shape], } + + @property + def out_shape(self): + return [ShapeSpec(channels=c) for c in self._out_channels] diff --git a/ppdet/modeling/ops.py b/ppdet/modeling/ops.py index cbef3d6b4..e2c193030 100644 --- a/ppdet/modeling/ops.py +++ b/ppdet/modeling/ops.py @@ -41,9 +41,14 @@ __all__ = [ 'collect_fpn_proposals', 'matrix_nms', 'batch_norm', + 'mish', ] +def mish(x): + return x * paddle.tanh(F.softplus(x)) + + def batch_norm(ch, norm_type='bn', norm_decay=0., diff --git a/static/configs/ppyolo/README.md b/static/configs/ppyolo/README.md index f8f707634..0f363971c 100644 --- a/static/configs/ppyolo/README.md +++ b/static/configs/ppyolo/README.md @@ -38,21 +38,24 @@ PP-YOLO improved 
performance and speed of YOLOv3 with following methods: | Model | GPU number | images/GPU | backbone | input shape | Box APval | Box APtest | V100 FP32(FPS) | V100 TensorRT FP16(FPS) | download | config | |:------------------------:|:----------:|:----------:|:----------:| :----------:| :------------------: | :-------------------: | :------------: | :---------------------: | :------: | :-----: | -| YOLOv4(AlexyAB) | - | - | CSPDarknet | 608 | - | 43.5 | 62 | 105.5 | [model](https://paddlemodels.bj.bcebos.com/object_detection/yolov4_cspdarknet.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/yolov4/yolov4_csdarknet.yml) | -| YOLOv4(AlexyAB) | - | - | CSPDarknet | 512 | - | 43.0 | 83 | 138.4 | [model](https://paddlemodels.bj.bcebos.com/object_detection/yolov4_cspdarknet.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/yolov4/yolov4_csdarknet.yml) | -| YOLOv4(AlexyAB) | - | - | CSPDarknet | 416 | - | 41.2 | 96 | 164.0 | [model](https://paddlemodels.bj.bcebos.com/object_detection/yolov4_cspdarknet.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/yolov4/yolov4_csdarknet.yml) | -| YOLOv4(AlexyAB) | - | - | CSPDarknet | 320 | - | 38.0 | 123 | 199.0 | [model](https://paddlemodels.bj.bcebos.com/object_detection/yolov4_cspdarknet.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/yolov4/yolov4_csdarknet.yml) | -| PP-YOLO | 8 | 24 | ResNet50vd | 608 | 44.8 | 45.2 | 72.9 | 155.6 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ppyolo/ppyolo.yml) | -| PP-YOLO | 8 | 24 | ResNet50vd | 512 | 43.9 | 44.4 | 89.9 | 188.4 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ppyolo/ppyolo.yml) | -| PP-YOLO | 8 | 24 | 
ResNet50vd | 416 | 42.1 | 42.5 | 109.1 | 215.4 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ppyolo/ppyolo.yml) | -| PP-YOLO | 8 | 24 | ResNet50vd | 320 | 38.9 | 39.3 | 132.2 | 242.2 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ppyolo/ppyolo.yml) | -| PP-YOLO_2x | 8 | 24 | ResNet50vd | 608 | 45.3 | 45.9 | 72.9 | 155.6 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_2x.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ppyolo/ppyolo_2x.yml) | -| PP-YOLO_2x | 8 | 24 | ResNet50vd | 512 | 44.4 | 45.0 | 89.9 | 188.4 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_2x.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ppyolo/ppyolo_2x.yml) | -| PP-YOLO_2x | 8 | 24 | ResNet50vd | 416 | 42.7 | 43.2 | 109.1 | 215.4 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_2x.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ppyolo/ppyolo_2x.yml) | -| PP-YOLO_2x | 8 | 24 | ResNet50vd | 320 | 39.5 | 40.1 | 132.2 | 242.2 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_2x.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ppyolo/ppyolo_2x.yml) | -| PP-YOLO_ResNet18vd | 4 | 32 | ResNet18vd | 512 | 29.3 | 29.5 | 357.1 | 657.9 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_r18vd.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ppyolo/ppyolo_r18vd.yml) | -| PP-YOLO_ResNet18vd | 4 | 32 | ResNet18vd | 416 | 28.6 | 28.9 | 409.8 | 719.4 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_r18vd.pdparams) | 
[config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ppyolo/ppyolo_r18vd.yml) | -| PP-YOLO_ResNet18vd | 4 | 32 | ResNet18vd | 320 | 26.2 | 26.4 | 480.7 | 763.4 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_r18vd.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ppyolo/ppyolo_r18vd.yml) | +| YOLOv4(AlexyAB) | - | - | CSPDarknet | 608 | - | 43.5 | 62 | 105.5 | [model](https://paddlemodels.bj.bcebos.com/object_detection/yolov4_cspdarknet.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/yolov4/yolov4_csdarknet.yml) | +| YOLOv4(AlexyAB) | - | - | CSPDarknet | 512 | - | 43.0 | 83 | 138.4 | [model](https://paddlemodels.bj.bcebos.com/object_detection/yolov4_cspdarknet.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/yolov4/yolov4_csdarknet.yml) | +| YOLOv4(AlexyAB) | - | - | CSPDarknet | 416 | - | 41.2 | 96 | 164.0 | [model](https://paddlemodels.bj.bcebos.com/object_detection/yolov4_cspdarknet.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/yolov4/yolov4_csdarknet.yml) | +| YOLOv4(AlexyAB) | - | - | CSPDarknet | 320 | - | 38.0 | 123 | 199.0 | [model](https://paddlemodels.bj.bcebos.com/object_detection/yolov4_cspdarknet.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/yolov4/yolov4_csdarknet.yml) | +| PP-YOLO | 8 | 24 | ResNet50vd | 608 | 44.8 | 45.2 | 72.9 | 155.6 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/ppyolo/ppyolo.yml) | +| PP-YOLO | 8 | 24 | ResNet50vd | 512 | 43.9 | 44.4 | 89.9 | 188.4 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/ppyolo/ppyolo.yml) | 
+| PP-YOLO | 8 | 24 | ResNet50vd | 416 | 42.1 | 42.5 | 109.1 | 215.4 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/ppyolo/ppyolo.yml) | +| PP-YOLO | 8 | 24 | ResNet50vd | 320 | 38.9 | 39.3 | 132.2 | 242.2 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/ppyolo/ppyolo.yml) | +| PP-YOLO_2x | 8 | 24 | ResNet50vd | 608 | 45.3 | 45.9 | 72.9 | 155.6 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_2x.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/ppyolo/ppyolo_2x.yml) | +| PP-YOLO_2x | 8 | 24 | ResNet50vd | 512 | 44.4 | 45.0 | 89.9 | 188.4 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_2x.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/ppyolo/ppyolo_2x.yml) | +| PP-YOLO_2x | 8 | 24 | ResNet50vd | 416 | 42.7 | 43.2 | 109.1 | 215.4 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_2x.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/ppyolo/ppyolo_2x.yml) | +| PP-YOLO_2x | 8 | 24 | ResNet50vd | 320 | 39.5 | 40.1 | 132.2 | 242.2 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_2x.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/ppyolo/ppyolo_2x.yml) | +| PP-YOLO | 4 | 32 | ResNet18vd | 512 | 29.3 | 29.5 | 357.1 | 657.9 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_r18vd.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/ppyolo/ppyolo_r18vd.yml) | +| PP-YOLO | 4 | 32 | ResNet18vd | 416 | 28.6 | 28.9 | 409.8 | 719.4 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_r18vd.pdparams) | 
[config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/ppyolo/ppyolo_r18vd.yml) | +| PP-YOLO | 4 | 32 | ResNet18vd | 320 | 26.2 | 26.4 | 480.7 | 763.4 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_r18vd.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/ppyolo/ppyolo_r18vd.yml) | +| PP-YOLOv2 | 8 | 12 | ResNet50vd | 640 | 49.1 | 49.5 | - | - | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolov2_r50vd_dcn.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/ppyolo/ppyolov2_r50vd_dcn.yml) | +| PP-YOLOv2 | 8 | 12 | ResNet101vd | 640 | 49.7 | 50.1 | - | - | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolov2_r101vd_dcn.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/ppyolo/ppyolov2_r101vd_dcn.yml) | + **Notes:** @@ -69,8 +72,8 @@ PP-YOLO improved performance and speed of YOLOv3 with following methods: | Model | GPU number | images/GPU | Model Size | input shape | Box APval | Box AP50val | Kirin 990 1xCore(FPS) | download | inference model download | config | |:----------------------------:|:----------:|:----------:| :--------: | :----------:| :------------------: | :--------------------: | :-------------------: | :------: | :----------------------: | :-----: | -| PP-YOLO_MobileNetV3_large | 4 | 32 | 18MB | 320 | 23.2 | 42.6 | 15.6 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_mobilenet_v3_large.pdparams) | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_mobilenet_v3_large.tar) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ppyolo/ppyolo_mobilenet_v3_large.yml) | -| PP-YOLO_MobileNetV3_small | 4 | 32 | 11MB | 320 | 17.2 | 33.8 | 28.6 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_mobilenet_v3_small.pdparams) | 
[model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_mobilenet_v3_small.tar) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ppyolo/ppyolo_mobilenet_v3_small.yml) | +| PP-YOLO_MobileNetV3_large | 4 | 32 | 18MB | 320 | 23.2 | 42.6 | 15.6 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_mobilenet_v3_large.pdparams) | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_mobilenet_v3_large.tar) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/ppyolo/ppyolo_mobilenet_v3_large.yml) | +| PP-YOLO_MobileNetV3_small | 4 | 32 | 11MB | 320 | 17.2 | 33.8 | 28.6 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_mobilenet_v3_small.pdparams) | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_mobilenet_v3_small.tar) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/ppyolo/ppyolo_mobilenet_v3_small.yml) | **Notes:** @@ -82,7 +85,7 @@ PP-YOLO improved performance and speed of YOLOv3 with following methods: | Model | GPU number | images/GPU | Prune Ratio | Teacher Model | Model Size | input shape | Box APval | Kirin 990 1xCore(FPS) | download | inference model download | config | |:----------------------------:|:----------:|:----------:| :---------: | :-----------------------: | :--------: | :----------:| :------------------: | :-------------------: | :------: | :----------------------: | :-----: | -| PP-YOLO_MobileNetV3_small | 4 | 32 | 75% | PP-YOLO_MobileNetV3_large | 4.2MB | 320 | 16.2 | 39.8 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_mobilenet_v3_small_prune75_distillby_mobilenet_v3_large.pdparams) | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_mobilenet_v3_small_prune75_distillby_mobilenet_v3_large.tar) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ppyolo/ppyolo_mobilenet_v3_small.yml) | +| 
PP-YOLO_MobileNetV3_small | 4 | 32 | 75% | PP-YOLO_MobileNetV3_large | 4.2MB | 320 | 16.2 | 39.8 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_mobilenet_v3_small_prune75_distillby_mobilenet_v3_large.pdparams) | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_mobilenet_v3_small_prune75_distillby_mobilenet_v3_large.tar) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/ppyolo/ppyolo_mobilenet_v3_small.yml) | - Slim PP-YOLO is trained by slim traing method from [Distill pruned model](../../slim/extentions/distill_pruned_model/README.md),distill training pruned PP-YOLO_MobileNetV3_small model with PP-YOLO_MobileNetV3_large model as the teacher model - Pruning detectiom head of PP-YOLO model with ratio as 75%, while the arguments are `--pruned_params="yolo_block.0.2.conv.weights,yolo_block.0.tip.conv.weights,yolo_block.1.2.conv.weights,yolo_block.1.tip.conv.weights" --pruned_ratios="0.75,0.75,0.75,0.75"` @@ -108,9 +111,9 @@ PP-YOLO trained on Pascal VOC dataset as follows: | Model | GPU number | images/GPU | backbone | input shape | Box AP50val | download | config | |:------------------:|:----------:|:----------:|:----------:| :----------:| :--------------------: | :------: | :-----: | -| PP-YOLO | 8 | 12 | ResNet50vd | 608 | 84.9 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_voc.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ppyolo/ppyolo_voc.yml) | -| PP-YOLO | 8 | 12 | ResNet50vd | 416 | 84.3 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_voc.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ppyolo/ppyolo_voc.yml) | -| PP-YOLO | 8 | 12 | ResNet50vd | 320 | 82.2 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_voc.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ppyolo/ppyolo_voc.yml) | +| PP-YOLO | 8 | 12 | 
ResNet50vd | 608 | 84.9 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_voc.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/ppyolo/ppyolo_voc.yml) | +| PP-YOLO | 8 | 12 | ResNet50vd | 416 | 84.3 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_voc.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/ppyolo/ppyolo_voc.yml) | +| PP-YOLO | 8 | 12 | ResNet50vd | 320 | 82.2 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_voc.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/ppyolo/ppyolo_voc.yml) | ## Getting Start diff --git a/static/configs/ppyolo/README_cn.md b/static/configs/ppyolo/README_cn.md index a811a7e0d..392436ef4 100644 --- a/static/configs/ppyolo/README_cn.md +++ b/static/configs/ppyolo/README_cn.md @@ -38,21 +38,23 @@ PP-YOLO从如下方面优化和提升YOLOv3模型的精度和速度: | 模型 | GPU个数 | 每GPU图片个数 | 骨干网络 | 输入尺寸 | Box APval | Box APtest | V100 FP32(FPS) | V100 TensorRT FP16(FPS) | 模型下载 | 配置文件 | |:------------------------:|:-------:|:-------------:|:----------:| :-------:| :------------------: | :-------------------: | :------------: | :---------------------: | :------: | :------: | -| YOLOv4(AlexyAB) | - | - | CSPDarknet | 608 | - | 43.5 | 62 | 105.5 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/yolov4_cspdarknet.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/yolov4/yolov4_csdarknet.yml) | -| YOLOv4(AlexyAB) | - | - | CSPDarknet | 512 | - | 43.0 | 83 | 138.4 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/yolov4_cspdarknet.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/yolov4/yolov4_csdarknet.yml) | -| YOLOv4(AlexyAB) | - | - | CSPDarknet | 416 | - | 41.2 | 96 | 164.0 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/yolov4_cspdarknet.pdparams) | 
[配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/yolov4/yolov4_csdarknet.yml) | -| YOLOv4(AlexyAB) | - | - | CSPDarknet | 320 | - | 38.0 | 123 | 199.0 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/yolov4_cspdarknet.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/yolov4/yolov4_csdarknet.yml) | -| PP-YOLO | 8 | 24 | ResNet50vd | 608 | 44.8 | 45.2 | 72.9 | 155.6 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ppyolo/ppyolo.yml) | -| PP-YOLO | 8 | 24 | ResNet50vd | 512 | 43.9 | 44.4 | 89.9 | 188.4 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ppyolo/ppyolo.yml) | -| PP-YOLO | 8 | 24 | ResNet50vd | 416 | 42.1 | 42.5 | 109.1 | 215.4 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ppyolo/ppyolo.yml) | -| PP-YOLO | 8 | 24 | ResNet50vd | 320 | 38.9 | 39.3 | 132.2 | 242.2 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ppyolo/ppyolo.yml) | -| PP-YOLO_2x | 8 | 24 | ResNet50vd | 608 | 45.3 | 45.9 | 72.9 | 155.6 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ppyolo/ppyolo.yml) | -| PP-YOLO_2x | 8 | 24 | ResNet50vd | 512 | 44.4 | 45.0 | 89.9 | 188.4 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ppyolo/ppyolo.yml) | -| PP-YOLO_2x | 8 | 24 | ResNet50vd | 416 | 42.7 | 43.2 | 109.1 | 215.4 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo.pdparams) | 
[配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ppyolo/ppyolo.yml) | -| PP-YOLO_2x | 8 | 24 | ResNet50vd | 320 | 39.5 | 40.1 | 132.2 | 242.2 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ppyolo/ppyolo.yml) | -| PP-YOLO_ResNet18vd | 4 | 32 | ResNet18vd | 512 | 29.3 | 29.5 | 357.1 | 657.9 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_r18vd.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ppyolo/ppyolo_r18vd.yml) | -| PP-YOLO_ResNet18vd | 4 | 32 | ResNet18vd | 416 | 28.6 | 28.9 | 409.8 | 719.4 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_r18vd.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ppyolo/ppyolo_r18vd.yml) | -| PP-YOLO_ResNet18vd | 4 | 32 | ResNet18vd | 320 | 26.2 | 26.4 | 480.7 | 763.4 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_r18vd.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ppyolo/ppyolo_r18vd.yml) | +| YOLOv4(AlexyAB) | - | - | CSPDarknet | 608 | - | 43.5 | 62 | 105.5 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/yolov4_cspdarknet.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/yolov4/yolov4_csdarknet.yml) | +| YOLOv4(AlexyAB) | - | - | CSPDarknet | 512 | - | 43.0 | 83 | 138.4 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/yolov4_cspdarknet.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/yolov4/yolov4_csdarknet.yml) | +| YOLOv4(AlexyAB) | - | - | CSPDarknet | 416 | - | 41.2 | 96 | 164.0 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/yolov4_cspdarknet.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/yolov4/yolov4_csdarknet.yml) | +| YOLOv4(AlexyAB) | - | - 
| CSPDarknet | 320 | - | 38.0 | 123 | 199.0 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/yolov4_cspdarknet.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/yolov4/yolov4_csdarknet.yml) | +| PP-YOLO | 8 | 24 | ResNet50vd | 608 | 44.8 | 45.2 | 72.9 | 155.6 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/ppyolo/ppyolo.yml) | +| PP-YOLO | 8 | 24 | ResNet50vd | 512 | 43.9 | 44.4 | 89.9 | 188.4 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/ppyolo/ppyolo.yml) | +| PP-YOLO | 8 | 24 | ResNet50vd | 416 | 42.1 | 42.5 | 109.1 | 215.4 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/ppyolo/ppyolo.yml) | +| PP-YOLO | 8 | 24 | ResNet50vd | 320 | 38.9 | 39.3 | 132.2 | 242.2 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/ppyolo/ppyolo.yml) | +| PP-YOLO_2x | 8 | 24 | ResNet50vd | 608 | 45.3 | 45.9 | 72.9 | 155.6 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/ppyolo/ppyolo.yml) | +| PP-YOLO_2x | 8 | 24 | ResNet50vd | 512 | 44.4 | 45.0 | 89.9 | 188.4 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/ppyolo/ppyolo.yml) | +| PP-YOLO_2x | 8 | 24 | ResNet50vd | 416 | 42.7 | 43.2 | 109.1 | 215.4 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo.pdparams) | 
[配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/ppyolo/ppyolo.yml) | +| PP-YOLO_2x | 8 | 24 | ResNet50vd | 320 | 39.5 | 40.1 | 132.2 | 242.2 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/ppyolo/ppyolo.yml) | +| PP-YOLO | 4 | 32 | ResNet18vd | 512 | 29.3 | 29.5 | 357.1 | 657.9 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_r18vd.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/ppyolo/ppyolo_r18vd.yml) | +| PP-YOLO | 4 | 32 | ResNet18vd | 416 | 28.6 | 28.9 | 409.8 | 719.4 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_r18vd.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/ppyolo/ppyolo_r18vd.yml) | +| PP-YOLO | 4 | 32 | ResNet18vd | 320 | 26.2 | 26.4 | 480.7 | 763.4 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_r18vd.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/ppyolo/ppyolo_r18vd.yml) | +| PP-YOLOv2 | 8 | 12 | ResNet50vd | 640 | 49.1 | 49.5 | - | - | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolov2_r50vd_dcn.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/ppyolo/ppyolov2_r50vd_dcn.yml) | +| PP-YOLOv2 | 8 | 12 | ResNet101vd | 640 | 49.7 | 50.1 | - | - | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolov2_r101vd_dcn.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/ppyolo/ppyolov2_r101vd_dcn.yml) | **注意:** @@ -70,8 +72,8 @@ PP-YOLO从如下方面优化和提升YOLOv3模型的精度和速度: | 模型 | GPU个数 | 每GPU图片个数 | 模型体积 | 输入尺寸 | Box APval | Box AP50val | Kirin 990 1xCore (FPS) | 模型下载 | 预测模型下载 | 配置文件 | |:----------------------------:|:-------:|:-------------:|:----------:| :-------:| :------------------: | 
:--------------------: | :--------------------: | :------: | :----------: | :------: | -| PP-YOLO_MobileNetV3_large | 4 | 32 | 18MB | 320 | 23.2 | 42.6 | 14.1 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_mobilenet_v3_large.pdparams) | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_mobilenet_v3_large.tar) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ppyolo/ppyolo_mobilenet_v3_large.yml) | -| PP-YOLO_MobileNetV3_small | 4 | 32 | 11MB | 320 | 17.2 | 33.8 | 21.5 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_mobilenet_v3_small.pdparams) | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_mobilenet_v3_large.tar) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ppyolo/ppyolo_mobilenet_v3_small.yml) | +| PP-YOLO_MobileNetV3_large | 4 | 32 | 18MB | 320 | 23.2 | 42.6 | 14.1 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_mobilenet_v3_large.pdparams) | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_mobilenet_v3_large.tar) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/ppyolo/ppyolo_mobilenet_v3_large.yml) | +| PP-YOLO_MobileNetV3_small | 4 | 32 | 11MB | 320 | 17.2 | 33.8 | 21.5 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_mobilenet_v3_small.pdparams) | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_mobilenet_v3_large.tar) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/ppyolo/ppyolo_mobilenet_v3_small.yml) | - PP-YOLO_MobileNetV3 模型使用COCO数据集中train2017作为训练集,使用val2017作为测试集,Box APval为`mAP(IoU=0.5:0.95)`评估结果, Box AP50val为`mAP(IoU=0.5)`评估结果。 - PP-YOLO_MobileNetV3 模型训练过程中使用4GPU,每GPU batch size为32进行训练,如训练GPU数和batch size不使用上述配置,须参考[FAQ](../../docs/FAQ.md)调整学习率和迭代次数。 @@ -81,7 +83,7 @@ PP-YOLO从如下方面优化和提升YOLOv3模型的精度和速度: | 模型 | GPU 个数 | 每GPU图片个数 | 裁剪率 | Teacher模型 | 模型体积 | 输入尺寸 | Box APval | Kirin 
990 1xCore (FPS) | 模型下载 | 预测模型下载 | 配置文件 | |:----------------------------:|:----------:|:-------------:| :---------: | :-----------------------: | :--------: | :----------:| :------------------: | :--------------------: | :------: | :----------: | :------: | -| PP-YOLO_MobileNetV3_small | 4 | 32 | 75% | PP-YOLO_MobileNetV3_large | 4.2MB | 320 | 16.2 | 39.8 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_mobilenet_v3_small_prune75_distillby_mobilenet_v3_large.pdparams) | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_mobilenet_v3_small_prune75_distillby_mobilenet_v3_large.tar) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ppyolo/ppyolo_mobilenet_v3_small.yml) | +| PP-YOLO_MobileNetV3_small | 4 | 32 | 75% | PP-YOLO_MobileNetV3_large | 4.2MB | 320 | 16.2 | 39.8 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_mobilenet_v3_small_prune75_distillby_mobilenet_v3_large.pdparams) | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_mobilenet_v3_small_prune75_distillby_mobilenet_v3_large.tar) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/ppyolo/ppyolo_mobilenet_v3_small.yml) | - PP-YOLO 轻量级裁剪模型采用[蒸馏通道剪裁模型](../../slim/extentions/distill_pruned_model/README.md) 的方式训练得到,基于 PP-YOLO_MobileNetV3_small 模型对Head部分做卷积通道剪裁后使用 PP-YOLO_MobileNetV3_large 模型进行蒸馏训练 - 卷积通道检测对Head部分剪裁掉75%的通道数,及剪裁参数为`--pruned_params="yolo_block.0.2.conv.weights,yolo_block.0.tip.conv.weights,yolo_block.1.2.conv.weights,yolo_block.1.tip.conv.weights" --pruned_ratios="0.75,0.75,0.75,0.75"` @@ -105,9 +107,9 @@ PP-YOLO在Pascal VOC数据集上训练模型如下: | 模型 | GPU个数 | 每GPU图片个数 | 骨干网络 | 输入尺寸 | Box AP50val | 模型下载 | 配置文件 | |:------------------:|:-------:|:-------------:|:----------:| :----------:| :--------------------: | :------: | :-----: | -| PP-YOLO | 8 | 12 | ResNet50vd | 608 | 84.9 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_voc.pdparams) | 
[config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ppyolo/ppyolo_voc.yml) | -| PP-YOLO | 8 | 12 | ResNet50vd | 416 | 84.3 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_voc.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ppyolo/ppyolo_voc.yml) | -| PP-YOLO | 8 | 12 | ResNet50vd | 320 | 82.2 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_voc.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ppyolo/ppyolo_voc.yml) | +| PP-YOLO | 8 | 12 | ResNet50vd | 608 | 84.9 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_voc.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/ppyolo/ppyolo_voc.yml) | +| PP-YOLO | 8 | 12 | ResNet50vd | 416 | 84.3 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_voc.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/ppyolo/ppyolo_voc.yml) | +| PP-YOLO | 8 | 12 | ResNet50vd | 320 | 82.2 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_voc.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/ppyolo/ppyolo_voc.yml) | ## 使用说明 diff --git a/static/configs/ppyolo/ppyolov2_r101vd_dcn.yml b/static/configs/ppyolo/ppyolov2_r101vd_dcn.yml new file mode 100644 index 000000000..9ba339912 --- /dev/null +++ b/static/configs/ppyolo/ppyolov2_r101vd_dcn.yml @@ -0,0 +1,89 @@ +architecture: YOLOv3 +use_gpu: true +max_iters: 450000 +log_iter: 100 +save_dir: output +snapshot_iter: 10000 +metric: COCO +pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_vd_ssld_pretrained.tar +weights: output/ppyolov2_r101vd_dcn/model_final +num_classes: 80 +use_fine_grained_loss: true +use_ema: true +ema_decay: 0.9998 + +YOLOv3: + backbone: ResNet + yolo_head: YOLOv3PANHead + use_fine_grained_loss: true + +ResNet: + 
norm_type: sync_bn + freeze_at: 0 + freeze_norm: false + norm_decay: 0. + depth: 101 + feature_maps: [3, 4, 5] + variant: d + dcn_v2_stages: [5] + +YOLOv3PANHead: + anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]] + anchors: [[10, 13], [16, 30], [33, 23], + [30, 61], [62, 45], [59, 119], + [116, 90], [156, 198], [373, 326]] + norm_decay: 0. + iou_aware: true + iou_aware_factor: 0.5 + scale_x_y: 1.05 + spp: true + yolo_loss: YOLOv3Loss + nms: MatrixNMS + drop_block: true + +YOLOv3Loss: + ignore_thresh: 0.7 + scale_x_y: 1.05 + label_smooth: false + use_fine_grained_loss: true + iou_loss: IouLoss + iou_aware_loss: IouAwareLoss + +IouLoss: + loss_weight: 2.5 + max_height: 768 + max_width: 768 + +IouAwareLoss: + loss_weight: 1.0 + max_height: 768 + max_width: 768 + +MatrixNMS: + background_label: -1 + keep_top_k: 100 + normalized: false + score_threshold: 0.01 + post_threshold: 0.01 + +LearningRate: + base_lr: 0.005 + schedulers: + - !PiecewiseDecay + gamma: 0.1 + milestones: + - 300000 + - !LinearWarmup + start_factor: 0. + steps: 4000 + +OptimizerBuilder: + clip_grad_by_norm: 35. + optimizer: + momentum: 0.9 + type: Momentum + regularizer: + factor: 0.0005 + type: L2 + +_READER_: 'ppyolov2_reader.yml' diff --git a/static/configs/ppyolo/ppyolov2_r50vd_dcn.yml b/static/configs/ppyolo/ppyolov2_r50vd_dcn.yml new file mode 100644 index 000000000..7ceb75833 --- /dev/null +++ b/static/configs/ppyolo/ppyolov2_r50vd_dcn.yml @@ -0,0 +1,89 @@ +architecture: YOLOv3 +use_gpu: true +max_iters: 450000 +log_iter: 100 +save_dir: output +snapshot_iter: 10000 +metric: COCO +pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vd_ssld_pretrained.tar +weights: output/ppyolov2_r50vd_dcn/model_final +num_classes: 80 +use_fine_grained_loss: true +use_ema: true +ema_decay: 0.9998 + +YOLOv3: + backbone: ResNet + yolo_head: YOLOv3PANHead + use_fine_grained_loss: true + +ResNet: + norm_type: sync_bn + freeze_at: 0 + freeze_norm: false + norm_decay: 0. 
+ depth: 50 + feature_maps: [3, 4, 5] + variant: d + dcn_v2_stages: [5] + +YOLOv3PANHead: + anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]] + anchors: [[10, 13], [16, 30], [33, 23], + [30, 61], [62, 45], [59, 119], + [116, 90], [156, 198], [373, 326]] + norm_decay: 0. + iou_aware: true + iou_aware_factor: 0.5 + scale_x_y: 1.05 + spp: true + yolo_loss: YOLOv3Loss + nms: MatrixNMS + drop_block: true + +YOLOv3Loss: + ignore_thresh: 0.7 + scale_x_y: 1.05 + label_smooth: false + use_fine_grained_loss: true + iou_loss: IouLoss + iou_aware_loss: IouAwareLoss + +IouLoss: + loss_weight: 2.5 + max_height: 768 + max_width: 768 + +IouAwareLoss: + loss_weight: 1.0 + max_height: 768 + max_width: 768 + +MatrixNMS: + background_label: -1 + keep_top_k: 100 + normalized: false + score_threshold: 0.01 + post_threshold: 0.01 + +LearningRate: + base_lr: 0.005 + schedulers: + - !PiecewiseDecay + gamma: 0.1 + milestones: + - 300000 + - !LinearWarmup + start_factor: 0. + steps: 4000 + +OptimizerBuilder: + clip_grad_by_norm: 35. 
+ optimizer: + momentum: 0.9 + type: Momentum + regularizer: + factor: 0.0005 + type: L2 + +_READER_: 'ppyolov2_reader.yml' diff --git a/static/configs/ppyolo/ppyolov2_reader.yml b/static/configs/ppyolo/ppyolov2_reader.yml new file mode 100644 index 000000000..02a385c18 --- /dev/null +++ b/static/configs/ppyolo/ppyolov2_reader.yml @@ -0,0 +1,111 @@ +TrainReader: + inputs_def: + fields: ['image', 'gt_bbox', 'gt_class', 'gt_score'] + num_max_boxes: 100 + dataset: + !COCODataSet + image_dir: train2017 + anno_path: annotations/instances_train2017.json + dataset_dir: dataset/coco + with_background: false + sample_transforms: + - !DecodeImage + to_rgb: True + with_mixup: True + - !MixupImage + alpha: 1.5 + beta: 1.5 + - !ColorDistort {} + - !RandomExpand + ratio: 2.0 + fill_value: [123.675, 116.28, 103.53] + - !RandomCrop {} + - !RandomFlipImage + is_normalized: false + - !NormalizeBox {} + - !PadBox + num_max_boxes: 100 + - !BboxXYXY2XYWH {} + batch_transforms: + - !RandomShape + sizes: [320, 352, 384, 416, 448, 480, 512, 544, 576, 608, 640, 672, 704, 736, 768] + random_inter: True + - !NormalizeImage + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + is_scale: True + is_channel_first: false + - !Permute + to_bgr: false + channel_first: True + # Gt2YoloTarget is only used when use_fine_grained_loss set as true, + # this operator will be deleted automatically if use_fine_grained_loss + # is set as false + - !Gt2YoloTarget + anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]] + anchors: [[10, 13], [16, 30], [33, 23], + [30, 61], [62, 45], [59, 119], + [116, 90], [156, 198], [373, 326]] + downsample_ratios: [32, 16, 8] + batch_size: 12 + shuffle: true + mixup_epoch: 25000 + drop_last: true + worker_num: 8 + bufsize: 4 + use_process: true + +EvalReader: + inputs_def: + fields: ['image', 'im_size', 'im_id'] + num_max_boxes: 100 + dataset: + !COCODataSet + image_dir: val2017 + anno_path: annotations/instances_val2017.json + dataset_dir: dataset/coco + with_background: 
false + sample_transforms: + - !DecodeImage + to_rgb: True + - !ResizeImage + target_size: 640 + interp: 2 + - !NormalizeImage + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + is_scale: True + is_channel_first: false + - !PadBox + num_max_boxes: 50 + - !Permute + to_bgr: false + channel_first: True + batch_size: 8 + drop_empty: false + worker_num: 8 + bufsize: 4 + +TestReader: + inputs_def: + image_shape: [3, 640, 640] + fields: ['image', 'im_size', 'im_id'] + dataset: + !ImageFolder + anno_path: annotations/instances_val2017.json + with_background: false + sample_transforms: + - !DecodeImage + to_rgb: True + - !ResizeImage + target_size: 640 + interp: 2 + - !NormalizeImage + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + is_scale: True + is_channel_first: false + - !Permute + to_bgr: false + channel_first: True + batch_size: 1 diff --git a/static/ppdet/modeling/anchor_heads/yolo_head.py b/static/ppdet/modeling/anchor_heads/yolo_head.py index c7cf14822..49b211ff6 100644 --- a/static/ppdet/modeling/anchor_heads/yolo_head.py +++ b/static/ppdet/modeling/anchor_heads/yolo_head.py @@ -192,6 +192,8 @@ class YOLOv3Head(object): if act == 'leaky': out = fluid.layers.leaky_relu(x=out, alpha=0.1) + elif act == 'mish': + out = fluid.layers.mish(out) return out def _spp_module(self, input, name=""): @@ -657,7 +659,6 @@ class YOLOv4Head(YOLOv3Head): class PPYOLOTinyHead(YOLOv3Head): """ Head block for YOLOv3 network - Args: norm_decay (float): weight decay for normalization layer weights num_classes (int): number of output classes @@ -781,11 +782,9 @@ class PPYOLOTinyHead(YOLOv3Head): def _get_outputs(self, input, is_train=True): """ Get PP-YOLO tiny head output - Args: input (list): List of Variables, output of backbone stages is_train (bool): whether in train or test mode - Returns: outputs (list): Variables of each output layer """ @@ -838,3 +837,232 @@ class PPYOLOTinyHead(YOLOv3Head): route = self._upsample(route) return outputs + + +@register 
+class YOLOv3PANHead(YOLOv3Head): + """ + Head block for YOLOv3PANHead network + + Args: + conv_block_num (int): number of conv block in each detection block + norm_decay (float): weight decay for normalization layer weights + num_classes (int): number of output classes + anchors (list): anchors + anchor_masks (list): anchor masks + nms (object): an instance of `MultiClassNMS` + """ + __inject__ = ['yolo_loss', 'nms'] + __shared__ = ['num_classes', 'weight_prefix_name'] + + def __init__(self, + conv_block_num=3, + norm_decay=0., + num_classes=80, + anchors=[[10, 13], [16, 30], [33, 23], [30, 61], [62, 45], + [59, 119], [116, 90], [156, 198], [373, 326]], + anchor_masks=[[6, 7, 8], [3, 4, 5], [0, 1, 2]], + drop_block=False, + iou_aware=False, + iou_aware_factor=0.4, + block_size=3, + keep_prob=0.9, + yolo_loss="YOLOv3Loss", + spp=False, + nms=MultiClassNMS( + score_threshold=0.01, + nms_top_k=1000, + keep_top_k=100, + nms_threshold=0.45, + background_label=-1).__dict__, + weight_prefix_name='', + downsample=[32, 16, 8], + scale_x_y=1.0, + clip_bbox=True, + act='mish'): + super(YOLOv3PANHead, self).__init__( + conv_block_num=conv_block_num, + norm_decay=norm_decay, + num_classes=num_classes, + anchors=anchors, + anchor_masks=anchor_masks, + drop_block=drop_block, + iou_aware=iou_aware, + iou_aware_factor=iou_aware_factor, + block_size=block_size, + keep_prob=keep_prob, + yolo_loss=yolo_loss, + spp=spp, + nms=nms, + weight_prefix_name=weight_prefix_name, + downsample=downsample, + scale_x_y=scale_x_y, + clip_bbox=clip_bbox) + self.act = act + + def _detection_block(self, + input, + channel, + conv_block_num=2, + is_first=False, + is_test=True, + name=None): + conv_left = self._conv_bn( + input, + channel, + act=self.act, + filter_size=1, + stride=1, + padding=0, + name='{}.left'.format(name)) + conv_right = self._conv_bn( + input, + channel, + act=self.act, + filter_size=1, + stride=1, + padding=0, + name='{}.right'.format(name)) + for j in range(conv_block_num): + 
conv_left = self._conv_bn( + conv_left, + channel, + act=self.act, + filter_size=1, + stride=1, + padding=0, + name='{}.left.{}'.format(name, 2 * j)) + if self.use_spp and is_first and j == 1: + c = conv_left.shape[1] + conv_left = self._spp_module(conv_left, name="spp") + conv_left = self._conv_bn( + conv_left, + c, + act=self.act, + filter_size=1, + stride=1, + padding=0, + name='{}.left.{}'.format(name, 2 * j + 1)) + else: + conv_left = self._conv_bn( + conv_left, + channel, + act=self.act, + filter_size=3, + stride=1, + padding=1, + name='{}.left.{}'.format(name, 2 * j + 1)) + if self.drop_block and j == 1: + conv_left = DropBlock( + conv_left, + block_size=self.block_size, + keep_prob=self.keep_prob, + is_test=is_test) + + conv = fluid.layers.concat(input=[conv_left, conv_right], axis=1) + conv = self._conv_bn( + conv, + channel * 2, + act=self.act, + filter_size=1, + stride=1, + padding=0, + name=name) + return conv, conv + + def _get_outputs(self, input, is_train=True): + """ + Get YOLOv3 head output + + Args: + input (list): List of Variables, output of backbone stages + is_train (bool): whether in train or test mode + + Returns: + outputs (list): Variables of each output layer + """ + + # get last out_layer_num blocks in reverse order + out_layer_num = len(self.anchor_masks) + blocks = input[-1:-out_layer_num - 1:-1] + + # fpn + yolo_feats = [] + route = None + for i, block in enumerate(blocks): + if i > 0: # perform concat in first 2 detection_block + block = fluid.layers.concat(input=[route, block], axis=1) + route, tip = self._detection_block( + block, + channel=512 // (2**i), + is_first=i == 0, + is_test=(not is_train), + conv_block_num=self.conv_block_num, + name=self.prefix_name + "fpn.{}".format(i)) + + yolo_feats.append(tip) + + if i < len(blocks) - 1: + # do not perform upsample in the last detection_block + route = self._conv_bn( + input=route, + ch_out=512 // (2**i), + filter_size=1, + stride=1, + padding=0, + act=self.act, + 
name=self.prefix_name + "fpn_transition.{}".format(i)) + # upsample + route = self._upsample(route) + + # pan + pan_feats = [yolo_feats[-1]] + route = yolo_feats[out_layer_num - 1] + for i in reversed(range(out_layer_num - 1)): + channel = 512 // (2**i) + route = self._conv_bn( + input=route, + ch_out=channel, + filter_size=3, + stride=2, + padding=1, + act=self.act, + name=self.prefix_name + "pan_transition.{}".format(i)) + block = yolo_feats[i] + block = fluid.layers.concat(input=[route, block], axis=1) + + route, tip = self._detection_block( + block, + channel=channel, + is_first=False, + is_test=(not is_train), + conv_block_num=self.conv_block_num, + name=self.prefix_name + "pan.{}".format(i)) + + pan_feats.append(tip) + + pan_feats = pan_feats[::-1] + outputs = [] + for i, block in enumerate(pan_feats): + if self.iou_aware: + num_filters = len(self.anchor_masks[i]) * (self.num_classes + 6) + else: + num_filters = len(self.anchor_masks[i]) * (self.num_classes + 5) + with fluid.name_scope('yolo_output'): + block_out = fluid.layers.conv2d( + input=block, + num_filters=num_filters, + filter_size=1, + stride=1, + padding=0, + act=None, + param_attr=ParamAttr( + name=self.prefix_name + + "yolo_output.{}.conv.weights".format(i)), + bias_attr=ParamAttr( + regularizer=L2Decay(0.), + name=self.prefix_name + + "yolo_output.{}.conv.bias".format(i))) + outputs.append(block_out) + + return outputs diff --git a/static/ppdet/modeling/losses/iou_aware_loss.py b/static/ppdet/modeling/losses/iou_aware_loss.py index c68c7a707..d0aeb9df3 100644 --- a/static/ppdet/modeling/losses/iou_aware_loss.py +++ b/static/ppdet/modeling/losses/iou_aware_loss.py @@ -74,6 +74,7 @@ class IouAwareLoss(IouLoss): iouk = self._iou(pred, gt, ioup, eps) iouk.stop_gradient = True - loss_iou_aware = fluid.layers.cross_entropy(ioup, iouk, soft_label=True) + loss_iou_aware = fluid.layers.sigmoid_cross_entropy_with_logits(ioup, + iouk) loss_iou_aware = loss_iou_aware * self._loss_weight return 
loss_iou_aware diff --git a/static/ppdet/modeling/losses/yolo_loss.py b/static/ppdet/modeling/losses/yolo_loss.py index c16c6cb11..553e63322 100644 --- a/static/ppdet/modeling/losses/yolo_loss.py +++ b/static/ppdet/modeling/losses/yolo_loss.py @@ -238,7 +238,6 @@ class YOLOv3Loss(object): along channel dimension """ ioup = fluid.layers.slice(output, axes=[1], starts=[0], ends=[an_num]) - ioup = fluid.layers.sigmoid(ioup) oriout = fluid.layers.slice( output, axes=[1], -- GitLab