From a718694c9cea3f81d1c0a86f2cd01004cb719f5c Mon Sep 17 00:00:00 2001 From: wangxinxin08 <69842442+wangxinxin08@users.noreply.github.com> Date: Thu, 15 Apr 2021 09:47:29 +0800 Subject: [PATCH] add ppyolov2 (#2626) * add ppyolov2 * fix bugs and modify docs * modify code and doc according to review * fix bugs while resolving conflicts --- configs/ppyolo/README.md | 38 +- configs/ppyolo/README_cn.md | 37 +- configs/ppyolo/_base_/optimizer_365e.yml | 21 ++ configs/ppyolo/_base_/ppyolov2_r50vd_dcn.yml | 65 ++++ configs/ppyolo/_base_/ppyolov2_reader.yml | 43 +++ .../ppyolo/ppyolov2_r101vd_dcn_365e_coco.yml | 20 + .../ppyolo/ppyolov2_r50vd_dcn_365e_coco.yml | 10 + ppdet/modeling/backbones/darknet.py | 4 +- ppdet/modeling/losses/iou_aware_loss.py | 4 +- ppdet/modeling/necks/yolo_fpn.py | 350 ++++++++++++++++-- ppdet/modeling/ops.py | 5 + static/configs/ppyolo/README.md | 45 +-- static/configs/ppyolo/README_cn.md | 44 +-- static/configs/ppyolo/ppyolov2_r101vd_dcn.yml | 89 +++++ static/configs/ppyolo/ppyolov2_r50vd_dcn.yml | 89 +++++ static/configs/ppyolo/ppyolov2_reader.yml | 111 ++++++ .../ppdet/modeling/anchor_heads/yolo_head.py | 234 +++++++++++- .../ppdet/modeling/losses/iou_aware_loss.py | 3 +- static/ppdet/modeling/losses/yolo_loss.py | 1 - 19 files changed, 1091 insertions(+), 122 deletions(-) create mode 100644 configs/ppyolo/_base_/optimizer_365e.yml create mode 100644 configs/ppyolo/_base_/ppyolov2_r50vd_dcn.yml create mode 100644 configs/ppyolo/_base_/ppyolov2_reader.yml create mode 100644 configs/ppyolo/ppyolov2_r101vd_dcn_365e_coco.yml create mode 100644 configs/ppyolo/ppyolov2_r50vd_dcn_365e_coco.yml create mode 100644 static/configs/ppyolo/ppyolov2_r101vd_dcn.yml create mode 100644 static/configs/ppyolo/ppyolov2_r50vd_dcn.yml create mode 100644 static/configs/ppyolo/ppyolov2_reader.yml diff --git a/configs/ppyolo/README.md b/configs/ppyolo/README.md index 5ea2c7b3c..ba73b470a 100644 --- a/configs/ppyolo/README.md +++ b/configs/ppyolo/README.md @@ -38,17 +38,20 @@ PP-YOLO improved performance and speed of YOLOv3 with following methods: | Model | GPU number | images/GPU | backbone | input shape | Box APval | Box APtest | V100 FP32(FPS) | V100 TensorRT FP16(FPS) | download | config | |:------------------------:|:-------:|:-------------:|:----------:| :-------:| :------------------: | :-------------------: | :------------: | :---------------------: | :------: | :------: | -| PP-YOLO | 8 | 24 | ResNet50vd | 608 | 44.8 | 45.2 | 72.9 | 155.6 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_1x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_r50vd_dcn_1x_coco.yml) | -| PP-YOLO | 8 | 24 | ResNet50vd | 512 | 43.9 | 44.4 | 89.9 | 188.4 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_1x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_r50vd_dcn_1x_coco.yml) | -| PP-YOLO | 8 | 24 | ResNet50vd | 416 | 42.1 | 42.5 | 109.1 | 215.4 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_1x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_r50vd_dcn_1x_coco.yml) | -| PP-YOLO | 8 | 24 | ResNet50vd | 320 | 38.9 | 39.3 | 132.2 | 242.2 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_1x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_r50vd_dcn_1x_coco.yml) | -| PP-YOLO_2x | 8 | 24 | ResNet50vd | 608 | 45.3 | 45.9 | 72.9 | 155.6 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_2x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_r50vd_dcn_2x_coco.yml) | -| PP-YOLO_2x | 8 | 24 | ResNet50vd | 512 | 44.4 | 45.0 | 89.9 | 188.4 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_2x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_r50vd_dcn_2x_coco.yml) | -| PP-YOLO_2x | 8 | 24 | ResNet50vd | 416 | 42.7 | 43.2 | 109.1 | 215.4 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_2x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_r50vd_dcn_2x_coco.yml) | -| PP-YOLO_2x | 8 | 24 | ResNet50vd | 320 | 39.5 | 40.1 | 132.2 | 242.2 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_2x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_r50vd_dcn_2x_coco.yml) | -| PP-YOLO_ResNet18vd | 4 | 32 | ResNet18vd | 512 | 29.2 | 29.5 | 357.1 | 657.9 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r18vd_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_r18vd_coco.yml) | -| PP-YOLO_ResNet18vd | 4 | 32 | ResNet18vd | 416 | 28.6 | 28.9 | 409.8 | 719.4 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r18vd_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_r18vd_coco.yml) | -| PP-YOLO_ResNet18vd | 4 | 32 | ResNet18vd | 320 | 26.2 | 26.4 | 480.7 | 763.4 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r18vd_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_r18vd_coco.yml) | +| PP-YOLO | 8 | 24 | ResNet50vd | 608 | 44.8 | 45.2 | 72.9 | 155.6 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_1x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/ppyolo/ppyolo_r50vd_dcn_1x_coco.yml) | +| PP-YOLO | 8 | 24 | ResNet50vd | 512 | 43.9 | 44.4 | 89.9 | 188.4 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_1x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/ppyolo/ppyolo_r50vd_dcn_1x_coco.yml) | +| PP-YOLO | 8 | 24 | ResNet50vd | 416 | 42.1 | 42.5 | 109.1 | 215.4 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_1x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/ppyolo/ppyolo_r50vd_dcn_1x_coco.yml) | +| PP-YOLO | 8 | 24 | ResNet50vd | 320 | 38.9 | 39.3 | 132.2 | 242.2 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_1x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/ppyolo/ppyolo_r50vd_dcn_1x_coco.yml) | +| PP-YOLO_2x | 8 | 24 | ResNet50vd | 608 | 45.3 | 45.9 | 72.9 | 155.6 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_2x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/ppyolo/ppyolo_r50vd_dcn_2x_coco.yml) | +| PP-YOLO_2x | 8 | 24 | ResNet50vd | 512 | 44.4 | 45.0 | 89.9 | 188.4 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_2x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/ppyolo/ppyolo_r50vd_dcn_2x_coco.yml) | +| PP-YOLO_2x | 8 | 24 | ResNet50vd | 416 | 42.7 | 43.2 | 109.1 | 215.4 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_2x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/ppyolo/ppyolo_r50vd_dcn_2x_coco.yml) | +| PP-YOLO_2x | 8 | 24 | ResNet50vd | 320 | 39.5 | 40.1 | 132.2 | 242.2 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_2x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/ppyolo/ppyolo_r50vd_dcn_2x_coco.yml) | +| PP-YOLO | 4 | 32 | ResNet18vd | 512 | 29.2 | 29.5 | 357.1 | 657.9 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r18vd_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/ppyolo/ppyolo_r18vd_coco.yml) | +| PP-YOLO | 4 | 32 | ResNet18vd | 416 | 28.6 | 28.9 | 409.8 | 719.4 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r18vd_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/ppyolo/ppyolo_r18vd_coco.yml) | +| PP-YOLO | 4 | 32 | ResNet18vd | 320 | 26.2 | 26.4 | 480.7 | 763.4 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r18vd_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/ppyolo/ppyolo_r18vd_coco.yml) | +| PP-YOLOv2 | 8 | 12 | ResNet50vd | 640 | 49.1 | 49.5 | - | - | [model](https://paddledet.bj.bcebos.com/models/ppyolov2_r50vd_dcn_365e_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/ppyolo/ppyolov2_r50vd_dcn_365e_coco.yml) | +| PP-YOLOv2 | 8 | 12 | ResNet101vd | 640 | 49.7 | 50.1 | - | - | [model](https://paddledet.bj.bcebos.com/models/ppyolov2_r101vd_dcn_365e_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/ppyolo/ppyolov2_r101vd_dcn_365e_coco.yml) | + **Notes:** @@ -62,8 +65,8 @@ PP-YOLO improved performance and speed of YOLOv3 with following methods: | Model | GPU number | images/GPU | Model Size | input shape | Box APval | Box AP50val | Kirin 990 1xCore(FPS) | download | config | |:----------------------------:|:-------:|:-------------:|:----------:| :-------:| :------------------: | :--------------------: | :--------------------: | :------: | :------: | -| PP-YOLO_MobileNetV3_large | 4 | 32 | 28MB | 320 | 23.2 | 42.6 | 14.1 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_mbv3_large_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_mbv3_large_coco.yml) | -| PP-YOLO_MobileNetV3_small | 4 | 32 | 16MB | 320 | 17.2 | 33.8 | 21.5 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_mbv3_small_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_mbv3_small_coco.yml) | +| PP-YOLO_MobileNetV3_large | 4 | 32 | 28MB | 320 | 23.2 | 42.6 | 14.1 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_mbv3_large_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/ppyolo/ppyolo_mbv3_large_coco.yml) | +| PP-YOLO_MobileNetV3_small | 4 | 32 | 16MB | 320 | 17.2 | 33.8 | 21.5 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_mbv3_small_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/ppyolo/ppyolo_mbv3_small_coco.yml) | **Notes:** @@ -91,9 +94,9 @@ PP-YOLO trained on Pascal VOC dataset as follows: | Model | GPU number | images/GPU | backbone | input shape | Box AP50val | download | config | |:------------------:|:----------:|:----------:|:----------:| :----------:| :--------------------: | :------: | :-----: | -| PP-YOLO | 8 | 12 | ResNet50vd | 608 | 84.9 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_voc.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_r50vd_dcn_voc.yml) | -| PP-YOLO | 8 | 12 | ResNet50vd | 416 | 84.3 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_voc.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_r50vd_dcn_voc.yml) | -| PP-YOLO | 8 | 12 | ResNet50vd | 320 | 82.2 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_voc.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_r50vd_dcn_voc.yml) | +| PP-YOLO | 8 | 12 | ResNet50vd | 608 | 84.9 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_voc.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/ppyolo/ppyolo_r50vd_dcn_voc.yml) | +| PP-YOLO | 8 | 12 | ResNet50vd | 416 | 84.3 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_voc.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/ppyolo/ppyolo_r50vd_dcn_voc.yml) | +| PP-YOLO | 8 | 12 | ResNet50vd | 320 | 82.2 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_voc.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/ppyolo/ppyolo_r50vd_dcn_voc.yml) | ## Getting Start @@ -184,8 +187,7 @@ Optimizing method and ablation experiments of PP-YOLO compared with YOLOv3. - Performance and inference spedd are measure with input shape as 608 - All models are trained on COCO train2017 datast and evaluated on val2017 & test-dev2017 dataset,`Box AP` is evaluation results as `mAP(IoU=0.5:0.95)`. - Inference speed is tested on single Tesla V100 with batch size as 1 following test method and environment configuration in benchmark above. -- [YOLOv3-DarkNet53](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/yolov3/yolov3_darknet53_270e_coco.yml) with mAP as 39.0 is optimized YOLOv3 model in PaddleDetection,see [Model Zoo](https://github.com/PaddlePaddle/PaddleDetection/blob/master/docs/MODEL_ZOO.md) for details. - +- [YOLOv3-DarkNet53](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/yolov3/yolov3_darknet53_270e_coco.yml) with mAP as 39.0 is optimized YOLOv3 model in PaddleDetection,see [Model Zoo](https://github.com/PaddlePaddle/PaddleDetection/blob/master/docs/MODEL_ZOO.md) for details. ## Citation diff --git a/configs/ppyolo/README_cn.md b/configs/ppyolo/README_cn.md index 19c2724e9..9de54d118 100644 --- a/configs/ppyolo/README_cn.md +++ b/configs/ppyolo/README_cn.md @@ -38,17 +38,19 @@ PP-YOLO从如下方面优化和提升YOLOv3模型的精度和速度: | 模型 | GPU个数 | 每GPU图片个数 | 骨干网络 | 输入尺寸 | Box APval | Box APtest | V100 FP32(FPS) | V100 TensorRT FP16(FPS) | 模型下载 | 配置文件 | |:------------------------:|:-------:|:-------------:|:----------:| :-------:| :------------------: | :-------------------: | :------------: | :---------------------: | :------: | :------: | -| PP-YOLO | 8 | 24 | ResNet50vd | 608 | 44.8 | 45.2 | 72.9 | 155.6 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_1x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_r50vd_dcn_1x_coco.yml) | -| PP-YOLO | 8 | 24 | ResNet50vd | 512 | 43.9 | 44.4 | 89.9 | 188.4 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_1x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_r50vd_dcn_1x_coco.yml) | -| PP-YOLO | 8 | 24 | ResNet50vd | 416 | 42.1 | 42.5 | 109.1 | 215.4 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_1x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_r50vd_dcn_1x_coco.yml) | -| PP-YOLO | 8 | 24 | ResNet50vd | 320 | 38.9 | 39.3 | 132.2 | 242.2 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_1x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_r50vd_dcn_1x_coco.yml) | -| PP-YOLO_2x | 8 | 24 | ResNet50vd | 608 | 45.3 | 45.9 | 72.9 | 155.6 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_2x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_r50vd_dcn_2x_coco.yml) | -| PP-YOLO_2x | 8 | 24 | ResNet50vd | 512 | 44.4 | 45.0 | 89.9 | 188.4 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_2x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_r50vd_dcn_2x_coco.yml) | -| PP-YOLO_2x | 8 | 24 | ResNet50vd | 416 | 42.7 | 43.2 | 109.1 | 215.4 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_2x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_r50vd_dcn_2x_coco.yml) | -| PP-YOLO_2x | 8 | 24 | ResNet50vd | 320 | 39.5 | 40.1 | 132.2 | 242.2 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_2x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_r50vd_dcn_2x_coco.yml) | -| PP-YOLO_ResNet18vd | 4 | 32 | ResNet18vd | 512 | 29.2 | 29.5 | 357.1 | 657.9 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r18vd_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_r18vd_coco.yml) | -| PP-YOLO_ResNet18vd | 4 | 32 | ResNet18vd | 416 | 28.6 | 28.9 | 409.8 | 719.4 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r18vd_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_r18vd_coco.yml) | -| PP-YOLO_ResNet18vd | 4 | 32 | ResNet18vd | 320 | 26.2 | 26.4 | 480.7 | 763.4 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r18vd_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_r18vd_coco.yml) | +| PP-YOLO | 8 | 24 | ResNet50vd | 608 | 44.8 | 45.2 | 72.9 | 155.6 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_1x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/ppyolo/ppyolo_r50vd_dcn_1x_coco.yml) | +| PP-YOLO | 8 | 24 | ResNet50vd | 512 | 43.9 | 44.4 | 89.9 | 188.4 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_1x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/ppyolo/ppyolo_r50vd_dcn_1x_coco.yml) | +| PP-YOLO | 8 | 24 | ResNet50vd | 416 | 42.1 | 42.5 | 109.1 | 215.4 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_1x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/ppyolo/ppyolo_r50vd_dcn_1x_coco.yml) | +| PP-YOLO | 8 | 24 | ResNet50vd | 320 | 38.9 | 39.3 | 132.2 | 242.2 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_1x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/ppyolo/ppyolo_r50vd_dcn_1x_coco.yml) | +| PP-YOLO_2x | 8 | 24 | ResNet50vd | 608 | 45.3 | 45.9 | 72.9 | 155.6 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_2x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/ppyolo/ppyolo_r50vd_dcn_2x_coco.yml) | +| PP-YOLO_2x | 8 | 24 | ResNet50vd | 512 | 44.4 | 45.0 | 89.9 | 188.4 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_2x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/ppyolo/ppyolo_r50vd_dcn_2x_coco.yml) | +| PP-YOLO_2x | 8 | 24 | ResNet50vd | 416 | 42.7 | 43.2 | 109.1 | 215.4 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_2x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/ppyolo/ppyolo_r50vd_dcn_2x_coco.yml) | +| PP-YOLO_2x | 8 | 24 | ResNet50vd | 320 | 39.5 | 40.1 | 132.2 | 242.2 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_2x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/ppyolo/ppyolo_r50vd_dcn_2x_coco.yml) | +| PP-YOLO | 4 | 32 | ResNet18vd | 512 | 29.2 | 29.5 | 357.1 | 657.9 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r18vd_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/ppyolo/ppyolo_r18vd_coco.yml) | +| PP-YOLO | 4 | 32 | ResNet18vd | 416 | 28.6 | 28.9 | 409.8 | 719.4 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r18vd_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/ppyolo/ppyolo_r18vd_coco.yml) | +| PP-YOLO | 4 | 32 | ResNet18vd | 320 | 26.2 | 26.4 | 480.7 | 763.4 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r18vd_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/ppyolo/ppyolo_r18vd_coco.yml) | +| PP-YOLOv2 | 8 | 12 | ResNet50vd | 640 | 49.1 | 49.5 | - | - | [model](https://paddledet.bj.bcebos.com/models/ppyolov2_r50vd_dcn_365e_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/ppyolo/ppyolov2_r50vd_dcn_365e_coco.yml) | +| PP-YOLOv2 | 8 | 12 | ResNet101vd | 640 | 49.7 | 50.1 | - | - | [model](https://paddledet.bj.bcebos.com/models/ppyolov2_r101vd_dcn_365e_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/ppyolo/ppyolov2_r101vd_dcn_365e_coco.yml) | **注意:** @@ -63,8 +65,8 @@ PP-YOLO从如下方面优化和提升YOLOv3模型的精度和速度: | 模型 | GPU个数 | 每GPU图片个数 | 模型体积 | 输入尺寸 | Box APval | Box AP50val | Kirin 990 1xCore (FPS) | 模型下载 | 配置文件 | |:----------------------------:|:-------:|:-------------:|:----------:| :-------:| :------------------: | :--------------------: | :--------------------: | :------: | :------: | -| PP-YOLO_MobileNetV3_large | 4 | 32 | 28MB | 320 | 23.2 | 42.6 | 14.1 | [下载链接](https://paddledet.bj.bcebos.com/models/ppyolo_mbv3_large_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_mbv3_large_coco.yml) | -| PP-YOLO_MobileNetV3_small | 4 | 32 | 16MB | 320 | 17.2 | 33.8 | 21.5 | [下载链接](https://paddledet.bj.bcebos.com/models/ppyolo_mbv3_small_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_mbv3_small_coco.yml) | +| PP-YOLO_MobileNetV3_large | 4 | 32 | 28MB | 320 | 23.2 | 42.6 | 14.1 | [下载链接](https://paddledet.bj.bcebos.com/models/ppyolo_mbv3_large_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/ppyolo/ppyolo_mbv3_large_coco.yml) | +| PP-YOLO_MobileNetV3_small | 4 | 32 | 16MB | 320 | 17.2 | 33.8 | 21.5 | [下载链接](https://paddledet.bj.bcebos.com/models/ppyolo_mbv3_small_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/ppyolo/ppyolo_mbv3_small_coco.yml) | - PP-YOLO_MobileNetV3 模型使用COCO数据集中train2017作为训练集,使用val2017作为测试集,Box APval为`mAP(IoU=0.5:0.95)`评估结果, Box AP50val为`mAP(IoU=0.5)`评估结果。 - PP-YOLO_MobileNetV3 模型训练过程中使用4GPU,每GPU batch size为32进行训练,如训练GPU数和batch size不使用上述配置,须参考[FAQ](https://github.com/PaddlePaddle/PaddleDetection/blob/master/docs/FAQ.md)调整学习率和迭代次数。 @@ -76,9 +78,9 @@ PP-YOLO在Pascal VOC数据集上训练模型如下: | 模型 | GPU个数 | 每GPU图片个数 | 骨干网络 | 输入尺寸 | Box AP50val | 模型下载 | 配置文件 | |:------------------:|:-------:|:-------------:|:----------:| :----------:| :--------------------: | :------: | :-----: | -| PP-YOLO | 8 | 12 | ResNet50vd | 608 | 84.9 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_voc.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_r50vd_dcn_voc.yml) | -| PP-YOLO | 8 | 12 | ResNet50vd | 416 | 84.3 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_voc.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_r50vd_dcn_voc.yml) | -| PP-YOLO | 8 | 12 | ResNet50vd | 320 | 82.2 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_voc.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_r50vd_dcn_voc.yml) | +| PP-YOLO | 8 | 12 | ResNet50vd | 608 | 84.9 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_voc.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/ppyolo/ppyolo_r50vd_dcn_voc.yml) | +| PP-YOLO | 8 | 12 | ResNet50vd | 416 | 84.3 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_voc.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/ppyolo/ppyolo_r50vd_dcn_voc.yml) | +| PP-YOLO | 8 | 12 | ResNet50vd | 320 | 82.2 | [model](https://paddledet.bj.bcebos.com/models/ppyolo_r50vd_dcn_voc.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/ppyolo/ppyolo_r50vd_dcn_voc.yml) | ## 使用说明 @@ -169,8 +171,7 @@ PP-YOLO模型相对于YOLOv3模型优化项消融实验数据如下表所示。 - 精度与推理速度数据均为使用输入图像尺寸为608的测试结果 - Box AP为在COCO train2017数据集训练,val2017和test-dev2017数据集上评估`mAP(IoU=0.5:0.95)`数据 - 推理速度为单卡V100上,batch size=1, 使用上述benchmark测试方法的测试结果,测试环境配置为CUDA 10.2,CUDNN 7.5.1 -- [YOLOv3-DarkNet53](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/yolov3/yolov3_darknet53_270e_coco.yml)精度38.9为PaddleDetection优化后的YOLOv3模型,可参见[模型库](https://github.com/PaddlePaddle/PaddleDetection/blob/master/docs/MODEL_ZOO.md) - +- [YOLOv3-DarkNet53](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/yolov3/yolov3_darknet53_270e_coco.yml)精度38.9为PaddleDetection优化后的YOLOv3模型,可参见[模型库](https://github.com/PaddlePaddle/PaddleDetection/blob/master/docs/MODEL_ZOO.md) ## 引用 diff --git a/configs/ppyolo/_base_/optimizer_365e.yml b/configs/ppyolo/_base_/optimizer_365e.yml new file mode 100644 index 000000000..d834a4ce0 --- /dev/null +++ b/configs/ppyolo/_base_/optimizer_365e.yml @@ -0,0 +1,21 @@ +epoch: 365 + +LearningRate: + base_lr: 0.005 + schedulers: + - !PiecewiseDecay + gamma: 0.1 + milestones: + - 243 + - !LinearWarmup + start_factor: 0. + steps: 4000 + +OptimizerBuilder: + clip_grad_by_norm: 35. + optimizer: + momentum: 0.9 + type: Momentum + regularizer: + factor: 0.0005 + type: L2 diff --git a/configs/ppyolo/_base_/ppyolov2_r50vd_dcn.yml b/configs/ppyolo/_base_/ppyolov2_r50vd_dcn.yml new file mode 100644 index 000000000..6288adeed --- /dev/null +++ b/configs/ppyolo/_base_/ppyolov2_r50vd_dcn.yml @@ -0,0 +1,65 @@ +architecture: YOLOv3 +pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_pretrained.pdparams +norm_type: sync_bn +use_ema: true +ema_decay: 0.9998 + +YOLOv3: + backbone: ResNet + neck: PPYOLOPAN + yolo_head: YOLOv3Head + post_process: BBoxPostProcess + +ResNet: + depth: 50 + variant: d + return_idx: [1, 2, 3] + dcn_v2_stages: [3] + freeze_at: -1 + freeze_norm: false + norm_decay: 0. + +PPYOLOPAN: + drop_block: true + block_size: 3 + keep_prob: 0.9 + spp: true + +YOLOv3Head: + anchors: [[10, 13], [16, 30], [33, 23], + [30, 61], [62, 45], [59, 119], + [116, 90], [156, 198], [373, 326]] + anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]] + loss: YOLOv3Loss + iou_aware: true + iou_aware_factor: 0.5 + +YOLOv3Loss: + ignore_thresh: 0.7 + downsample: [32, 16, 8] + label_smooth: false + scale_x_y: 1.05 + iou_loss: IouLoss + iou_aware_loss: IouAwareLoss + +IouLoss: + loss_weight: 2.5 + loss_square: true + +IouAwareLoss: + loss_weight: 1.0 + +BBoxPostProcess: + decode: + name: YOLOBox + conf_thresh: 0.01 + downsample_ratio: 32 + clip_bbox: true + scale_x_y: 1.05 + nms: + name: MatrixNMS + keep_top_k: 100 + score_threshold: 0.01 + post_threshold: 0.01 + nms_top_k: -1 + background_label: -1 diff --git a/configs/ppyolo/_base_/ppyolov2_reader.yml b/configs/ppyolo/_base_/ppyolov2_reader.yml new file mode 100644 index 000000000..747253131 --- /dev/null +++ b/configs/ppyolo/_base_/ppyolov2_reader.yml @@ -0,0 +1,43 @@ +worker_num: 8 +TrainReader: + inputs_def: + num_max_boxes: 100 + sample_transforms: + - Decode: {} + - Mixup: {alpha: 1.5, beta: 1.5} + - RandomDistort: {} + - RandomExpand: {fill_value: [123.675, 116.28, 103.53]} + - RandomCrop: {} + - RandomFlip: {} + batch_transforms: + - BatchRandomResize: {target_size: [320, 352, 384, 416, 448, 480, 512, 544, 576, 608, 640, 672, 704, 736, 768], random_size: True, random_interp: True, keep_ratio: False} + - NormalizeBox: {} + - PadBox: {num_max_boxes: 100} + - BboxXYXY2XYWH: {} + - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True} + - Permute: {} + - Gt2YoloTarget: {anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]], anchors: [[10, 13], [16, 30], [33, 23], [30, 61], [62, 45], [59, 119], [116, 90], [156, 198], [373, 326]], downsample_ratios: [32, 16, 8]} + batch_size: 12 + shuffle: true + drop_last: true + mixup_epoch: 25000 + use_shared_memory: true + +EvalReader: + sample_transforms: + - Decode: {} + - Resize: {target_size: [640, 640], keep_ratio: False, interp: 2} + - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True} + - Permute: {} + batch_size: 8 + drop_empty: false + +TestReader: + inputs_def: + image_shape: [3, 640, 640] + sample_transforms: + - Decode: {} + - Resize: {target_size: [640, 640], keep_ratio: False, interp: 2} + - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True} + - Permute: {} + batch_size: 1 diff --git a/configs/ppyolo/ppyolov2_r101vd_dcn_365e_coco.yml b/configs/ppyolo/ppyolov2_r101vd_dcn_365e_coco.yml new file mode 100644 index 000000000..0f1aee746 --- /dev/null +++ b/configs/ppyolo/ppyolov2_r101vd_dcn_365e_coco.yml @@ -0,0 +1,20 @@ +_BASE_: [ + '../datasets/coco_detection.yml', + '../runtime.yml', + './_base_/ppyolov2_r50vd_dcn.yml', + './_base_/optimizer_365e.yml', + './_base_/ppyolov2_reader.yml', +] + +snapshot_epoch: 8 +weights: output/ppyolov2_r101vd_dcn_365e_coco/model_final +pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet101_vd_ssld_pretrained.pdparams + +ResNet: + depth: 101 + variant: d + return_idx: [1, 2, 3] + dcn_v2_stages: [3] + freeze_at: -1 + freeze_norm: false + norm_decay: 0. diff --git a/configs/ppyolo/ppyolov2_r50vd_dcn_365e_coco.yml b/configs/ppyolo/ppyolov2_r50vd_dcn_365e_coco.yml new file mode 100644 index 000000000..a5e1bc335 --- /dev/null +++ b/configs/ppyolo/ppyolov2_r50vd_dcn_365e_coco.yml @@ -0,0 +1,10 @@ +_BASE_: [ + '../datasets/coco_detection.yml', + '../runtime.yml', + './_base_/ppyolov2_r50vd_dcn.yml', + './_base_/optimizer_365e.yml', + './_base_/ppyolov2_reader.yml', +] + +snapshot_epoch: 8 +weights: output/ppyolov2_r50vd_dcn_365e_coco/model_final diff --git a/ppdet/modeling/backbones/darknet.py b/ppdet/modeling/backbones/darknet.py index 9bf0cdaa9..13af903d4 100755 --- a/ppdet/modeling/backbones/darknet.py +++ b/ppdet/modeling/backbones/darknet.py @@ -18,7 +18,7 @@ import paddle.nn.functional as F from paddle import ParamAttr from paddle.regularizer import L2Decay from ppdet.core.workspace import register, serializable -from ppdet.modeling.ops import batch_norm +from ppdet.modeling.ops import batch_norm, mish from ..shape_spec import ShapeSpec __all__ = ['DarkNet', 'ConvBNLayer'] @@ -77,6 +77,8 @@ class ConvBNLayer(nn.Layer): out = self.batch_norm(out) if self.act == 'leaky': out = F.leaky_relu(out, 0.1) + elif self.act == 'mish': + out = mish(out) return out diff --git a/ppdet/modeling/losses/iou_aware_loss.py b/ppdet/modeling/losses/iou_aware_loss.py index f5599588c..1e6aa8bf0 100644 --- a/ppdet/modeling/losses/iou_aware_loss.py +++ b/ppdet/modeling/losses/iou_aware_loss.py @@ -42,7 +42,7 @@ class IouAwareLoss(IouLoss): iou = bbox_iou( pbox, gbox, giou=self.giou, diou=self.diou, ciou=self.ciou) iou.stop_gradient = True - ioup = F.sigmoid(ioup) - loss_iou_aware = (-iou * paddle.log(ioup)).sum(-2, keepdim=True) + loss_iou_aware = F.binary_cross_entropy_with_logits( + ioup, iou, reduction='none') loss_iou_aware = loss_iou_aware * self.loss_weight return loss_iou_aware diff --git a/ppdet/modeling/necks/yolo_fpn.py b/ppdet/modeling/necks/yolo_fpn.py index 15309eac6..873e43f0e 100644 --- a/ppdet/modeling/necks/yolo_fpn.py +++ b/ppdet/modeling/necks/yolo_fpn.py @@ -25,6 +25,32 @@ from ..shape_spec import ShapeSpec __all__ = ['YOLOv3FPN', 'PPYOLOFPN'] +def add_coord(x): + b = x.shape[0] + if self.data_format == 'NCHW': + h = x.shape[2] + w = x.shape[3] + else: + h = x.shape[1] + w = x.shape[2] + + gx = paddle.arange(w, dtype='float32') / (w - 1.) * 2.0 - 1. + if self.data_format == 'NCHW': + gx = gx.reshape([1, 1, 1, w]).expand([b, 1, h, w]) + else: + gx = gx.reshape([1, 1, w, 1]).expand([b, h, w, 1]) + gx.stop_gradient = True + + gy = paddle.arange(h, dtype='float32') / (h - 1.) * 2.0 - 1. + if self.data_format == 'NCHW': + gy = gy.reshape([1, 1, h, 1]).expand([b, 1, h, w]) + else: + gy = gy.reshape([1, h, 1, 1]).expand([b, h, w, 1]) + gy.stop_gradient = True + + return gx, gy + + class YoloDetBlock(nn.Layer): def __init__(self, ch_in, channel, norm_type, name, data_format='NCHW'): """ @@ -87,6 +113,7 @@ class SPP(nn.Layer): pool_size, norm_type, name, + act='leaky', data_format='NCHW'): """ SPP layer, which consist of four pooling layer follwed by conv layer @@ -101,6 +128,7 @@ class SPP(nn.Layer): """ super(SPP, self).__init__() self.pool = [] + self.data_format = data_format for size in pool_size: pool = self.add_sublayer( '{}.pool1'.format(name), @@ -118,13 +146,18 @@ class SPP(nn.Layer): padding=k // 2, norm_type=norm_type, name=name, + act=act, data_format=data_format) def forward(self, x): outs = [x] for pool in self.pool: outs.append(pool(x)) - y = paddle.concat(outs, axis=1) + if self.data_format == "NCHW": + y = paddle.concat(outs, axis=1) + else: + y = paddle.concat(outs, axis=-1) + y = self.conv(y) return y @@ -204,28 +237,7 @@ class CoordConv(nn.Layer): self.data_format = data_format def forward(self, x): - b = x.shape[0] - if self.data_format == 'NCHW': - h = x.shape[2] - w = x.shape[3] - else: - h = x.shape[1] - w = x.shape[2] - - gx = paddle.arange(w, dtype='float32') / (w - 1.) * 2.0 - 1. - if self.data_format == 'NCHW': - gx = gx.reshape([1, 1, 1, w]).expand([b, 1, h, w]) - else: - gx = gx.reshape([1, 1, w, 1]).expand([b, h, w, 1]) - gx.stop_gradient = True - - gy = paddle.arange(h, dtype='float32') / (h - 1.) * 2.0 - 1. - if self.data_format == 'NCHW': - gy = gy.reshape([1, 1, h, 1]).expand([b, 1, h, w]) - else: - gy = gy.reshape([1, h, 1, 1]).expand([b, h, w, 1]) - gy.stop_gradient = True - + gx, gy = add_coord(x) if self.data_format == 'NCHW': y = paddle.concat([x, gx, gy], axis=1) else: @@ -273,7 +285,6 @@ class PPYOLOTinyDetBlock(nn.Layer): data_format='NCHW'): """ PPYOLO Tiny DetBlock layer - Args: ch_in (list): input channel number ch_out (list): output channel number @@ -333,6 +344,73 @@ class PPYOLOTinyDetBlock(nn.Layer): return route, tip +class PPYOLODetBlockCSP(nn.Layer): + def __init__(self, + cfg, + ch_in, + ch_out, + act, + norm_type, + name, + data_format='NCHW'): + """ + PPYOLODetBlockCSP layer + + Args: + cfg (list): layer configs for this block + ch_in (int): input channel + ch_out (int): output channel + act (str): default mish + name (str): block name + data_format (str): data format, NCHW or NHWC + """ + super(PPYOLODetBlockCSP, self).__init__() + self.data_format = data_format + self.conv1 = ConvBNLayer( + ch_in, + ch_out, + 1, + padding=0, + act=act, + norm_type=norm_type, + name=name + '.left', + data_format=data_format) + self.conv2 = ConvBNLayer( + ch_in, + ch_out, + 1, + padding=0, + act=act, + norm_type=norm_type, + name=name + '.right', + data_format=data_format) + self.conv3 = ConvBNLayer( + ch_out * 2, + ch_out * 2, + 1, + padding=0, + act=act, + norm_type=norm_type, + name=name, + data_format=data_format) + self.conv_module = nn.Sequential() + for idx, (layer_name, layer, args, kwargs) in enumerate(cfg): + kwargs.update(name=name + layer_name, data_format=data_format) + self.conv_module.add_sublayer(layer_name, layer(*args, **kwargs)) + + def forward(self, inputs): + conv_left = self.conv1(inputs) + conv_right = self.conv2(inputs) + conv_left = self.conv_module(conv_left) + if self.data_format == 'NCHW': + conv = paddle.concat([conv_left, conv_right], axis=1) + else: + conv = paddle.concat([conv_left, conv_right], axis=-1) + + conv = self.conv3(conv) + return conv, conv + + @register @serializable class YOLOv3FPN(nn.Layer): @@ -430,7 +508,12 @@ class PPYOLOFPN(nn.Layer): in_channels=[512, 1024, 2048], norm_type='bn', data_format='NCHW', - **kwargs): + coord_conv=False, + conv_block_num=3, + drop_block=False, + block_size=3, + keep_prob=0.9, + spp=False): """ PPYOLOFPN layer @@ -438,7 +521,12 @@ class PPYOLOFPN(nn.Layer): in_channels (list): input channels for fpn norm_type (str): batch norm type, default bn data_format (str): data format, NCHW or NHWC - kwargs: extra key-value pairs, such as parameter of DropBlock and spp + coord_conv (bool): whether use CoordConv or not + conv_block_num (int): conv block num of each pan block + drop_block (bool): whether use DropBlock or not + block_size (int): block size of DropBlock + keep_prob (float): keep probability of DropBlock + spp (bool): whether use spp or not """ super(PPYOLOFPN, self).__init__() @@ -446,14 +534,12 @@ class PPYOLOFPN(nn.Layer): self.in_channels = in_channels self.num_blocks = len(in_channels) # parse kwargs - self.coord_conv = kwargs.get('coord_conv', False) - self.drop_block = kwargs.get('drop_block', False) - if self.drop_block: - self.block_size = kwargs.get('block_size', 3) - self.keep_prob = kwargs.get('keep_prob', 0.9) - - self.spp = kwargs.get('spp', False) - self.conv_block_num = kwargs.get('conv_block_num', 2) + self.coord_conv = coord_conv + self.drop_block = drop_block + self.block_size = block_size + self.keep_prob = keep_prob + self.spp = spp + self.conv_block_num = conv_block_num self.data_format = data_format if self.coord_conv: ConvLayer = CoordConv @@ -583,14 +669,12 @@ class PPYOLOTinyFPN(nn.Layer): **kwargs): """ PPYOLO Tiny FPN layer - Args: in_channels (list): input channels for fpn detection_block_channels (list): channels in fpn norm_type (str): batch norm type, default bn data_format (str): data format, NCHW or NHWC kwargs: extra key-value pairs, such as parameter of DropBlock and spp - """ super(PPYOLOTinyFPN, self).__init__() assert len(in_channels) > 0, "in_channels length should > 0" @@ -681,3 +765,197 @@ class PPYOLOTinyFPN(nn.Layer): @property def out_shape(self): return [ShapeSpec(channels=c) for c in self._out_channels] + + +@register +@serializable +class PPYOLOPAN(nn.Layer): + __shared__ = ['norm_type', 'data_format'] + + def __init__(self, + in_channels=[512, 1024, 2048], + norm_type='bn', + data_format='NCHW', + act='mish', + conv_block_num=3, + drop_block=False, + block_size=3, + keep_prob=0.9, + spp=False): + """ + PPYOLOPAN layer with SPP, DropBlock and CSP connection. + + Args: + in_channels (list): input channels for fpn + norm_type (str): batch norm type, default bn + data_format (str): data format, NCHW or NHWC + act (str): activation function, default mish + conv_block_num (int): conv block num of each pan block + drop_block (bool): whether use DropBlock or not + block_size (int): block size of DropBlock + keep_prob (float): keep probability of DropBlock + spp (bool): whether use spp or not + + """ + super(PPYOLOPAN, self).__init__() + assert len(in_channels) > 0, "in_channels length should > 0" + self.in_channels = in_channels + self.num_blocks = len(in_channels) + # parse kwargs + self.drop_block = drop_block + self.block_size = block_size + self.keep_prob = keep_prob + self.spp = spp + self.conv_block_num = conv_block_num + self.data_format = data_format + if self.drop_block: + dropblock_cfg = [[ + 'dropblock', DropBlock, [self.block_size, self.keep_prob], + dict() + ]] + else: + dropblock_cfg = [] + + # fpn + self.fpn_blocks = [] + self.fpn_routes = [] + fpn_channels = [] + for i, ch_in in enumerate(self.in_channels[::-1]): + if i > 0: + ch_in += 512 // (2**(i - 1)) + channel = 512 // (2**i) + base_cfg = [] + for j in range(self.conv_block_num): + base_cfg += [ + # name, layer, args + [ + '{}.0'.format(j), ConvBNLayer, [channel, channel, 1], + dict( + padding=0, act=act, norm_type=norm_type) + ], + [ + '{}.1'.format(j), ConvBNLayer, [channel, channel, 3], + dict( + padding=1, act=act, norm_type=norm_type) + ] + ] + + if i == 0 and self.spp: + base_cfg[3] = [ + 'spp', SPP, [channel * 4, channel, 1], dict( + pool_size=[5, 9, 13], act=act, norm_type=norm_type) + ] + + cfg = base_cfg[:4] + dropblock_cfg + base_cfg[4:] + name = 'fpn.{}'.format(i) + fpn_block = self.add_sublayer( + name, + PPYOLODetBlockCSP(cfg, ch_in, channel, act, norm_type, name, + data_format)) + self.fpn_blocks.append(fpn_block) + fpn_channels.append(channel * 2) + if i < self.num_blocks - 1: + name = 'fpn_transition.{}'.format(i) + route = self.add_sublayer( + name, + ConvBNLayer( + ch_in=channel * 2, + ch_out=channel, + filter_size=1, + stride=1, + padding=0, + act=act, + norm_type=norm_type, + data_format=data_format, + name=name)) + self.fpn_routes.append(route) + # pan + self.pan_blocks = [] + self.pan_routes = [] + self._out_channels = [512 // (2**(self.num_blocks - 2)), ] + for i in reversed(range(self.num_blocks - 1)): + name = 'pan_transition.{}'.format(i) + route = self.add_sublayer( + name, + ConvBNLayer( + ch_in=fpn_channels[i + 1], + ch_out=fpn_channels[i + 1], + filter_size=3, + stride=2, + padding=1, + act=act, + norm_type=norm_type, + data_format=data_format, + name=name)) + self.pan_routes = [route, ] + self.pan_routes + base_cfg = [] + ch_in = fpn_channels[i] + fpn_channels[i + 1] + channel = 512 // (2**i) + for j in range(self.conv_block_num): + base_cfg += [ + # name, layer, args + [ + '{}.0'.format(j), ConvBNLayer, [channel, channel, 1], + dict( + padding=0, act=act, norm_type=norm_type) + ], + [ + '{}.1'.format(j), ConvBNLayer, [channel, channel, 3], + dict( + padding=1, act=act, norm_type=norm_type) + ] + ] + + cfg = base_cfg[:4] + dropblock_cfg + base_cfg[4:] + name = 'pan.{}'.format(i) + pan_block = self.add_sublayer( + name, + PPYOLODetBlockCSP(cfg, ch_in, channel, act, norm_type, name, + data_format)) + + self.pan_blocks = [pan_block, ] + self.pan_blocks + self._out_channels.append(channel * 2) + + self._out_channels = self._out_channels[::-1] + + def forward(self, blocks): + assert len(blocks) == self.num_blocks + blocks = blocks[::-1] + # fpn + fpn_feats = [] + for i, block in enumerate(blocks): + if i > 0: + if self.data_format == 'NCHW': + block = paddle.concat([route, block], axis=1) + else: + block = paddle.concat([route, block], axis=-1) + route, tip = self.fpn_blocks[i](block) + fpn_feats.append(tip) + + if i < self.num_blocks - 1: + route = self.fpn_routes[i](route) + route = F.interpolate( + route, scale_factor=2., data_format=self.data_format) + + pan_feats = [fpn_feats[-1], ] + route = fpn_feats[self.num_blocks - 1] + for i in reversed(range(self.num_blocks - 1)): + block = fpn_feats[i] + route = self.pan_routes[i](route) + if self.data_format == 'NCHW': + block = paddle.concat([route, block], axis=1) + else: + block = paddle.concat([route, block], axis=-1) + + route, tip = self.pan_blocks[i](block) + pan_feats.append(tip) + + return pan_feats[::-1] + + @classmethod + def from_config(cls, cfg, input_shape): + return {'in_channels': [i.channels for i in input_shape], } + + @property + def out_shape(self): + return [ShapeSpec(channels=c) for c in self._out_channels] diff --git a/ppdet/modeling/ops.py b/ppdet/modeling/ops.py index cbef3d6b4..e2c193030 100644 --- a/ppdet/modeling/ops.py +++ b/ppdet/modeling/ops.py @@ -41,9 +41,14 @@ __all__ = [ 'collect_fpn_proposals', 'matrix_nms', 'batch_norm', + 'mish', ] +def mish(x): + return x * paddle.tanh(F.softplus(x)) + + def batch_norm(ch, norm_type='bn', norm_decay=0., diff --git a/static/configs/ppyolo/README.md b/static/configs/ppyolo/README.md index f8f707634..0f363971c 100644 --- a/static/configs/ppyolo/README.md +++ b/static/configs/ppyolo/README.md @@ -38,21 +38,24 @@ PP-YOLO improved performance and speed of YOLOv3 with following methods: | Model | GPU number | images/GPU | backbone | input shape | Box APval | Box APtest | V100 FP32(FPS) | V100 TensorRT FP16(FPS) | download | config | |:------------------------:|:----------:|:----------:|:----------:| :----------:| :------------------: | :-------------------: | :------------: | :---------------------: | :------: | :-----: | -| YOLOv4(AlexyAB) | - | - | CSPDarknet | 608 | - | 43.5 | 62 | 105.5 | [model](https://paddlemodels.bj.bcebos.com/object_detection/yolov4_cspdarknet.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/yolov4/yolov4_csdarknet.yml) | -| YOLOv4(AlexyAB) | - | - | CSPDarknet | 512 | - | 43.0 | 83 | 138.4 | [model](https://paddlemodels.bj.bcebos.com/object_detection/yolov4_cspdarknet.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/yolov4/yolov4_csdarknet.yml) | -| YOLOv4(AlexyAB) | - | - | CSPDarknet | 416 | - | 41.2 | 96 | 164.0 | [model](https://paddlemodels.bj.bcebos.com/object_detection/yolov4_cspdarknet.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/yolov4/yolov4_csdarknet.yml) | -| YOLOv4(AlexyAB) | - | - | CSPDarknet | 320 | - | 38.0 | 123 | 199.0 | [model](https://paddlemodels.bj.bcebos.com/object_detection/yolov4_cspdarknet.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/yolov4/yolov4_csdarknet.yml) | -| PP-YOLO | 8 | 24 | ResNet50vd | 608 | 44.8 | 45.2 | 72.9 | 155.6 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ppyolo/ppyolo.yml) | -| PP-YOLO | 8 | 24 | ResNet50vd | 512 | 43.9 | 44.4 | 89.9 | 188.4 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ppyolo/ppyolo.yml) | -| PP-YOLO | 8 | 24 | ResNet50vd | 416 | 42.1 | 42.5 | 109.1 | 215.4 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ppyolo/ppyolo.yml) | -| PP-YOLO | 8 | 24 | ResNet50vd | 320 | 38.9 | 39.3 | 132.2 | 242.2 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ppyolo/ppyolo.yml) | -| PP-YOLO_2x | 8 | 24 | ResNet50vd | 608 | 45.3 | 45.9 | 72.9 | 155.6 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_2x.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ppyolo/ppyolo_2x.yml) | -| PP-YOLO_2x | 8 | 24 | ResNet50vd | 512 | 44.4 | 45.0 | 89.9 | 188.4 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_2x.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ppyolo/ppyolo_2x.yml) | -| PP-YOLO_2x | 8 | 24 | ResNet50vd | 416 | 42.7 | 43.2 | 109.1 | 215.4 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_2x.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ppyolo/ppyolo_2x.yml) | -| PP-YOLO_2x | 8 | 24 | ResNet50vd | 320 | 39.5 | 40.1 | 132.2 | 242.2 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_2x.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ppyolo/ppyolo_2x.yml) | -| PP-YOLO_ResNet18vd | 4 | 32 | ResNet18vd | 512 | 29.3 | 29.5 | 357.1 | 657.9 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_r18vd.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ppyolo/ppyolo_r18vd.yml) | -| PP-YOLO_ResNet18vd | 4 | 32 | ResNet18vd | 416 | 28.6 | 28.9 | 409.8 | 719.4 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_r18vd.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ppyolo/ppyolo_r18vd.yml) | -| PP-YOLO_ResNet18vd | 4 | 32 | ResNet18vd | 320 | 26.2 | 26.4 | 480.7 | 763.4 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_r18vd.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ppyolo/ppyolo_r18vd.yml) | +| YOLOv4(AlexyAB) | - | - | CSPDarknet | 608 | - | 43.5 | 62 | 105.5 | [model](https://paddlemodels.bj.bcebos.com/object_detection/yolov4_cspdarknet.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/yolov4/yolov4_csdarknet.yml) | +| YOLOv4(AlexyAB) | - | - | CSPDarknet | 512 | - | 43.0 | 83 | 138.4 | [model](https://paddlemodels.bj.bcebos.com/object_detection/yolov4_cspdarknet.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/yolov4/yolov4_csdarknet.yml) | +| YOLOv4(AlexyAB) | - | - | CSPDarknet | 416 | - | 41.2 | 96 | 164.0 | [model](https://paddlemodels.bj.bcebos.com/object_detection/yolov4_cspdarknet.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/yolov4/yolov4_csdarknet.yml) | +| YOLOv4(AlexyAB) | - | - | CSPDarknet | 320 | - | 38.0 | 123 | 199.0 | [model](https://paddlemodels.bj.bcebos.com/object_detection/yolov4_cspdarknet.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/yolov4/yolov4_csdarknet.yml) | +| PP-YOLO | 8 | 24 | ResNet50vd | 608 | 44.8 | 45.2 | 72.9 | 155.6 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/ppyolo/ppyolo.yml) | +| PP-YOLO | 8 | 24 | ResNet50vd | 512 | 43.9 | 44.4 | 89.9 | 188.4 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/ppyolo/ppyolo.yml) | +| PP-YOLO | 8 | 24 | ResNet50vd | 416 | 42.1 | 42.5 | 109.1 | 215.4 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/ppyolo/ppyolo.yml) | +| PP-YOLO | 8 | 24 | ResNet50vd | 320 | 38.9 | 39.3 | 132.2 | 242.2 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/ppyolo/ppyolo.yml) | +| PP-YOLO_2x | 8 | 24 | ResNet50vd | 608 | 45.3 | 45.9 | 72.9 | 155.6 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_2x.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/ppyolo/ppyolo_2x.yml) | +| PP-YOLO_2x | 8 | 24 | ResNet50vd | 512 | 44.4 | 45.0 | 89.9 | 188.4 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_2x.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/ppyolo/ppyolo_2x.yml) | +| PP-YOLO_2x | 8 | 24 | ResNet50vd | 416 | 42.7 | 43.2 | 109.1 | 215.4 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_2x.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/ppyolo/ppyolo_2x.yml) | +| PP-YOLO_2x | 8 | 24 | ResNet50vd | 320 | 39.5 | 40.1 | 132.2 | 242.2 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_2x.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/ppyolo/ppyolo_2x.yml) | +| PP-YOLO | 4 | 32 | ResNet18vd | 512 | 29.3 | 29.5 | 357.1 | 657.9 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_r18vd.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/ppyolo/ppyolo_r18vd.yml) | +| PP-YOLO | 4 | 32 | ResNet18vd | 416 | 28.6 | 28.9 | 409.8 | 719.4 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_r18vd.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/ppyolo/ppyolo_r18vd.yml) | +| PP-YOLO | 4 | 32 | ResNet18vd | 320 | 26.2 | 26.4 | 480.7 | 763.4 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_r18vd.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/ppyolo/ppyolo_r18vd.yml) | +| PP-YOLOv2 | 8 | 12 | ResNet50vd | 640 | 49.1 | 49.5 | - | - | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolov2_r50vd_dcn.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/ppyolo/ppyolov2_r50vd_dcn.yml) | +| PP-YOLOv2 | 8 | 12 | ResNet101vd | 640 | 49.7 | 50.1 | - | - | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolov2_r101vd_dcn.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/ppyolo/ppyolov2_r101vd_dcn.yml) | + **Notes:** @@ -69,8 +72,8 @@ PP-YOLO improved performance and speed of YOLOv3 with following methods: | Model | GPU number | images/GPU | Model Size | input shape | Box APval | Box AP50val | Kirin 990 1xCore(FPS) | download | inference model download | config | |:----------------------------:|:----------:|:----------:| :--------: | :----------:| :------------------: | :--------------------: | :-------------------: | :------: | :----------------------: | :-----: | -| PP-YOLO_MobileNetV3_large | 4 | 32 | 18MB | 320 | 23.2 | 42.6 | 15.6 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_mobilenet_v3_large.pdparams) | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_mobilenet_v3_large.tar) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ppyolo/ppyolo_mobilenet_v3_large.yml) | -| PP-YOLO_MobileNetV3_small | 4 | 32 | 11MB | 320 | 17.2 | 33.8 | 28.6 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_mobilenet_v3_small.pdparams) | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_mobilenet_v3_small.tar) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ppyolo/ppyolo_mobilenet_v3_small.yml) | +| PP-YOLO_MobileNetV3_large | 4 | 32 | 18MB | 320 | 23.2 | 42.6 | 15.6 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_mobilenet_v3_large.pdparams) | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_mobilenet_v3_large.tar) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/ppyolo/ppyolo_mobilenet_v3_large.yml) | +| PP-YOLO_MobileNetV3_small | 4 | 32 | 11MB | 320 | 17.2 | 33.8 | 28.6 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_mobilenet_v3_small.pdparams) | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_mobilenet_v3_small.tar) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/ppyolo/ppyolo_mobilenet_v3_small.yml) | **Notes:** @@ -82,7 +85,7 @@ PP-YOLO improved performance and speed of YOLOv3 with following methods: | Model | GPU number | images/GPU | Prune Ratio | Teacher Model | Model Size | input shape | Box APval | Kirin 990 1xCore(FPS) | download | inference model download | config | |:----------------------------:|:----------:|:----------:| :---------: | :-----------------------: | :--------: | :----------:| :------------------: | :-------------------: | :------: | :----------------------: | :-----: | -| PP-YOLO_MobileNetV3_small | 4 | 32 | 75% | PP-YOLO_MobileNetV3_large | 4.2MB | 320 | 16.2 | 39.8 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_mobilenet_v3_small_prune75_distillby_mobilenet_v3_large.pdparams) | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_mobilenet_v3_small_prune75_distillby_mobilenet_v3_large.tar) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ppyolo/ppyolo_mobilenet_v3_small.yml) | +| PP-YOLO_MobileNetV3_small | 4 | 32 | 75% | PP-YOLO_MobileNetV3_large | 4.2MB | 320 | 16.2 | 39.8 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_mobilenet_v3_small_prune75_distillby_mobilenet_v3_large.pdparams) | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_mobilenet_v3_small_prune75_distillby_mobilenet_v3_large.tar) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/ppyolo/ppyolo_mobilenet_v3_small.yml) | - Slim PP-YOLO is trained by slim traing method from [Distill pruned model](../../slim/extentions/distill_pruned_model/README.md),distill training pruned PP-YOLO_MobileNetV3_small model with PP-YOLO_MobileNetV3_large model as the teacher model - Pruning detectiom head of PP-YOLO model with ratio as 75%, while the arguments are `--pruned_params="yolo_block.0.2.conv.weights,yolo_block.0.tip.conv.weights,yolo_block.1.2.conv.weights,yolo_block.1.tip.conv.weights" --pruned_ratios="0.75,0.75,0.75,0.75"` @@ -108,9 +111,9 @@ PP-YOLO trained on Pascal VOC dataset as follows: | Model | GPU number | images/GPU | backbone | input shape | Box AP50val | download | config | |:------------------:|:----------:|:----------:|:----------:| :----------:| :--------------------: | :------: | :-----: | -| PP-YOLO | 8 | 12 | ResNet50vd | 608 | 84.9 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_voc.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ppyolo/ppyolo_voc.yml) | -| PP-YOLO | 8 | 12 | ResNet50vd | 416 | 84.3 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_voc.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ppyolo/ppyolo_voc.yml) | -| PP-YOLO | 8 | 12 | ResNet50vd | 320 | 82.2 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_voc.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ppyolo/ppyolo_voc.yml) | +| PP-YOLO | 8 | 12 | ResNet50vd | 608 | 84.9 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_voc.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/ppyolo/ppyolo_voc.yml) | +| PP-YOLO | 8 | 12 | ResNet50vd | 416 | 84.3 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_voc.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/ppyolo/ppyolo_voc.yml) | +| PP-YOLO | 8 | 12 | ResNet50vd | 320 | 82.2 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_voc.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/ppyolo/ppyolo_voc.yml) | ## Getting Start diff --git a/static/configs/ppyolo/README_cn.md b/static/configs/ppyolo/README_cn.md index a811a7e0d..392436ef4 100644 --- a/static/configs/ppyolo/README_cn.md +++ b/static/configs/ppyolo/README_cn.md @@ -38,21 +38,23 @@ PP-YOLO从如下方面优化和提升YOLOv3模型的精度和速度: | 模型 | GPU个数 | 每GPU图片个数 | 骨干网络 | 输入尺寸 | Box APval | Box APtest | V100 FP32(FPS) | V100 TensorRT FP16(FPS) | 模型下载 | 配置文件 | |:------------------------:|:-------:|:-------------:|:----------:| :-------:| :------------------: | :-------------------: | :------------: | :---------------------: | :------: | :------: | -| YOLOv4(AlexyAB) | - | - | CSPDarknet | 608 | - | 43.5 | 62 | 105.5 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/yolov4_cspdarknet.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/yolov4/yolov4_csdarknet.yml) | -| YOLOv4(AlexyAB) | - | - | CSPDarknet | 512 | - | 43.0 | 83 | 138.4 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/yolov4_cspdarknet.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/yolov4/yolov4_csdarknet.yml) | -| YOLOv4(AlexyAB) | - | - | CSPDarknet | 416 | - | 41.2 | 96 | 164.0 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/yolov4_cspdarknet.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/yolov4/yolov4_csdarknet.yml) | -| YOLOv4(AlexyAB) | - | - | CSPDarknet | 320 | - | 38.0 | 123 | 199.0 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/yolov4_cspdarknet.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/yolov4/yolov4_csdarknet.yml) | -| PP-YOLO | 8 | 24 | ResNet50vd | 608 | 44.8 | 45.2 | 72.9 | 155.6 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ppyolo/ppyolo.yml) | -| PP-YOLO | 8 | 24 | ResNet50vd | 512 | 43.9 | 44.4 | 89.9 | 188.4 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ppyolo/ppyolo.yml) | -| PP-YOLO | 8 | 24 | ResNet50vd | 416 | 42.1 | 42.5 | 109.1 | 215.4 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ppyolo/ppyolo.yml) | -| PP-YOLO | 8 | 24 | ResNet50vd | 320 | 38.9 | 39.3 | 132.2 | 242.2 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ppyolo/ppyolo.yml) | -| PP-YOLO_2x | 8 | 24 | ResNet50vd | 608 | 45.3 | 45.9 | 72.9 | 155.6 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ppyolo/ppyolo.yml) | -| PP-YOLO_2x | 8 | 24 | ResNet50vd | 512 | 44.4 | 45.0 | 89.9 | 188.4 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ppyolo/ppyolo.yml) | -| PP-YOLO_2x | 8 | 24 | ResNet50vd | 416 | 42.7 | 43.2 | 109.1 | 215.4 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ppyolo/ppyolo.yml) | -| PP-YOLO_2x | 8 | 24 | ResNet50vd | 320 | 39.5 | 40.1 | 132.2 | 242.2 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ppyolo/ppyolo.yml) | -| PP-YOLO_ResNet18vd | 4 | 32 | ResNet18vd | 512 | 29.3 | 29.5 | 357.1 | 657.9 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_r18vd.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ppyolo/ppyolo_r18vd.yml) | -| PP-YOLO_ResNet18vd | 4 | 32 | ResNet18vd | 416 | 28.6 | 28.9 | 409.8 | 719.4 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_r18vd.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ppyolo/ppyolo_r18vd.yml) | -| PP-YOLO_ResNet18vd | 4 | 32 | ResNet18vd | 320 | 26.2 | 26.4 | 480.7 | 763.4 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_r18vd.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ppyolo/ppyolo_r18vd.yml) | +| YOLOv4(AlexyAB) | - | - | CSPDarknet | 608 | - | 43.5 | 62 | 105.5 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/yolov4_cspdarknet.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/yolov4/yolov4_csdarknet.yml) | +| YOLOv4(AlexyAB) | - | - | CSPDarknet | 512 | - | 43.0 | 83 | 138.4 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/yolov4_cspdarknet.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/yolov4/yolov4_csdarknet.yml) | +| YOLOv4(AlexyAB) | - | - | CSPDarknet | 416 | - | 41.2 | 96 | 164.0 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/yolov4_cspdarknet.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/yolov4/yolov4_csdarknet.yml) | +| YOLOv4(AlexyAB) | - | - | CSPDarknet | 320 | - | 38.0 | 123 | 199.0 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/yolov4_cspdarknet.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/yolov4/yolov4_csdarknet.yml) | +| PP-YOLO | 8 | 24 | ResNet50vd | 608 | 44.8 | 45.2 | 72.9 | 155.6 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/ppyolo/ppyolo.yml) | +| PP-YOLO | 8 | 24 | ResNet50vd | 512 | 43.9 | 44.4 | 89.9 | 188.4 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/ppyolo/ppyolo.yml) | +| PP-YOLO | 8 | 24 | ResNet50vd | 416 | 42.1 | 42.5 | 109.1 | 215.4 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/ppyolo/ppyolo.yml) | +| PP-YOLO | 8 | 24 | ResNet50vd | 320 | 38.9 | 39.3 | 132.2 | 242.2 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/ppyolo/ppyolo.yml) | +| PP-YOLO_2x | 8 | 24 | ResNet50vd | 608 | 45.3 | 45.9 | 72.9 | 155.6 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/ppyolo/ppyolo.yml) | +| PP-YOLO_2x | 8 | 24 | ResNet50vd | 512 | 44.4 | 45.0 | 89.9 | 188.4 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/ppyolo/ppyolo.yml) | +| PP-YOLO_2x | 8 | 24 | ResNet50vd | 416 | 42.7 | 43.2 | 109.1 | 215.4 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/ppyolo/ppyolo.yml) | +| PP-YOLO_2x | 8 | 24 | ResNet50vd | 320 | 39.5 | 40.1 | 132.2 | 242.2 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/ppyolo/ppyolo.yml) | +| PP-YOLO | 4 | 32 | ResNet18vd | 512 | 29.3 | 29.5 | 357.1 | 657.9 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_r18vd.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/ppyolo/ppyolo_r18vd.yml) | +| PP-YOLO | 4 | 32 | ResNet18vd | 416 | 28.6 | 28.9 | 409.8 | 719.4 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_r18vd.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/ppyolo/ppyolo_r18vd.yml) | +| PP-YOLO | 4 | 32 | ResNet18vd | 320 | 26.2 | 26.4 | 480.7 | 763.4 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_r18vd.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/ppyolo/ppyolo_r18vd.yml) | +| PP-YOLOv2 | 8 | 12 | ResNet50vd | 640 | 49.1 | 49.5 | - | - | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolov2_r50vd_dcn.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/ppyolo/ppyolov2_r50vd_dcn.yml) | +| PP-YOLOv2 | 8 | 12 | ResNet101vd | 640 | 49.7 | 50.1 | - | - | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolov2_r101vd_dcn.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/ppyolo/ppyolov2_r101vd_dcn.yml) | **注意:** @@ -70,8 +72,8 @@ PP-YOLO从如下方面优化和提升YOLOv3模型的精度和速度: | 模型 | GPU个数 | 每GPU图片个数 | 模型体积 | 输入尺寸 | Box APval | Box AP50val | Kirin 990 1xCore (FPS) | 模型下载 | 预测模型下载 | 配置文件 | |:----------------------------:|:-------:|:-------------:|:----------:| :-------:| :------------------: | :--------------------: | :--------------------: | :------: | :----------: | :------: | -| PP-YOLO_MobileNetV3_large | 4 | 32 | 18MB | 320 | 23.2 | 42.6 | 14.1 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_mobilenet_v3_large.pdparams) | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_mobilenet_v3_large.tar) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ppyolo/ppyolo_mobilenet_v3_large.yml) | -| PP-YOLO_MobileNetV3_small | 4 | 32 | 11MB | 320 | 17.2 | 33.8 | 21.5 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_mobilenet_v3_small.pdparams) | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_mobilenet_v3_large.tar) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ppyolo/ppyolo_mobilenet_v3_small.yml) | +| PP-YOLO_MobileNetV3_large | 4 | 32 | 18MB | 320 | 23.2 | 42.6 | 14.1 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_mobilenet_v3_large.pdparams) | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_mobilenet_v3_large.tar) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/ppyolo/ppyolo_mobilenet_v3_large.yml) | +| PP-YOLO_MobileNetV3_small | 4 | 32 | 11MB | 320 | 17.2 | 33.8 | 21.5 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_mobilenet_v3_small.pdparams) | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_mobilenet_v3_large.tar) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/ppyolo/ppyolo_mobilenet_v3_small.yml) | - PP-YOLO_MobileNetV3 模型使用COCO数据集中train2017作为训练集,使用val2017作为测试集,Box APval为`mAP(IoU=0.5:0.95)`评估结果, Box AP50val为`mAP(IoU=0.5)`评估结果。 - PP-YOLO_MobileNetV3 模型训练过程中使用4GPU,每GPU batch size为32进行训练,如训练GPU数和batch size不使用上述配置,须参考[FAQ](../../docs/FAQ.md)调整学习率和迭代次数。 @@ -81,7 +83,7 @@ PP-YOLO从如下方面优化和提升YOLOv3模型的精度和速度: | 模型 | GPU 个数 | 每GPU图片个数 | 裁剪率 | Teacher模型 | 模型体积 | 输入尺寸 | Box APval | Kirin 990 1xCore (FPS) | 模型下载 | 预测模型下载 | 配置文件 | |:----------------------------:|:----------:|:-------------:| :---------: | :-----------------------: | :--------: | :----------:| :------------------: | :--------------------: | :------: | :----------: | :------: | -| PP-YOLO_MobileNetV3_small | 4 | 32 | 75% | PP-YOLO_MobileNetV3_large | 4.2MB | 320 | 16.2 | 39.8 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_mobilenet_v3_small_prune75_distillby_mobilenet_v3_large.pdparams) | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_mobilenet_v3_small_prune75_distillby_mobilenet_v3_large.tar) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ppyolo/ppyolo_mobilenet_v3_small.yml) | +| PP-YOLO_MobileNetV3_small | 4 | 32 | 75% | PP-YOLO_MobileNetV3_large | 4.2MB | 320 | 16.2 | 39.8 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_mobilenet_v3_small_prune75_distillby_mobilenet_v3_large.pdparams) | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_mobilenet_v3_small_prune75_distillby_mobilenet_v3_large.tar) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/ppyolo/ppyolo_mobilenet_v3_small.yml) | - PP-YOLO 轻量级裁剪模型采用[蒸馏通道剪裁模型](../../slim/extentions/distill_pruned_model/README.md) 的方式训练得到,基于 PP-YOLO_MobileNetV3_small 模型对Head部分做卷积通道剪裁后使用 PP-YOLO_MobileNetV3_large 模型进行蒸馏训练 - 卷积通道检测对Head部分剪裁掉75%的通道数,及剪裁参数为`--pruned_params="yolo_block.0.2.conv.weights,yolo_block.0.tip.conv.weights,yolo_block.1.2.conv.weights,yolo_block.1.tip.conv.weights" --pruned_ratios="0.75,0.75,0.75,0.75"` @@ -105,9 +107,9 @@ PP-YOLO在Pascal VOC数据集上训练模型如下: | 模型 | GPU个数 | 每GPU图片个数 | 骨干网络 | 输入尺寸 | Box AP50val | 模型下载 | 配置文件 | |:------------------:|:-------:|:-------------:|:----------:| :----------:| :--------------------: | :------: | :-----: | -| PP-YOLO | 8 | 12 | ResNet50vd | 608 | 84.9 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_voc.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ppyolo/ppyolo_voc.yml) | -| PP-YOLO | 8 | 12 | ResNet50vd | 416 | 84.3 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_voc.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ppyolo/ppyolo_voc.yml) | -| PP-YOLO | 8 | 12 | ResNet50vd | 320 | 82.2 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_voc.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/configs/ppyolo/ppyolo_voc.yml) | +| PP-YOLO | 8 | 12 | ResNet50vd | 608 | 84.9 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_voc.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/ppyolo/ppyolo_voc.yml) | +| PP-YOLO | 8 | 12 | ResNet50vd | 416 | 84.3 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_voc.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/ppyolo/ppyolo_voc.yml) | +| PP-YOLO | 8 | 12 | ResNet50vd | 320 | 82.2 | [model](https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_voc.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/static/configs/ppyolo/ppyolo_voc.yml) | ## 使用说明 diff --git a/static/configs/ppyolo/ppyolov2_r101vd_dcn.yml b/static/configs/ppyolo/ppyolov2_r101vd_dcn.yml new file mode 100644 index 000000000..9ba339912 --- /dev/null +++ b/static/configs/ppyolo/ppyolov2_r101vd_dcn.yml @@ -0,0 +1,89 @@ +architecture: YOLOv3 +use_gpu: true +max_iters: 450000 +log_iter: 100 +save_dir: output +snapshot_iter: 10000 +metric: COCO +pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_vd_ssld_pretrained.tar +weights: output/ppyolov2_r101vd_dcn/model_final +num_classes: 80 +use_fine_grained_loss: true +use_ema: true +ema_decay: 0.9998 + +YOLOv3: + backbone: ResNet + yolo_head: YOLOv3PANHead + use_fine_grained_loss: true + +ResNet: + norm_type: sync_bn + freeze_at: 0 + freeze_norm: false + norm_decay: 0. + depth: 101 + feature_maps: [3, 4, 5] + variant: d + dcn_v2_stages: [5] + +YOLOv3PANHead: + anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]] + anchors: [[10, 13], [16, 30], [33, 23], + [30, 61], [62, 45], [59, 119], + [116, 90], [156, 198], [373, 326]] + norm_decay: 0. + iou_aware: true + iou_aware_factor: 0.5 + scale_x_y: 1.05 + spp: true + yolo_loss: YOLOv3Loss + nms: MatrixNMS + drop_block: true + +YOLOv3Loss: + ignore_thresh: 0.7 + scale_x_y: 1.05 + label_smooth: false + use_fine_grained_loss: true + iou_loss: IouLoss + iou_aware_loss: IouAwareLoss + +IouLoss: + loss_weight: 2.5 + max_height: 768 + max_width: 768 + +IouAwareLoss: + loss_weight: 1.0 + max_height: 768 + max_width: 768 + +MatrixNMS: + background_label: -1 + keep_top_k: 100 + normalized: false + score_threshold: 0.01 + post_threshold: 0.01 + +LearningRate: + base_lr: 0.005 + schedulers: + - !PiecewiseDecay + gamma: 0.1 + milestones: + - 300000 + - !LinearWarmup + start_factor: 0. + steps: 4000 + +OptimizerBuilder: + clip_grad_by_norm: 35. + optimizer: + momentum: 0.9 + type: Momentum + regularizer: + factor: 0.0005 + type: L2 + +_READER_: 'ppyolov2_reader.yml' diff --git a/static/configs/ppyolo/ppyolov2_r50vd_dcn.yml b/static/configs/ppyolo/ppyolov2_r50vd_dcn.yml new file mode 100644 index 000000000..7ceb75833 --- /dev/null +++ b/static/configs/ppyolo/ppyolov2_r50vd_dcn.yml @@ -0,0 +1,89 @@ +architecture: YOLOv3 +use_gpu: true +max_iters: 450000 +log_iter: 100 +save_dir: output +snapshot_iter: 10000 +metric: COCO +pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vd_ssld_pretrained.tar +weights: output/ppyolov2_r50vd_dcn/model_final +num_classes: 80 +use_fine_grained_loss: true +use_ema: true +ema_decay: 0.9998 + +YOLOv3: + backbone: ResNet + yolo_head: YOLOv3PANHead + use_fine_grained_loss: true + +ResNet: + norm_type: sync_bn + freeze_at: 0 + freeze_norm: false + norm_decay: 0. + depth: 50 + feature_maps: [3, 4, 5] + variant: d + dcn_v2_stages: [5] + +YOLOv3PANHead: + anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]] + anchors: [[10, 13], [16, 30], [33, 23], + [30, 61], [62, 45], [59, 119], + [116, 90], [156, 198], [373, 326]] + norm_decay: 0. + iou_aware: true + iou_aware_factor: 0.5 + scale_x_y: 1.05 + spp: true + yolo_loss: YOLOv3Loss + nms: MatrixNMS + drop_block: true + +YOLOv3Loss: + ignore_thresh: 0.7 + scale_x_y: 1.05 + label_smooth: false + use_fine_grained_loss: true + iou_loss: IouLoss + iou_aware_loss: IouAwareLoss + +IouLoss: + loss_weight: 2.5 + max_height: 768 + max_width: 768 + +IouAwareLoss: + loss_weight: 1.0 + max_height: 768 + max_width: 768 + +MatrixNMS: + background_label: -1 + keep_top_k: 100 + normalized: false + score_threshold: 0.01 + post_threshold: 0.01 + +LearningRate: + base_lr: 0.005 + schedulers: + - !PiecewiseDecay + gamma: 0.1 + milestones: + - 300000 + - !LinearWarmup + start_factor: 0. + steps: 4000 + +OptimizerBuilder: + clip_grad_by_norm: 35. + optimizer: + momentum: 0.9 + type: Momentum + regularizer: + factor: 0.0005 + type: L2 + +_READER_: 'ppyolov2_reader.yml' diff --git a/static/configs/ppyolo/ppyolov2_reader.yml b/static/configs/ppyolo/ppyolov2_reader.yml new file mode 100644 index 000000000..02a385c18 --- /dev/null +++ b/static/configs/ppyolo/ppyolov2_reader.yml @@ -0,0 +1,111 @@ +TrainReader: + inputs_def: + fields: ['image', 'gt_bbox', 'gt_class', 'gt_score'] + num_max_boxes: 100 + dataset: + !COCODataSet + image_dir: train2017 + anno_path: annotations/instances_train2017.json + dataset_dir: dataset/coco + with_background: false + sample_transforms: + - !DecodeImage + to_rgb: True + with_mixup: True + - !MixupImage + alpha: 1.5 + beta: 1.5 + - !ColorDistort {} + - !RandomExpand + ratio: 2.0 + fill_value: [123.675, 116.28, 103.53] + - !RandomCrop {} + - !RandomFlipImage + is_normalized: false + - !NormalizeBox {} + - !PadBox + num_max_boxes: 100 + - !BboxXYXY2XYWH {} + batch_transforms: + - !RandomShape + sizes: [320, 352, 384, 416, 448, 480, 512, 544, 576, 608, 640, 672, 704, 736, 768] + random_inter: True + - !NormalizeImage + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + is_scale: True + is_channel_first: false + - !Permute + to_bgr: false + channel_first: True + # Gt2YoloTarget is only used when use_fine_grained_loss set as true, + # this operator will be deleted automatically if use_fine_grained_loss + # is set as false + - !Gt2YoloTarget + anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]] + anchors: [[10, 13], [16, 30], [33, 23], + [30, 61], [62, 45], [59, 119], + [116, 90], [156, 198], [373, 326]] + downsample_ratios: [32, 16, 8] + batch_size: 12 + shuffle: true + mixup_epoch: 25000 + drop_last: true + worker_num: 8 + bufsize: 4 + use_process: true + +EvalReader: + inputs_def: + fields: ['image', 'im_size', 'im_id'] + num_max_boxes: 100 + dataset: + !COCODataSet + image_dir: val2017 + anno_path: annotations/instances_val2017.json + dataset_dir: dataset/coco + with_background: false + sample_transforms: + - !DecodeImage + to_rgb: True + - !ResizeImage + target_size: 640 + interp: 2 + - !NormalizeImage + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + is_scale: True + is_channel_first: false + - !PadBox + num_max_boxes: 50 + - !Permute + to_bgr: false + channel_first: True + batch_size: 8 + drop_empty: false + worker_num: 8 + bufsize: 4 + +TestReader: + inputs_def: + image_shape: [3, 640, 640] + fields: ['image', 'im_size', 'im_id'] + dataset: + !ImageFolder + anno_path: annotations/instances_val2017.json + with_background: false + sample_transforms: + - !DecodeImage + to_rgb: True + - !ResizeImage + target_size: 640 + interp: 2 + - !NormalizeImage + mean: [0.485, 0.456, 0.406] + std: [0.229, 0.224, 0.225] + is_scale: True + is_channel_first: false + - !Permute + to_bgr: false + channel_first: True + batch_size: 1 diff --git a/static/ppdet/modeling/anchor_heads/yolo_head.py b/static/ppdet/modeling/anchor_heads/yolo_head.py index c7cf14822..49b211ff6 100644 --- a/static/ppdet/modeling/anchor_heads/yolo_head.py +++ b/static/ppdet/modeling/anchor_heads/yolo_head.py @@ -192,6 +192,8 @@ class YOLOv3Head(object): if act == 'leaky': out = fluid.layers.leaky_relu(x=out, alpha=0.1) + elif act == 'mish': + out = fluid.layers.mish(out) return out def _spp_module(self, input, name=""): @@ -657,7 +659,6 @@ class YOLOv4Head(YOLOv3Head): class PPYOLOTinyHead(YOLOv3Head): """ Head block for YOLOv3 network - Args: norm_decay (float): weight decay for normalization layer weights num_classes (int): number of output classes @@ -781,11 +782,9 @@ class PPYOLOTinyHead(YOLOv3Head): def _get_outputs(self, input, is_train=True): """ Get PP-YOLO tiny head output - Args: input (list): List of Variables, output of backbone stages is_train (bool): whether in train or test mode - Returns: outputs (list): Variables of each output layer """ @@ -838,3 +837,232 @@ class PPYOLOTinyHead(YOLOv3Head): route = self._upsample(route) return outputs + + +@register +class YOLOv3PANHead(YOLOv3Head): + """ + Head block for YOLOv3PANHead network + + Args: + conv_block_num (int): number of conv block in each detection block + norm_decay (float): weight decay for normalization layer weights + num_classes (int): number of output classes + anchors (list): anchors + anchor_masks (list): anchor masks + nms (object): an instance of `MultiClassNMS` + """ + __inject__ = ['yolo_loss', 'nms'] + __shared__ = ['num_classes', 'weight_prefix_name'] + + def __init__(self, + conv_block_num=3, + norm_decay=0., + num_classes=80, + anchors=[[10, 13], [16, 30], [33, 23], [30, 61], [62, 45], + [59, 119], [116, 90], [156, 198], [373, 326]], + anchor_masks=[[6, 7, 8], [3, 4, 5], [0, 1, 2]], + drop_block=False, + iou_aware=False, + iou_aware_factor=0.4, + block_size=3, + keep_prob=0.9, + yolo_loss="YOLOv3Loss", + spp=False, + nms=MultiClassNMS( + score_threshold=0.01, + nms_top_k=1000, + keep_top_k=100, + nms_threshold=0.45, + background_label=-1).__dict__, + weight_prefix_name='', + downsample=[32, 16, 8], + scale_x_y=1.0, + clip_bbox=True, + act='mish'): + super(YOLOv3PANHead, self).__init__( + conv_block_num=conv_block_num, + norm_decay=norm_decay, + num_classes=num_classes, + anchors=anchors, + anchor_masks=anchor_masks, + drop_block=drop_block, + iou_aware=iou_aware, + iou_aware_factor=iou_aware_factor, + block_size=block_size, + keep_prob=keep_prob, + yolo_loss=yolo_loss, + spp=spp, + nms=nms, + weight_prefix_name=weight_prefix_name, + downsample=downsample, + scale_x_y=scale_x_y, + clip_bbox=clip_bbox) + self.act = act + + def _detection_block(self, + input, + channel, + conv_block_num=2, + is_first=False, + is_test=True, + name=None): + conv_left = self._conv_bn( + input, + channel, + act=self.act, + filter_size=1, + stride=1, + padding=0, + name='{}.left'.format(name)) + conv_right = self._conv_bn( + input, + channel, + act=self.act, + filter_size=1, + stride=1, + padding=0, + name='{}.right'.format(name)) + for j in range(conv_block_num): + conv_left = self._conv_bn( + conv_left, + channel, + act=self.act, + filter_size=1, + stride=1, + padding=0, + name='{}.left.{}'.format(name, 2 * j)) + if self.use_spp and is_first and j == 1: + c = conv_left.shape[1] + conv_left = self._spp_module(conv_left, name="spp") + conv_left = self._conv_bn( + conv_left, + c, + act=self.act, + filter_size=1, + stride=1, + padding=0, + name='{}.left.{}'.format(name, 2 * j + 1)) + else: + conv_left = self._conv_bn( + conv_left, + channel, + act=self.act, + filter_size=3, + stride=1, + padding=1, + name='{}.left.{}'.format(name, 2 * j + 1)) + if self.drop_block and j == 1: + conv_left = DropBlock( + conv_left, + block_size=self.block_size, + keep_prob=self.keep_prob, + is_test=is_test) + + conv = fluid.layers.concat(input=[conv_left, conv_right], axis=1) + conv = self._conv_bn( + conv, + channel * 2, + act=self.act, + filter_size=1, + stride=1, + padding=0, + name=name) + return conv, conv + + def _get_outputs(self, input, is_train=True): + """ + Get YOLOv3 head output + + Args: + input (list): List of Variables, output of backbone stages + is_train (bool): whether in train or test mode + + Returns: + outputs (list): Variables of each output layer + """ + + # get last out_layer_num blocks in reverse order + out_layer_num = len(self.anchor_masks) + blocks = input[-1:-out_layer_num - 1:-1] + + # fpn + yolo_feats = [] + route = None + for i, block in enumerate(blocks): + if i > 0: # perform concat in first 2 detection_block + block = fluid.layers.concat(input=[route, block], axis=1) + route, tip = self._detection_block( + block, + channel=512 // (2**i), + is_first=i == 0, + is_test=(not is_train), + conv_block_num=self.conv_block_num, + name=self.prefix_name + "fpn.{}".format(i)) + + yolo_feats.append(tip) + + if i < len(blocks) - 1: + # do not perform upsample in the last detection_block + route = self._conv_bn( + input=route, + ch_out=512 // (2**i), + filter_size=1, + stride=1, + padding=0, + act=self.act, + name=self.prefix_name + "fpn_transition.{}".format(i)) + # upsample + route = self._upsample(route) + + # pan + pan_feats = [yolo_feats[-1]] + route = yolo_feats[out_layer_num - 1] + for i in reversed(range(out_layer_num - 1)): + channel = 512 // (2**i) + route = self._conv_bn( + input=route, + ch_out=channel, + filter_size=3, + stride=2, + padding=1, + act=self.act, + name=self.prefix_name + "pan_transition.{}".format(i)) + block = yolo_feats[i] + block = fluid.layers.concat(input=[route, block], axis=1) + + route, tip = self._detection_block( + block, + channel=channel, + is_first=False, + is_test=(not is_train), + conv_block_num=self.conv_block_num, + name=self.prefix_name + "pan.{}".format(i)) + + pan_feats.append(tip) + + pan_feats = pan_feats[::-1] + outputs = [] + for i, block in enumerate(pan_feats): + if self.iou_aware: + num_filters = len(self.anchor_masks[i]) * (self.num_classes + 6) + else: + num_filters = len(self.anchor_masks[i]) * (self.num_classes + 5) + with fluid.name_scope('yolo_output'): + block_out = fluid.layers.conv2d( + input=block, + num_filters=num_filters, + filter_size=1, + stride=1, + padding=0, + act=None, + param_attr=ParamAttr( + name=self.prefix_name + + "yolo_output.{}.conv.weights".format(i)), + bias_attr=ParamAttr( + regularizer=L2Decay(0.), + name=self.prefix_name + + "yolo_output.{}.conv.bias".format(i))) + outputs.append(block_out) + + return outputs diff --git a/static/ppdet/modeling/losses/iou_aware_loss.py b/static/ppdet/modeling/losses/iou_aware_loss.py index c68c7a707..d0aeb9df3 100644 --- a/static/ppdet/modeling/losses/iou_aware_loss.py +++ b/static/ppdet/modeling/losses/iou_aware_loss.py @@ -74,6 +74,7 @@ class IouAwareLoss(IouLoss): iouk = self._iou(pred, gt, ioup, eps) iouk.stop_gradient = True - loss_iou_aware = fluid.layers.cross_entropy(ioup, iouk, soft_label=True) + loss_iou_aware = fluid.layers.sigmoid_cross_entropy_with_logits(ioup, + iouk) loss_iou_aware = loss_iou_aware * self._loss_weight return loss_iou_aware diff --git a/static/ppdet/modeling/losses/yolo_loss.py b/static/ppdet/modeling/losses/yolo_loss.py index c16c6cb11..553e63322 100644 --- a/static/ppdet/modeling/losses/yolo_loss.py +++ b/static/ppdet/modeling/losses/yolo_loss.py @@ -238,7 +238,6 @@ class YOLOv3Loss(object): along channel dimension """ ioup = fluid.layers.slice(output, axes=[1], starts=[0], ends=[an_num]) - ioup = fluid.layers.sigmoid(ioup) oriout = fluid.layers.slice( output, axes=[1], -- GitLab