From 6b76b6fcbc557c8244eb7da1c24f28734288ea16 Mon Sep 17 00:00:00 2001
From: wangxinxin08 <69842442+wangxinxin08@users.noreply.github.com>
Date: Mon, 1 Feb 2021 19:31:48 +0800
Subject: [PATCH] [Dygraph]yolo series (#2148)
* add ppyolo r18vd mbv3, yolov3 r50vd
* modify TestReader of ppyolo mbv3 r18vd
* add clip to avoid nan, modify ema to apply ema on bn mean and bn var
* fix code resulting in nan
* add yolov3 r50vd dcn configs
* finish yolo_series and fix some problems
* hide --bias flag and modify docs
* modify --bias and fix deploy/python/infer
---
dygraph/configs/ppyolo/README.md | 34 +++++++-
dygraph/configs/ppyolo/README_cn.md | 30 +++++++
.../configs/ppyolo/_base_/optimizer_1x.yml | 1 -
.../configs/ppyolo/_base_/optimizer_2x.yml | 1 -
.../ppyolo/_base_/ppyolo_mbv3_large.yml | 58 +++++++++++++
.../ppyolo/_base_/ppyolo_mbv3_small.yml | 58 +++++++++++++
.../configs/ppyolo/_base_/ppyolo_r18vd.yml | 60 ++++++++++++++
.../ppyolo/_base_/ppyolo_r50vd_dcn.yml | 4 +-
.../configs/ppyolo/ppyolo_mbv3_large_coco.yml | 82 +++++++++++++++++++
.../configs/ppyolo/ppyolo_mbv3_small_coco.yml | 82 +++++++++++++++++++
dygraph/configs/ppyolo/ppyolo_r18vd_coco.yml | 82 +++++++++++++++++++
.../ppyolo/ppyolo_r50vd_dcn_1x_coco.yml | 1 +
.../ppyolo/ppyolo_r50vd_dcn_1x_minicoco.yml | 11 ++-
.../ppyolo/ppyolo_r50vd_dcn_2x_coco.yml | 1 +
.../configs/ppyolo/ppyolo_r50vd_dcn_voc.yml | 44 ++++++++++
dygraph/configs/yolov3/README.md | 1 +
.../yolov3/_base_/yolov3_darknet53.yml | 1 -
.../yolov3/_base_/yolov3_r50vd_dcn.yml | 47 +++++++++++
.../yolov3/yolov3_darknet53_270e_coco.yml | 4 +-
.../yolov3/yolov3_mobilenet_v1_270e_coco.yml | 4 +-
.../yolov3/yolov3_mobilenet_v1_270e_voc.yml | 4 +-
.../yolov3_mobilenet_v3_large_270e_coco.yml | 4 +-
.../yolov3_mobilenet_v3_large_270e_voc.yml | 4 +-
.../yolov3/yolov3_r50vd_dcn_270e_coco.yml | 10 +++
dygraph/deploy/python/infer.py | 7 ++
.../ppdet/data/transform/batch_operator.py | 3 +-
.../ppdet/data/transform/batch_operators.py | 3 +-
dygraph/ppdet/engine/trainer.py | 6 +-
dygraph/ppdet/metrics/coco_utils.py | 13 ++-
dygraph/ppdet/metrics/metrics.py | 8 +-
dygraph/ppdet/modeling/heads/yolo_head.py | 7 +-
dygraph/ppdet/modeling/losses/yolo_loss.py | 2 +-
dygraph/ppdet/modeling/necks/yolo_fpn.py | 68 +++++++++------
dygraph/ppdet/modeling/shape_spec.py | 5 +-
dygraph/ppdet/modeling/utils/bbox_util.py | 3 +-
dygraph/ppdet/optimizer.py | 16 ++--
dygraph/ppdet/py_op/post_process.py | 13 ++-
dygraph/ppdet/utils/checkpoint.py | 2 +-
dygraph/tools/eval.py | 13 ++-
39 files changed, 728 insertions(+), 69 deletions(-)
create mode 100644 dygraph/configs/ppyolo/_base_/ppyolo_mbv3_large.yml
create mode 100644 dygraph/configs/ppyolo/_base_/ppyolo_mbv3_small.yml
create mode 100644 dygraph/configs/ppyolo/_base_/ppyolo_r18vd.yml
create mode 100644 dygraph/configs/ppyolo/ppyolo_mbv3_large_coco.yml
create mode 100644 dygraph/configs/ppyolo/ppyolo_mbv3_small_coco.yml
create mode 100644 dygraph/configs/ppyolo/ppyolo_r18vd_coco.yml
create mode 100644 dygraph/configs/ppyolo/ppyolo_r50vd_dcn_voc.yml
create mode 100644 dygraph/configs/yolov3/_base_/yolov3_r50vd_dcn.yml
create mode 100644 dygraph/configs/yolov3/yolov3_r50vd_dcn_270e_coco.yml
diff --git a/dygraph/configs/ppyolo/README.md b/dygraph/configs/ppyolo/README.md
index 17b1e0da3..2d8a1be7d 100644
--- a/dygraph/configs/ppyolo/README.md
+++ b/dygraph/configs/ppyolo/README.md
@@ -37,9 +37,18 @@ PP-YOLO improved performance and speed of YOLOv3 with following methods:
### PP-YOLO
| Model | GPU number | images/GPU | backbone | input shape | Box APval | Box APtest | V100 FP32(FPS) | V100 TensorRT FP16(FPS) | download | config |
-|:------------------------:|:----------:|:----------:|:----------:| :----------:| :------------------: | :-------------------: | :------------: | :---------------------: | :------: | :-----: |
+|:------------------------:|:-------:|:-------------:|:----------:| :-------:| :------------------: | :-------------------: | :------------: | :---------------------: | :------: | :------: |
| PP-YOLO | 8 | 24 | ResNet50vd | 608 | 44.8 | 45.2 | 72.9 | 155.6 | [model](https://paddlemodels.bj.bcebos.com/object_detection/dygraph/ppyolo_r50vd_dcn_1x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_r50vd_dcn_1x_coco.yml) |
+| PP-YOLO | 8 | 24 | ResNet50vd | 512 | 43.9 | 44.4 | 89.9 | 188.4 | [model](https://paddlemodels.bj.bcebos.com/object_detection/dygraph/ppyolo_r50vd_dcn_1x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_r50vd_dcn_1x_coco.yml) |
+| PP-YOLO | 8 | 24 | ResNet50vd | 416 | 42.1 | 42.5 | 109.1 | 215.4 | [model](https://paddlemodels.bj.bcebos.com/object_detection/dygraph/ppyolo_r50vd_dcn_1x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_r50vd_dcn_1x_coco.yml) |
+| PP-YOLO | 8 | 24 | ResNet50vd | 320 | 38.9 | 39.3 | 132.2 | 242.2 | [model](https://paddlemodels.bj.bcebos.com/object_detection/dygraph/ppyolo_r50vd_dcn_1x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_r50vd_dcn_1x_coco.yml) |
| PP-YOLO_2x | 8 | 24 | ResNet50vd | 608 | 45.3 | 45.9 | 72.9 | 155.6 | [model](https://paddlemodels.bj.bcebos.com/object_detection/dygraph/ppyolo_r50vd_dcn_2x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_r50vd_dcn_2x_coco.yml) |
+| PP-YOLO_2x | 8 | 24 | ResNet50vd | 512 | 44.4 | 45.0 | 89.9 | 188.4 | [model](https://paddlemodels.bj.bcebos.com/object_detection/dygraph/ppyolo_r50vd_dcn_2x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_r50vd_dcn_2x_coco.yml) |
+| PP-YOLO_2x | 8 | 24 | ResNet50vd | 416 | 42.7 | 43.2 | 109.1 | 215.4 | [model](https://paddlemodels.bj.bcebos.com/object_detection/dygraph/ppyolo_r50vd_dcn_2x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_r50vd_dcn_2x_coco.yml) |
+| PP-YOLO_2x | 8 | 24 | ResNet50vd | 320 | 39.5 | 40.1 | 132.2 | 242.2 | [model](https://paddlemodels.bj.bcebos.com/object_detection/dygraph/ppyolo_r50vd_dcn_2x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_r50vd_dcn_2x_coco.yml) |
+| PP-YOLO_ResNet18vd | 4 | 32 | ResNet18vd | 512 | 29.3 | 29.5 | 357.1 | 657.9 | [model](https://paddlemodels.bj.bcebos.com/object_detection/dygraph/ppyolo_r18vd_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_r18vd_coco.yml) |
+| PP-YOLO_ResNet18vd | 4 | 32 | ResNet18vd | 416 | 28.6 | 28.9 | 409.8 | 719.4 | [model](https://paddlemodels.bj.bcebos.com/object_detection/dygraph/ppyolo_r18vd_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_r18vd_coco.yml) |
+| PP-YOLO_ResNet18vd | 4 | 32 | ResNet18vd | 320 | 26.2 | 26.4 | 480.7 | 763.4 | [model](https://paddlemodels.bj.bcebos.com/object_detection/dygraph/ppyolo_r18vd_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_r18vd_coco.yml) |
**Notes:**
@@ -49,6 +58,29 @@ PP-YOLO improved performance and speed of YOLOv3 with following methods:
- PP-YOLO FP32 inference speed testing uses inference model exported by `tools/export_model.py` and benchmarked by running `depoly/python/infer.py` with `--run_benchmark`. All testing results do not contains the time cost of data reading and post-processing(NMS), which is same as [YOLOv4(AlexyAB)](https://github.com/AlexeyAB/darknet) in testing method.
- TensorRT FP16 inference speed testing exclude the time cost of bounding-box decoding(`yolo_box`) part comparing with FP32 testing above, which means that data reading, bounding-box decoding and post-processing(NMS) is excluded(test method same as [YOLOv4(AlexyAB)](https://github.com/AlexeyAB/darknet) too)
+### PP-YOLO for mobile
+
+| Model | GPU number | images/GPU | Model Size | input shape | Box APval | Box AP50val | Kirin 990 1xCore(FPS) | download | config |
+|:----------------------------:|:-------:|:-------------:|:----------:| :-------:| :------------------: | :--------------------: | :--------------------: | :------: | :------: |
+| PP-YOLO_MobileNetV3_large | 4 | 32 | 28MB | 320 | 23.2 | 42.6 | 14.1 | [model](https://paddlemodels.bj.bcebos.com/object_detection/dygraph/ppyolo_mbv3_large_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_mbv3_large_coco.yml) |
+| PP-YOLO_MobileNetV3_small | 4 | 32 | 16MB | 320 | 17.2 | 33.8 | 21.5 | [model](https://paddlemodels.bj.bcebos.com/object_detection/dygraph/ppyolo_mbv3_small_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_mbv3_small_coco.yml) |
+
+**Notes:**
+
+- PP-YOLO_MobileNetV3 is trained on COCO train2017 datast and evaluated on val2017 dataset,Box APval is evaluation results of `mAP(IoU=0.5:0.95)`, Box APval is evaluation results of `mAP(IoU=0.5)`.
+- PP-YOLO_MobileNetV3 used 4 GPUs for training and mini-batch size as 32 on each GPU, if GPU number and mini-batch size is changed, learning rate and iteration times should be adjusted according [FAQ](../../../docs/FAQ.md).
+- PP-YOLO_MobileNetV3 inference speed is tested on Kirin 990 with 1 thread.
+
+### PP-YOLO on Pascal VOC
+
+PP-YOLO trained on Pascal VOC dataset as follows:
+
+| Model | GPU number | images/GPU | backbone | input shape | Box AP50val | download | config |
+|:------------------:|:----------:|:----------:|:----------:| :----------:| :--------------------: | :------: | :-----: |
+| PP-YOLO | 8 | 12 | ResNet50vd | 608 | 84.9 | [model](https://paddlemodels.bj.bcebos.com/object_detection/dygraph/ppyolo_r50vd_dcn_voc.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_r50vd_dcn_voc.yml) |
+| PP-YOLO | 8 | 12 | ResNet50vd | 416 | 84.3 | [model](https://paddlemodels.bj.bcebos.com/object_detection/dygraph/ppyolo_r50vd_dcn_voc.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_r50vd_dcn_voc.yml) |
+| PP-YOLO | 8 | 12 | ResNet50vd | 320 | 82.2 | [model](https://paddlemodels.bj.bcebos.com/object_detection/dygraph/ppyolo_r50vd_dcn_voc.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_r50vd_dcn_voc.yml) |
+
## Getting Start
### 1. Training
diff --git a/dygraph/configs/ppyolo/README_cn.md b/dygraph/configs/ppyolo/README_cn.md
index c1bd09d13..604c57f32 100644
--- a/dygraph/configs/ppyolo/README_cn.md
+++ b/dygraph/configs/ppyolo/README_cn.md
@@ -39,7 +39,16 @@ PP-YOLO从如下方面优化和提升YOLOv3模型的精度和速度:
| 模型 | GPU个数 | 每GPU图片个数 | 骨干网络 | 输入尺寸 | Box APval | Box APtest | V100 FP32(FPS) | V100 TensorRT FP16(FPS) | 模型下载 | 配置文件 |
|:------------------------:|:-------:|:-------------:|:----------:| :-------:| :------------------: | :-------------------: | :------------: | :---------------------: | :------: | :------: |
| PP-YOLO | 8 | 24 | ResNet50vd | 608 | 44.8 | 45.2 | 72.9 | 155.6 | [model](https://paddlemodels.bj.bcebos.com/object_detection/dygraph/ppyolo_r50vd_dcn_1x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_r50vd_dcn_1x_coco.yml) |
+| PP-YOLO | 8 | 24 | ResNet50vd | 512 | 43.9 | 44.4 | 89.9 | 188.4 | [model](https://paddlemodels.bj.bcebos.com/object_detection/dygraph/ppyolo_r50vd_dcn_1x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_r50vd_dcn_1x_coco.yml) |
+| PP-YOLO | 8 | 24 | ResNet50vd | 416 | 42.1 | 42.5 | 109.1 | 215.4 | [model](https://paddlemodels.bj.bcebos.com/object_detection/dygraph/ppyolo_r50vd_dcn_1x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_r50vd_dcn_1x_coco.yml) |
+| PP-YOLO | 8 | 24 | ResNet50vd | 320 | 38.9 | 39.3 | 132.2 | 242.2 | [model](https://paddlemodels.bj.bcebos.com/object_detection/dygraph/ppyolo_r50vd_dcn_1x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_r50vd_dcn_1x_coco.yml) |
| PP-YOLO_2x | 8 | 24 | ResNet50vd | 608 | 45.3 | 45.9 | 72.9 | 155.6 | [model](https://paddlemodels.bj.bcebos.com/object_detection/dygraph/ppyolo_r50vd_dcn_2x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_r50vd_dcn_2x_coco.yml) |
+| PP-YOLO_2x | 8 | 24 | ResNet50vd | 512 | 44.4 | 45.0 | 89.9 | 188.4 | [model](https://paddlemodels.bj.bcebos.com/object_detection/dygraph/ppyolo_r50vd_dcn_2x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_r50vd_dcn_2x_coco.yml) |
+| PP-YOLO_2x | 8 | 24 | ResNet50vd | 416 | 42.7 | 43.2 | 109.1 | 215.4 | [model](https://paddlemodels.bj.bcebos.com/object_detection/dygraph/ppyolo_r50vd_dcn_2x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_r50vd_dcn_2x_coco.yml) |
+| PP-YOLO_2x | 8 | 24 | ResNet50vd | 320 | 39.5 | 40.1 | 132.2 | 242.2 | [model](https://paddlemodels.bj.bcebos.com/object_detection/dygraph/ppyolo_r50vd_dcn_2x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_r50vd_dcn_2x_coco.yml) |
+| PP-YOLO_ResNet18vd | 4 | 32 | ResNet18vd | 512 | 29.3 | 29.5 | 357.1 | 657.9 | [model](https://paddlemodels.bj.bcebos.com/object_detection/dygraph/ppyolo_r18vd_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_r18vd_coco.yml) |
+| PP-YOLO_ResNet18vd | 4 | 32 | ResNet18vd | 416 | 28.6 | 28.9 | 409.8 | 719.4 | [model](https://paddlemodels.bj.bcebos.com/object_detection/dygraph/ppyolo_r18vd_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_r18vd_coco.yml) |
+| PP-YOLO_ResNet18vd | 4 | 32 | ResNet18vd | 320 | 26.2 | 26.4 | 480.7 | 763.4 | [model](https://paddlemodels.bj.bcebos.com/object_detection/dygraph/ppyolo_r18vd_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_r18vd_coco.yml) |
**注意:**
@@ -50,6 +59,27 @@ PP-YOLO从如下方面优化和提升YOLOv3模型的精度和速度:
- TensorRT FP16的速度测试相比于FP32去除了`yolo_box`(bbox解码)部分耗时,即不包含数据预处理,bbox解码和NMS(与[YOLOv4(AlexyAB)](https://github.com/AlexeyAB/darknet)测试方法一致)。
- PP-YOLO模型推理速度测试采用单卡V100,batch size=1进行测试,使用CUDA 10.2, CUDNN 7.5.1,TensorRT推理速度测试使用TensorRT 5.1.2.2。
+### PP-YOLO 轻量级模型
+
+| 模型 | GPU个数 | 每GPU图片个数 | 模型体积 | 输入尺寸 | Box APval | Box AP50val | Kirin 990 1xCore (FPS) | 模型下载 | 配置文件 |
+|:----------------------------:|:-------:|:-------------:|:----------:| :-------:| :------------------: | :--------------------: | :--------------------: | :------: | :------: |
+| PP-YOLO_MobileNetV3_large | 4 | 32 | 28MB | 320 | 23.2 | 42.6 | 14.1 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/dygraph/ppyolo_mbv3_large_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_mbv3_large_coco.yml) |
+| PP-YOLO_MobileNetV3_small | 4 | 32 | 16MB | 320 | 17.2 | 33.8 | 21.5 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/dygraph/ppyolo_mbv3_small_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_mbv3_small_coco.yml) |
+
+- PP-YOLO_MobileNetV3 模型使用COCO数据集中train2017作为训练集,使用val2017作为测试集,Box APval为`mAP(IoU=0.5:0.95)`评估结果, Box AP50val为`mAP(IoU=0.5)`评估结果。
+- PP-YOLO_MobileNetV3 模型训练过程中使用4GPU,每GPU batch size为32进行训练,如训练GPU数和batch size不使用上述配置,须参考[FAQ](../../../docs/FAQ.md)调整学习率和迭代次数。
+- PP-YOLO_MobileNetV3 模型推理速度测试环境配置为麒麟990芯片单线程。
+
+### Pascal VOC数据集上的PP-YOLO
+
+PP-YOLO在Pascal VOC数据集上训练模型如下:
+
+| 模型 | GPU个数 | 每GPU图片个数 | 骨干网络 | 输入尺寸 | Box AP50val | 模型下载 | 配置文件 |
+|:------------------:|:-------:|:-------------:|:----------:| :----------:| :--------------------: | :------: | :-----: |
+| PP-YOLO | 8 | 12 | ResNet50vd | 608 | 84.9 | [model](https://paddlemodels.bj.bcebos.com/object_detection/dygraph/ppyolo_r50vd_dcn_voc.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_r50vd_dcn_voc.yml) |
+| PP-YOLO | 8 | 12 | ResNet50vd | 416 | 84.3 | [model](https://paddlemodels.bj.bcebos.com/object_detection/dygraph/ppyolo_r50vd_dcn_voc.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_r50vd_dcn_voc.yml) |
+| PP-YOLO | 8 | 12 | ResNet50vd | 320 | 82.2 | [model](https://paddlemodels.bj.bcebos.com/object_detection/dygraph/ppyolo_r50vd_dcn_voc.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/ppyolo/ppyolo_r50vd_dcn_voc.yml) |
+
## 使用说明
### 1. 训练
diff --git a/dygraph/configs/ppyolo/_base_/optimizer_1x.yml b/dygraph/configs/ppyolo/_base_/optimizer_1x.yml
index fe51b296c..8e6301e32 100644
--- a/dygraph/configs/ppyolo/_base_/optimizer_1x.yml
+++ b/dygraph/configs/ppyolo/_base_/optimizer_1x.yml
@@ -13,7 +13,6 @@ LearningRate:
steps: 4000
OptimizerBuilder:
- clip_grad_by_norm: 35.
optimizer:
momentum: 0.9
type: Momentum
diff --git a/dygraph/configs/ppyolo/_base_/optimizer_2x.yml b/dygraph/configs/ppyolo/_base_/optimizer_2x.yml
index c601a1860..92ddbf2a7 100644
--- a/dygraph/configs/ppyolo/_base_/optimizer_2x.yml
+++ b/dygraph/configs/ppyolo/_base_/optimizer_2x.yml
@@ -13,7 +13,6 @@ LearningRate:
steps: 4000
OptimizerBuilder:
- clip_grad_by_norm: 35.
optimizer:
momentum: 0.9
type: Momentum
diff --git a/dygraph/configs/ppyolo/_base_/ppyolo_mbv3_large.yml b/dygraph/configs/ppyolo/_base_/ppyolo_mbv3_large.yml
new file mode 100644
index 000000000..7b96fa9d7
--- /dev/null
+++ b/dygraph/configs/ppyolo/_base_/ppyolo_mbv3_large.yml
@@ -0,0 +1,58 @@
+architecture: YOLOv3
+pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV3_large_x1_0_ssld_pretrained.tar
+load_static_weights: true
+norm_type: sync_bn
+use_ema: true
+ema_decay: 0.9998
+
+YOLOv3:
+ backbone: MobileNetV3
+ neck: PPYOLOFPN
+ yolo_head: YOLOv3Head
+ post_process: BBoxPostProcess
+
+MobileNetV3:
+ model_name: large
+ scale: 1.
+ with_extra_blocks: false
+ extra_block_filters: []
+ feature_maps: [13, 16]
+
+PPYOLOFPN:
+ feat_channels: [160, 368]
+ coord_conv: true
+ conv_block_num: 0
+ spp: true
+ drop_block: true
+
+YOLOv3Head:
+ anchors: [[11, 18], [34, 47], [51, 126],
+ [115, 71], [120, 195], [254, 235]]
+ anchor_masks: [[3, 4, 5], [0, 1, 2]]
+ loss: YOLOv3Loss
+
+YOLOv3Loss:
+ ignore_thresh: 0.5
+ downsample: [32, 16]
+ label_smooth: false
+ scale_x_y: 1.05
+ iou_loss: IouLoss
+
+IouLoss:
+ loss_weight: 2.5
+ loss_square: true
+
+BBoxPostProcess:
+ decode:
+ name: YOLOBox
+ conf_thresh: 0.005
+ downsample_ratio: 32
+ clip_bbox: true
+ scale_x_y: 1.05
+ nms:
+ name: MultiClassNMS
+ keep_top_k: 100
+ nms_threshold: 0.45
+ nms_top_k: 1000
+ score_threshold: 0.005
+ normalized: false
diff --git a/dygraph/configs/ppyolo/_base_/ppyolo_mbv3_small.yml b/dygraph/configs/ppyolo/_base_/ppyolo_mbv3_small.yml
new file mode 100644
index 000000000..edbf6a45c
--- /dev/null
+++ b/dygraph/configs/ppyolo/_base_/ppyolo_mbv3_small.yml
@@ -0,0 +1,58 @@
+architecture: YOLOv3
+pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV3_small_x1_0_ssld_pretrained.tar
+load_static_weights: true
+norm_type: sync_bn
+use_ema: true
+ema_decay: 0.9998
+
+YOLOv3:
+ backbone: MobileNetV3
+ neck: PPYOLOFPN
+ yolo_head: YOLOv3Head
+ post_process: BBoxPostProcess
+
+MobileNetV3:
+ model_name: small
+ scale: 1.
+ with_extra_blocks: false
+ extra_block_filters: []
+ feature_maps: [9, 12]
+
+PPYOLOFPN:
+ feat_channels: [96, 304]
+ coord_conv: true
+ conv_block_num: 0
+ spp: true
+ drop_block: true
+
+YOLOv3Head:
+ anchors: [[11, 18], [34, 47], [51, 126],
+ [115, 71], [120, 195], [254, 235]]
+ anchor_masks: [[3, 4, 5], [0, 1, 2]]
+ loss: YOLOv3Loss
+
+YOLOv3Loss:
+ ignore_thresh: 0.5
+ downsample: [32, 16]
+ label_smooth: false
+ scale_x_y: 1.05
+ iou_loss: IouLoss
+
+IouLoss:
+ loss_weight: 2.5
+ loss_square: true
+
+BBoxPostProcess:
+ decode:
+ name: YOLOBox
+ conf_thresh: 0.005
+ downsample_ratio: 32
+ clip_bbox: true
+ scale_x_y: 1.05
+ nms:
+ name: MultiClassNMS
+ keep_top_k: 100
+ nms_threshold: 0.45
+ nms_top_k: 1000
+ score_threshold: 0.005
+ normalized: false
diff --git a/dygraph/configs/ppyolo/_base_/ppyolo_r18vd.yml b/dygraph/configs/ppyolo/_base_/ppyolo_r18vd.yml
new file mode 100644
index 000000000..6a7bf0962
--- /dev/null
+++ b/dygraph/configs/ppyolo/_base_/ppyolo_r18vd.yml
@@ -0,0 +1,60 @@
+architecture: YOLOv3
+pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet18_vd_pretrained.tar
+load_static_weights: true
+norm_type: sync_bn
+use_ema: true
+ema_decay: 0.9998
+
+YOLOv3:
+ backbone: ResNet
+ neck: PPYOLOFPN
+ yolo_head: YOLOv3Head
+ post_process: BBoxPostProcess
+
+ResNet:
+ depth: 18
+ variant: d
+ return_idx: [2, 3]
+ freeze_at: -1
+ freeze_norm: false
+ norm_decay: 0.
+
+PPYOLOFPN:
+ feat_channels: [512, 512]
+ drop_block: true
+ block_size: 3
+ keep_prob: 0.9
+ conv_block_num: 0
+
+YOLOv3Head:
+ anchor_masks: [[3, 4, 5], [0, 1, 2]]
+ anchors: [[10, 14], [23, 27], [37, 58],
+ [81, 82], [135, 169], [344, 319]]
+ loss: YOLOv3Loss
+
+YOLOv3Loss:
+ ignore_thresh: 0.7
+ downsample: [32, 16]
+ label_smooth: false
+ scale_x_y: 1.05
+ iou_loss: IouLoss
+
+IouLoss:
+ loss_weight: 2.5
+ loss_square: true
+
+BBoxPostProcess:
+ decode:
+ name: YOLOBox
+ conf_thresh: 0.01
+ downsample_ratio: 32
+ clip_bbox: true
+ scale_x_y: 1.05
+ nms:
+ name: MatrixNMS
+ keep_top_k: 100
+ score_threshold: 0.01
+ post_threshold: 0.01
+ nms_top_k: -1
+ normalized: false
+ background_label: -1
diff --git a/dygraph/configs/ppyolo/_base_/ppyolo_r50vd_dcn.yml b/dygraph/configs/ppyolo/_base_/ppyolo_r50vd_dcn.yml
index 18111ad02..186251ca3 100644
--- a/dygraph/configs/ppyolo/_base_/ppyolo_r50vd_dcn.yml
+++ b/dygraph/configs/ppyolo/_base_/ppyolo_r50vd_dcn.yml
@@ -1,6 +1,5 @@
architecture: YOLOv3
pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vd_ssld_pretrained.tar
-weights: output/ppyolo_r50vd_dcn/model_final
load_static_weights: true
norm_type: sync_bn
use_ema: true
@@ -55,7 +54,7 @@ IouAwareLoss:
BBoxPostProcess:
decode:
name: YOLOBox
- conf_thresh: 0.005
+ conf_thresh: 0.01
downsample_ratio: 32
clip_bbox: true
scale_x_y: 1.05
@@ -66,3 +65,4 @@ BBoxPostProcess:
post_threshold: 0.01
nms_top_k: -1
normalized: false
+ background_label: -1
diff --git a/dygraph/configs/ppyolo/ppyolo_mbv3_large_coco.yml b/dygraph/configs/ppyolo/ppyolo_mbv3_large_coco.yml
new file mode 100644
index 000000000..b6f00b741
--- /dev/null
+++ b/dygraph/configs/ppyolo/ppyolo_mbv3_large_coco.yml
@@ -0,0 +1,82 @@
+_BASE_: [
+ '../datasets/coco_detection.yml',
+ '../runtime.yml',
+ './_base_/ppyolo_mbv3_large.yml',
+ './_base_/optimizer_1x.yml',
+ './_base_/ppyolo_reader.yml',
+]
+
+snapshot_epoch: 10
+weights: output/ppyolo_mbv3_large_coco/model_final
+
+TrainReader:
+ inputs_def:
+ num_max_boxes: 90
+ sample_transforms:
+ - DecodeOp: {}
+ - MixupOp: {alpha: 1.5, beta: 1.5}
+ - RandomDistortOp: {}
+ - RandomExpandOp: {fill_value: [123.675, 116.28, 103.53]}
+ - RandomCropOp: {}
+ - RandomFlipOp: {}
+ batch_transforms:
+ - BatchRandomResizeOp:
+ target_size: [224, 256, 288, 320, 352, 384, 416, 448, 480, 512]
+ random_size: True
+ random_interp: True
+ keep_ratio: False
+ - NormalizeBoxOp: {}
+ - PadBoxOp: {num_max_boxes: 90}
+ - BboxXYXY2XYWHOp: {}
+ - NormalizeImageOp: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
+ - PermuteOp: {}
+ - Gt2YoloTargetOp:
+ anchor_masks: [[3, 4, 5], [0, 1, 2]]
+ anchors: [[11, 18], [34, 47], [51, 126], [115, 71], [120, 195], [254, 235]]
+ downsample_ratios: [32, 16]
+ iou_thresh: 0.25
+ num_classes: 80
+ batch_size: 32
+ mixup_epoch: 200
+ shuffle: true
+
+EvalReader:
+ sample_transforms:
+ - DecodeOp: {}
+ - ResizeOp: {target_size: [320, 320], keep_ratio: False, interp: 2}
+ - NormalizeImageOp: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
+ - PermuteOp: {}
+ batch_size: 8
+ drop_empty: false
+
+TestReader:
+ inputs_def:
+ image_shape: [3, 320, 320]
+ sample_transforms:
+ - DecodeOp: {}
+ - ResizeOp: {target_size: [320, 320], keep_ratio: False, interp: 2}
+ - NormalizeImageOp: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
+ - PermuteOp: {}
+ batch_size: 1
+
+epoch: 270
+
+LearningRate:
+ base_lr: 0.005
+ schedulers:
+ - !PiecewiseDecay
+ gamma: 0.1
+ milestones:
+ - 162
+ - 216
+ - !LinearWarmup
+ start_factor: 0.
+ steps: 4000
+
+OptimizerBuilder:
+ optimizer:
+ momentum: 0.9
+ type: Momentum
+ regularizer:
+ factor: 0.0005
+ type: L2
diff --git a/dygraph/configs/ppyolo/ppyolo_mbv3_small_coco.yml b/dygraph/configs/ppyolo/ppyolo_mbv3_small_coco.yml
new file mode 100644
index 000000000..1cdfe6dea
--- /dev/null
+++ b/dygraph/configs/ppyolo/ppyolo_mbv3_small_coco.yml
@@ -0,0 +1,82 @@
+_BASE_: [
+ '../datasets/coco_detection.yml',
+ '../runtime.yml',
+ './_base_/ppyolo_mbv3_small.yml',
+ './_base_/optimizer_1x.yml',
+ './_base_/ppyolo_reader.yml',
+]
+
+snapshot_epoch: 10
+weights: output/ppyolo_mbv3_small_coco/model_final
+
+TrainReader:
+ inputs_def:
+ num_max_boxes: 90
+ sample_transforms:
+ - DecodeOp: {}
+ - MixupOp: {alpha: 1.5, beta: 1.5}
+ - RandomDistortOp: {}
+ - RandomExpandOp: {fill_value: [123.675, 116.28, 103.53]}
+ - RandomCropOp: {}
+ - RandomFlipOp: {}
+ batch_transforms:
+ - BatchRandomResizeOp:
+ target_size: [224, 256, 288, 320, 352, 384, 416, 448, 480, 512]
+ random_size: True
+ random_interp: True
+ keep_ratio: False
+ - NormalizeBoxOp: {}
+ - PadBoxOp: {num_max_boxes: 90}
+ - BboxXYXY2XYWHOp: {}
+ - NormalizeImageOp: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
+ - PermuteOp: {}
+ - Gt2YoloTargetOp:
+ anchor_masks: [[3, 4, 5], [0, 1, 2]]
+ anchors: [[11, 18], [34, 47], [51, 126], [115, 71], [120, 195], [254, 235]]
+ downsample_ratios: [32, 16]
+ iou_thresh: 0.25
+ num_classes: 80
+ batch_size: 32
+ mixup_epoch: 200
+ shuffle: true
+
+EvalReader:
+ sample_transforms:
+ - DecodeOp: {}
+ - ResizeOp: {target_size: [320, 320], keep_ratio: False, interp: 2}
+ - NormalizeImageOp: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
+ - PermuteOp: {}
+ batch_size: 8
+ drop_empty: false
+
+TestReader:
+ inputs_def:
+ image_shape: [3, 320, 320]
+ sample_transforms:
+ - DecodeOp: {}
+ - ResizeOp: {target_size: [320, 320], keep_ratio: False, interp: 2}
+ - NormalizeImageOp: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
+ - PermuteOp: {}
+ batch_size: 1
+
+epoch: 270
+
+LearningRate:
+ base_lr: 0.005
+ schedulers:
+ - !PiecewiseDecay
+ gamma: 0.1
+ milestones:
+ - 162
+ - 216
+ - !LinearWarmup
+ start_factor: 0.
+ steps: 4000
+
+OptimizerBuilder:
+ optimizer:
+ momentum: 0.9
+ type: Momentum
+ regularizer:
+ factor: 0.0005
+ type: L2
diff --git a/dygraph/configs/ppyolo/ppyolo_r18vd_coco.yml b/dygraph/configs/ppyolo/ppyolo_r18vd_coco.yml
new file mode 100644
index 000000000..e89dbb842
--- /dev/null
+++ b/dygraph/configs/ppyolo/ppyolo_r18vd_coco.yml
@@ -0,0 +1,82 @@
+_BASE_: [
+ '../datasets/coco_detection.yml',
+ '../runtime.yml',
+ './_base_/ppyolo_r18vd.yml',
+ './_base_/optimizer_1x.yml',
+ './_base_/ppyolo_reader.yml',
+]
+
+snapshot_epoch: 10
+weights: output/ppyolo_r18vd_coco/model_final
+
+TrainReader:
+ sample_transforms:
+ - DecodeOp: {}
+ - MixupOp: {alpha: 1.5, beta: 1.5}
+ - RandomDistortOp: {}
+ - RandomExpandOp: {fill_value: [123.675, 116.28, 103.53]}
+ - RandomCropOp: {}
+ - RandomFlipOp: {}
+ batch_transforms:
+ - BatchRandomResizeOp:
+ target_size: [320, 352, 384, 416, 448, 480, 512, 544, 576, 608]
+ random_size: True
+ random_interp: True
+ keep_ratio: False
+ - NormalizeBoxOp: {}
+ - PadBoxOp: {num_max_boxes: 50}
+ - BboxXYXY2XYWHOp: {}
+ - NormalizeImageOp:
+ mean: [0.485, 0.456, 0.406]
+ std: [0.229, 0.224, 0.225]
+ is_scale: True
+ - PermuteOp: {}
+ - Gt2YoloTargetOp:
+ anchor_masks: [[3, 4, 5], [0, 1, 2]]
+ anchors: [[10, 14], [23, 27], [37, 58], [81, 82], [135, 169], [344, 319]]
+ downsample_ratios: [32, 16]
+
+ batch_size: 32
+ mixup_epoch: 500
+ shuffle: true
+
+EvalReader:
+ sample_transforms:
+ - DecodeOp: {}
+ - ResizeOp: {target_size: [512, 512], keep_ratio: False, interp: 2}
+ - NormalizeImageOp: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
+ - PermuteOp: {}
+ batch_size: 8
+ drop_empty: false
+
+TestReader:
+ inputs_def:
+ image_shape: [3, 512, 512]
+ sample_transforms:
+ - DecodeOp: {}
+ - ResizeOp: {target_size: [512, 512], keep_ratio: False, interp: 2}
+ - NormalizeImageOp: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
+ - PermuteOp: {}
+ batch_size: 1
+
+epoch: 270
+
+LearningRate:
+ base_lr: 0.004
+ schedulers:
+ - !PiecewiseDecay
+ gamma: 0.1
+ milestones:
+ - 162
+ - 216
+ - !LinearWarmup
+ start_factor: 0.
+ steps: 4000
+
+OptimizerBuilder:
+ optimizer:
+ momentum: 0.9
+ type: Momentum
+ regularizer:
+ factor: 0.0005
+ type: L2
diff --git a/dygraph/configs/ppyolo/ppyolo_r50vd_dcn_1x_coco.yml b/dygraph/configs/ppyolo/ppyolo_r50vd_dcn_1x_coco.yml
index 4b1e2a797..918f3401e 100644
--- a/dygraph/configs/ppyolo/ppyolo_r50vd_dcn_1x_coco.yml
+++ b/dygraph/configs/ppyolo/ppyolo_r50vd_dcn_1x_coco.yml
@@ -7,3 +7,4 @@ _BASE_: [
]
snapshot_epoch: 16
+weights: output/ppyolo_r50vd_dcn_1x_coco/model_final
diff --git a/dygraph/configs/ppyolo/ppyolo_r50vd_dcn_1x_minicoco.yml b/dygraph/configs/ppyolo/ppyolo_r50vd_dcn_1x_minicoco.yml
index 18945a9bd..87b976b99 100644
--- a/dygraph/configs/ppyolo/ppyolo_r50vd_dcn_1x_minicoco.yml
+++ b/dygraph/configs/ppyolo/ppyolo_r50vd_dcn_1x_minicoco.yml
@@ -7,7 +7,8 @@ _BASE_: [
]
snapshot_epoch: 8
-use_ema: false
+use_ema: true
+weights: output/ppyolo_r50vd_dcn_1x_minicoco/model_final
TrainReader:
batch_size: 12
@@ -33,3 +34,11 @@ LearningRate:
- !LinearWarmup
start_factor: 0.
steps: 4000
+
+OptimizerBuilder:
+ optimizer:
+ momentum: 0.9
+ type: Momentum
+ regularizer:
+ factor: 0.0005
+ type: L2
diff --git a/dygraph/configs/ppyolo/ppyolo_r50vd_dcn_2x_coco.yml b/dygraph/configs/ppyolo/ppyolo_r50vd_dcn_2x_coco.yml
index 87646baf7..ac6531fe7 100644
--- a/dygraph/configs/ppyolo/ppyolo_r50vd_dcn_2x_coco.yml
+++ b/dygraph/configs/ppyolo/ppyolo_r50vd_dcn_2x_coco.yml
@@ -7,3 +7,4 @@ _BASE_: [
]
snapshot_epoch: 16
+weights: output/ppyolo_r50vd_dcn_2x_coco/model_final
diff --git a/dygraph/configs/ppyolo/ppyolo_r50vd_dcn_voc.yml b/dygraph/configs/ppyolo/ppyolo_r50vd_dcn_voc.yml
new file mode 100644
index 000000000..4b2bcc492
--- /dev/null
+++ b/dygraph/configs/ppyolo/ppyolo_r50vd_dcn_voc.yml
@@ -0,0 +1,44 @@
+_BASE_: [
+ '../datasets/voc.yml',
+ '../runtime.yml',
+ './_base_/ppyolo_r50vd_dcn.yml',
+ './_base_/optimizer_1x.yml',
+ './_base_/ppyolo_reader.yml',
+]
+
+snapshot_epoch: 83
+weights: output/ppyolo_r50vd_dcn_voc/model_final
+
+TrainReader:
+ batch_transforms:
+ - BatchRandomResizeOp: {target_size: [320, 352, 384, 416, 448, 480, 512, 544, 576, 608], random_size: True, random_interp: True, keep_ratio: False}
+ - NormalizeBoxOp: {}
+ - PadBoxOp: {num_max_boxes: 50}
+ - BboxXYXY2XYWHOp: {}
+ - NormalizeImageOp: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
+ - PermuteOp: {}
+ - Gt2YoloTargetOp: {anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]], anchors: [[10, 13], [16, 30], [33, 23], [30, 61], [62, 45], [59, 119], [116, 90], [156, 198], [373, 326]], downsample_ratios: [32, 16, 8], num_classes: 20}
+ mixup_epoch: 350
+ batch_size: 12
+
+epoch: 583
+
+LearningRate:
+ base_lr: 0.00333
+ schedulers:
+ - !PiecewiseDecay
+ gamma: 0.1
+ milestones:
+ - 466
+ - 516
+ - !LinearWarmup
+ start_factor: 0.
+ steps: 4000
+
+OptimizerBuilder:
+ optimizer:
+ momentum: 0.9
+ type: Momentum
+ regularizer:
+ factor: 0.0005
+ type: L2
diff --git a/dygraph/configs/yolov3/README.md b/dygraph/configs/yolov3/README.md
index 116fc0901..8de4ea8ad 100644
--- a/dygraph/configs/yolov3/README.md
+++ b/dygraph/configs/yolov3/README.md
@@ -12,6 +12,7 @@
| DarkNet53 | 608 | 8 | 270e | ---- | 39.0 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/dygraph/yolov3_darknet53_270e_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/yolov3/yolov3_darknet53_270e_coco.yml) |
| DarkNet53 | 416 | 8 | 270e | ---- | 37.5 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/dygraph/yolov3_darknet53_270e_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/yolov3/yolov3_darknet53_270e_coco.yml) |
| DarkNet53 | 320 | 8 | 270e | ---- | 34.6 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/dygraph/yolov3_darknet53_270e_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/yolov3/yolov3_darknet53_270e_coco.yml) |
+| ResNet50_vd | 608 | 8 | 270e | ---- | 39.1 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/dygraph/yolov3_r50vd_dcn_270e_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/yolov3/yolov3_r50vd_dcn_270e_coco.yml) |
| MobileNet-V1 | 608 | 8 | 270e | ---- | 28.8 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/dygraph/yolov3_mobilenet_v1_270e_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/yolov3/yolov3_mobilenet_v1_270e_coco.yml) |
| MobileNet-V1 | 416 | 8 | 270e | ---- | 28.7 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/dygraph/yolov3_mobilenet_v1_270e_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/yolov3/yolov3_mobilenet_v1_270e_coco.yml) |
| MobileNet-V1 | 320 | 8 | 270e | ---- | 26.5 | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/dygraph/yolov3_mobilenet_v1_270e_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/master/dygraph/configs/yolov3/yolov3_mobilenet_v1_270e_coco.yml) |
diff --git a/dygraph/configs/yolov3/_base_/yolov3_darknet53.yml b/dygraph/configs/yolov3/_base_/yolov3_darknet53.yml
index 796c24501..0f91cf030 100644
--- a/dygraph/configs/yolov3/_base_/yolov3_darknet53.yml
+++ b/dygraph/configs/yolov3/_base_/yolov3_darknet53.yml
@@ -1,6 +1,5 @@
architecture: YOLOv3
pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/DarkNet53_pretrained.tar
-use_fine_grained_loss: false
load_static_weights: True
norm_type: sync_bn
diff --git a/dygraph/configs/yolov3/_base_/yolov3_r50vd_dcn.yml b/dygraph/configs/yolov3/_base_/yolov3_r50vd_dcn.yml
new file mode 100644
index 000000000..f122cc365
--- /dev/null
+++ b/dygraph/configs/yolov3/_base_/yolov3_r50vd_dcn.yml
@@ -0,0 +1,47 @@
+architecture: YOLOv3
+pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vd_ssld_pretrained.tar
+load_static_weights: True
+norm_type: sync_bn
+
+YOLOv3:
+ backbone: ResNet
+ neck: YOLOv3FPN
+ yolo_head: YOLOv3Head
+ post_process: BBoxPostProcess
+
+ResNet:
+ depth: 50
+ variant: d
+ return_idx: [1, 2, 3]
+ dcn_v2_stages: [3]
+ freeze_at: -1
+ freeze_norm: false
+ norm_decay: 0.
+
+# YOLOv3FPN:
+
+YOLOv3Head:
+ anchors: [[10, 13], [16, 30], [33, 23],
+ [30, 61], [62, 45], [59, 119],
+ [116, 90], [156, 198], [373, 326]]
+ anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
+ loss: YOLOv3Loss
+
+YOLOv3Loss:
+ ignore_thresh: 0.7
+ downsample: [32, 16, 8]
+ label_smooth: false
+
+BBoxPostProcess:
+ decode:
+ name: YOLOBox
+ conf_thresh: 0.005
+ downsample_ratio: 32
+ clip_bbox: true
+ nms:
+ name: MultiClassNMS
+ keep_top_k: 100
+ score_threshold: 0.01
+ nms_threshold: 0.45
+ nms_top_k: 1000
+ normalized: false
diff --git a/dygraph/configs/yolov3/yolov3_darknet53_270e_coco.yml b/dygraph/configs/yolov3/yolov3_darknet53_270e_coco.yml
index 15bc38e6c..4fbd401d3 100644
--- a/dygraph/configs/yolov3/yolov3_darknet53_270e_coco.yml
+++ b/dygraph/configs/yolov3/yolov3_darknet53_270e_coco.yml
@@ -5,4 +5,6 @@ _BASE_: [
'_base_/yolov3_darknet53.yml',
'_base_/yolov3_reader.yml',
]
-weights: output/yolov3_darknet53_coco/model_final
+
+snapshot_epoch: 5
+weights: output/yolov3_darknet53_270e_coco/model_final
diff --git a/dygraph/configs/yolov3/yolov3_mobilenet_v1_270e_coco.yml b/dygraph/configs/yolov3/yolov3_mobilenet_v1_270e_coco.yml
index 4c3c30427..b9dd33bdb 100644
--- a/dygraph/configs/yolov3/yolov3_mobilenet_v1_270e_coco.yml
+++ b/dygraph/configs/yolov3/yolov3_mobilenet_v1_270e_coco.yml
@@ -5,4 +5,6 @@ _BASE_: [
'_base_/yolov3_mobilenet_v1.yml',
'_base_/yolov3_reader.yml',
]
-weights: output/yolov3_mobilenet_v1_coco/model_final
+
+snapshot_epoch: 5
+weights: output/yolov3_mobilenet_v1_270e_coco/model_final
diff --git a/dygraph/configs/yolov3/yolov3_mobilenet_v1_270e_voc.yml b/dygraph/configs/yolov3/yolov3_mobilenet_v1_270e_voc.yml
index 5d4fb1929..df44e9262 100644
--- a/dygraph/configs/yolov3/yolov3_mobilenet_v1_270e_voc.yml
+++ b/dygraph/configs/yolov3/yolov3_mobilenet_v1_270e_voc.yml
@@ -5,7 +5,9 @@ _BASE_: [
'_base_/yolov3_mobilenet_v1.yml',
'_base_/yolov3_reader.yml',
]
-weights: output/yolov3_mobilenet_v1_voc/model_final
+
+snapshot_epoch: 5
+weights: output/yolov3_mobilenet_v1_270e_voc/model_final
TrainReader:
inputs_def:
diff --git a/dygraph/configs/yolov3/yolov3_mobilenet_v3_large_270e_coco.yml b/dygraph/configs/yolov3/yolov3_mobilenet_v3_large_270e_coco.yml
index dd06756ac..d1b8af566 100644
--- a/dygraph/configs/yolov3/yolov3_mobilenet_v3_large_270e_coco.yml
+++ b/dygraph/configs/yolov3/yolov3_mobilenet_v3_large_270e_coco.yml
@@ -5,4 +5,6 @@ _BASE_: [
'_base_/yolov3_mobilenet_v3_large.yml',
'_base_/yolov3_reader.yml',
]
-weights: output/yolov3_mobilenet_v3_large_coco/model_final
+
+snapshot_epoch: 5
+weights: output/yolov3_mobilenet_v3_large_270e_coco/model_final
diff --git a/dygraph/configs/yolov3/yolov3_mobilenet_v3_large_270e_voc.yml b/dygraph/configs/yolov3/yolov3_mobilenet_v3_large_270e_voc.yml
index 26cd965d3..4b459415a 100644
--- a/dygraph/configs/yolov3/yolov3_mobilenet_v3_large_270e_voc.yml
+++ b/dygraph/configs/yolov3/yolov3_mobilenet_v3_large_270e_voc.yml
@@ -5,7 +5,9 @@ _BASE_: [
'_base_/yolov3_mobilenet_v3_large.yml',
'_base_/yolov3_reader.yml',
]
-weights: output/yolov3_mobilenet_v3_large_voc/model_final
+
+snapshot_epoch: 5
+weights: output/yolov3_mobilenet_v3_large_270e_voc/model_final
TrainReader:
inputs_def:
diff --git a/dygraph/configs/yolov3/yolov3_r50vd_dcn_270e_coco.yml b/dygraph/configs/yolov3/yolov3_r50vd_dcn_270e_coco.yml
new file mode 100644
index 000000000..a07cbdde1
--- /dev/null
+++ b/dygraph/configs/yolov3/yolov3_r50vd_dcn_270e_coco.yml
@@ -0,0 +1,10 @@
+_BASE_: [
+ '../datasets/coco_detection.yml',
+ '../runtime.yml',
+ '_base_/optimizer_270e.yml',
+ '_base_/yolov3_r50vd_dcn.yml',
+ '_base_/yolov3_reader.yml',
+]
+
+snapshot_epoch: 5
+weights: output/yolov3_r50vd_dcn_270e_coco/model_final
diff --git a/dygraph/deploy/python/infer.py b/dygraph/deploy/python/infer.py
index e0aae03e5..41ddadf1b 100644
--- a/dygraph/deploy/python/infer.py
+++ b/dygraph/deploy/python/infer.py
@@ -135,6 +135,13 @@ class Detector(object):
output_names = self.predictor.get_output_names()
boxes_tensor = self.predictor.get_output_handle(output_names[0])
np_boxes = boxes_tensor.copy_to_cpu()
+ score_tensor = self.predictor.get_output_handle(output_names[3])
+ np_score = score_tensor.copy_to_cpu()
+ label_tensor = self.predictor.get_output_handle(output_names[2])
+ np_label = label_tensor.copy_to_cpu()
+ np_boxes = np.concatenate(
+ [np_label[:, np.newaxis], np_score[:, np.newaxis], np_boxes],
+ axis=-1)
if self.pred_config.mask_resolution is not None:
masks_tensor = self.predictor.get_output_handle(output_names[2])
np_masks = masks_tensor.copy_to_cpu()
diff --git a/dygraph/ppdet/data/transform/batch_operator.py b/dygraph/ppdet/data/transform/batch_operator.py
index 8712d8151..aabd0cf5d 100644
--- a/dygraph/ppdet/data/transform/batch_operator.py
+++ b/dygraph/ppdet/data/transform/batch_operator.py
@@ -290,7 +290,8 @@ class Gt2YoloTargetOp(BaseOperator):
iou = jaccard_overlap(
[0., 0., gw, gh],
[0., 0., an_hw[mask_i, 0], an_hw[mask_i, 1]])
- if iou > self.iou_thresh:
+ if iou > self.iou_thresh and target[idx, 5, gj,
+ gi] == 0.:
# x, y, w, h, scale
target[idx, 0, gj, gi] = gx * grid_w - gi
target[idx, 1, gj, gi] = gy * grid_h - gj
diff --git a/dygraph/ppdet/data/transform/batch_operators.py b/dygraph/ppdet/data/transform/batch_operators.py
index 143f2afa9..345c33898 100644
--- a/dygraph/ppdet/data/transform/batch_operators.py
+++ b/dygraph/ppdet/data/transform/batch_operators.py
@@ -319,7 +319,8 @@ class Gt2YoloTarget(BaseOperator):
iou = jaccard_overlap(
[0., 0., gw, gh],
[0., 0., an_hw[mask_i, 0], an_hw[mask_i, 1]])
- if iou > self.iou_thresh:
+ if iou > self.iou_thresh and target[idx, 5, gj,
+ gi] == 0.:
# x, y, w, h, scale
target[idx, 0, gj, gi] = gx * grid_w - gi
target[idx, 1, gj, gi] = gy * grid_h - gj
diff --git a/dygraph/ppdet/engine/trainer.py b/dygraph/ppdet/engine/trainer.py
index 33aaaaba2..d18492a4e 100644
--- a/dygraph/ppdet/engine/trainer.py
+++ b/dygraph/ppdet/engine/trainer.py
@@ -114,7 +114,11 @@ class Trainer(object):
self._metrics = []
return
if self.cfg.metric == 'COCO':
- self._metrics = [COCOMetric(anno_file=self.dataset.get_anno())]
+ # TODO: bias should be unified
+ self._metrics = [
+ COCOMetric(
+ anno_file=self.dataset.get_anno(), bias=self.cfg.bias)
+ ]
elif self.cfg.metric == 'VOC':
self._metrics = [
VOCMetric(
diff --git a/dygraph/ppdet/metrics/coco_utils.py b/dygraph/ppdet/metrics/coco_utils.py
index 5ace10df0..40929d0ad 100644
--- a/dygraph/ppdet/metrics/coco_utils.py
+++ b/dygraph/ppdet/metrics/coco_utils.py
@@ -24,7 +24,7 @@ from ppdet.utils.logger import setup_logger
logger = setup_logger(__name__)
-def get_infer_results(outs, catid):
+def get_infer_results(outs, catid, bias=0):
"""
Get result at the stage of inference.
The output format is dictionary containing bbox or mask result.
@@ -41,9 +41,14 @@ def get_infer_results(outs, catid):
infer_res = {}
if 'bbox' in outs:
- infer_res['bbox'] = get_det_res(outs['bbox'], outs['score'],
- outs['label'], outs['bbox_num'], im_id,
- catid)
+ infer_res['bbox'] = get_det_res(
+ outs['bbox'],
+ outs['score'],
+ outs['label'],
+ outs['bbox_num'],
+ im_id,
+ catid,
+ bias=bias)
if 'mask' in outs:
# mask post process
diff --git a/dygraph/ppdet/metrics/metrics.py b/dygraph/ppdet/metrics/metrics.py
index 6647b8382..304495f65 100644
--- a/dygraph/ppdet/metrics/metrics.py
+++ b/dygraph/ppdet/metrics/metrics.py
@@ -49,12 +49,13 @@ class Metric(paddle.metric.Metric):
class COCOMetric(Metric):
- def __init__(self, anno_file):
+ def __init__(self, anno_file, **kwargs):
assert os.path.isfile(anno_file), \
"anno_file {} not a file".format(anno_file)
self.anno_file = anno_file
self.clsid2catid, self.catid2name = get_categories('COCO', anno_file)
-
+ # TODO: bias should be unified
+ self.bias = kwargs.get('bias', 0)
self.reset()
def reset(self):
@@ -72,7 +73,8 @@ class COCOMetric(Metric):
outs['im_id'] = im_id.numpy() if isinstance(im_id,
paddle.Tensor) else im_id
- infer_results = get_infer_results(outs, self.clsid2catid)
+ infer_results = get_infer_results(
+ outs, self.clsid2catid, bias=self.bias)
self.results['bbox'] += infer_results[
'bbox'] if 'bbox' in infer_results else []
self.results['mask'] += infer_results[
diff --git a/dygraph/ppdet/modeling/heads/yolo_head.py b/dygraph/ppdet/modeling/heads/yolo_head.py
index ab32ce1e6..d6453a3a4 100644
--- a/dygraph/ppdet/modeling/heads/yolo_head.py
+++ b/dygraph/ppdet/modeling/heads/yolo_head.py
@@ -39,15 +39,16 @@ class YOLOv3Head(nn.Layer):
self.yolo_outputs = []
for i in range(len(self.anchors)):
+
if self.iou_aware:
- num_filters = self.num_outputs * (self.num_classes + 6)
+ num_filters = len(self.anchors[i]) * (self.num_classes + 6)
else:
- num_filters = self.num_outputs * (self.num_classes + 5)
+ num_filters = len(self.anchors[i]) * (self.num_classes + 5)
name = 'yolo_output.{}'.format(i)
yolo_output = self.add_sublayer(
name,
nn.Conv2D(
- in_channels=1024 // (2**i),
+ in_channels=128 * (2**self.num_outputs) // (2**i),
out_channels=num_filters,
kernel_size=1,
stride=1,
diff --git a/dygraph/ppdet/modeling/losses/yolo_loss.py b/dygraph/ppdet/modeling/losses/yolo_loss.py
index ad679a079..149139989 100644
--- a/dygraph/ppdet/modeling/losses/yolo_loss.py
+++ b/dygraph/ppdet/modeling/losses/yolo_loss.py
@@ -188,4 +188,4 @@ class YOLOv3Loss(nn.Layer):
loss += v
yolo_losses['loss'] = loss
- return yolo_losses
\ No newline at end of file
+ return yolo_losses
diff --git a/dygraph/ppdet/modeling/necks/yolo_fpn.py b/dygraph/ppdet/modeling/necks/yolo_fpn.py
index 4ef6935b3..f89b32053 100644
--- a/dygraph/ppdet/modeling/necks/yolo_fpn.py
+++ b/dygraph/ppdet/modeling/necks/yolo_fpn.py
@@ -249,6 +249,7 @@ class PPYOLOFPN(nn.Layer):
self.keep_prob = kwargs.get('keep_prob', 0.9)
self.spp = kwargs.get('spp', False)
+ self.conv_block_num = kwargs.get('conv_block_num', 2)
if self.coord_conv:
ConvLayer = CoordConv
else:
@@ -269,32 +270,53 @@ class PPYOLOFPN(nn.Layer):
if i > 0:
ch_in += 512 // (2**i)
channel = 64 * (2**self.num_blocks) // (2**i)
- base_cfg = [
- # name of layer, Layer, args
- ['conv0', ConvLayer, [ch_in, channel, 1]],
- ['conv1', ConvBNLayer, [channel, channel * 2, 3]],
- ['conv2', ConvLayer, [channel * 2, channel, 1]],
- ['conv3', ConvBNLayer, [channel, channel * 2, 3]],
- ['route', ConvLayer, [channel * 2, channel, 1]],
- ['tip', ConvLayer, [channel, channel * 2, 3]]
- ]
- for conf in base_cfg:
- filter_size = conf[-1][-1]
- conf.append(dict(padding=filter_size // 2, norm_type=norm_type))
- if i == 0:
- if self.spp:
- pool_size = [5, 9, 13]
+ base_cfg = []
+ c_in, c_out = ch_in, channel
+ for j in range(self.conv_block_num):
+ base_cfg += [
+ [
+ 'conv{}'.format(2 * j), ConvLayer, [c_in, c_out, 1],
+ dict(
+ padding=0, norm_type=norm_type)
+ ],
+ [
+ 'conv{}'.format(2 * j + 1), ConvBNLayer,
+ [c_out, c_out * 2, 3], dict(
+ padding=1, norm_type=norm_type)
+ ],
+ ]
+ c_in, c_out = c_out * 2, c_out
+
+ base_cfg += [[
+ 'route', ConvLayer, [c_in, c_out, 1], dict(
+ padding=0, norm_type=norm_type)
+ ], [
+ 'tip', ConvLayer, [c_out, c_out * 2, 3], dict(
+ padding=1, norm_type=norm_type)
+ ]]
+
+ if self.conv_block_num == 2:
+ if i == 0:
+ if self.spp:
+ spp_cfg = [[
+ 'spp', SPP, [channel * 4, channel, 1], dict(
+ pool_size=[5, 9, 13], norm_type=norm_type)
+ ]]
+ else:
+ spp_cfg = []
+ cfg = base_cfg[0:3] + spp_cfg + base_cfg[
+ 3:4] + dropblock_cfg + base_cfg[4:6]
+ else:
+ cfg = base_cfg[0:2] + dropblock_cfg + base_cfg[2:6]
+ elif self.conv_block_num == 0:
+ if self.spp and i == 0:
spp_cfg = [[
- 'spp', SPP,
- [channel * (len(pool_size) + 1), channel, 1], dict(
- pool_size=pool_size, norm_type=norm_type)
+ 'spp', SPP, [c_in * 4, c_in, 1], dict(
+ pool_size=[5, 9, 13], norm_type=norm_type)
]]
else:
spp_cfg = []
- cfg = base_cfg[0:3] + spp_cfg + base_cfg[
- 3:4] + dropblock_cfg + base_cfg[4:6]
- else:
- cfg = base_cfg[0:2] + dropblock_cfg + base_cfg[2:6]
+ cfg = spp_cfg + dropblock_cfg + base_cfg
name = 'yolo_block.{}'.format(i)
yolo_block = self.add_sublayer(name, PPYOLODetBlock(cfg, name))
self.yolo_blocks.append(yolo_block)
@@ -305,7 +327,7 @@ class PPYOLOFPN(nn.Layer):
name,
ConvBNLayer(
ch_in=channel,
- ch_out=channel // 2,
+ ch_out=256 // (2**i),
filter_size=1,
stride=1,
padding=0,
diff --git a/dygraph/ppdet/modeling/shape_spec.py b/dygraph/ppdet/modeling/shape_spec.py
index 78e4a3b00..a4d4a2fea 100644
--- a/dygraph/ppdet/modeling/shape_spec.py
+++ b/dygraph/ppdet/modeling/shape_spec.py
@@ -28,5 +28,6 @@ class ShapeSpec(
stride:
"""
- def __new__(cls, *, channels=None, height=None, width=None, stride=None):
- return super().__new__(cls, channels, height, width, stride)
+ def __new__(cls, channels=None, height=None, width=None, stride=None):
+ return super(ShapeSpec, cls).__new__(cls, channels, height, width,
+ stride)
diff --git a/dygraph/ppdet/modeling/utils/bbox_util.py b/dygraph/ppdet/modeling/utils/bbox_util.py
index 440b162f8..6ea3682b4 100644
--- a/dygraph/ppdet/modeling/utils/bbox_util.py
+++ b/dygraph/ppdet/modeling/utils/bbox_util.py
@@ -106,8 +106,7 @@ def bbox_iou(box1, box2, giou=False, diou=False, ciou=False, eps=1e-9):
x2 = paddle.minimum(px2, gx2)
y2 = paddle.minimum(py2, gy2)
- overlap = (x2 - x1) * (y2 - y1)
- overlap = overlap.clip(0)
+ overlap = ((x2 - x1).clip(0)) * ((y2 - y1).clip(0))
area1 = (px2 - px1) * (py2 - py1)
area1 = area1.clip(0)
diff --git a/dygraph/ppdet/optimizer.py b/dygraph/ppdet/optimizer.py
index e2e6123b3..c476e2edb 100644
--- a/dygraph/ppdet/optimizer.py
+++ b/dygraph/ppdet/optimizer.py
@@ -243,19 +243,15 @@ class ModelEMA(object):
self._decay = decay
model_dict = model.state_dict()
for k, v in self.state_dict.items():
- if '_mean' not in k and '_variance' not in k:
- v = decay * v + (1 - decay) * model_dict[k]
- v.stop_gradient = True
- self.state_dict[k] = v
- else:
- self.state_dict[k] = model_dict[k]
+ v = decay * v + (1 - decay) * model_dict[k]
+ v.stop_gradient = True
+ self.state_dict[k] = v
self.step += 1
def apply(self):
state_dict = dict()
for k, v in self.state_dict.items():
- if '_mean' not in k and '_variance' not in k:
- v = v / (1 - self._decay**self.step)
- v.stop_gradient = True
- state_dict[k] = v
+ v = v / (1 - self._decay**self.step)
+ v.stop_gradient = True
+ state_dict[k] = v
return state_dict
diff --git a/dygraph/ppdet/py_op/post_process.py b/dygraph/ppdet/py_op/post_process.py
index fcaeb2861..e7c5d9dbf 100755
--- a/dygraph/ppdet/py_op/post_process.py
+++ b/dygraph/ppdet/py_op/post_process.py
@@ -4,8 +4,13 @@ import numpy as np
import cv2
-def get_det_res(bboxes, scores, labels, bbox_nums, image_id,
- label_to_cat_id_map):
+def get_det_res(bboxes,
+ scores,
+ labels,
+ bbox_nums,
+ image_id,
+ label_to_cat_id_map,
+ bias=0):
det_res = []
k = 0
for i in range(len(bbox_nums)):
@@ -19,8 +24,8 @@ def get_det_res(bboxes, scores, labels, bbox_nums, image_id,
k = k + 1
xmin, ymin, xmax, ymax = box.tolist()
category_id = label_to_cat_id_map[label]
- w = xmax - xmin
- h = ymax - ymin
+ w = xmax - xmin + bias
+ h = ymax - ymin + bias
bbox = [xmin, ymin, w, h]
dt_res = {
'image_id': cur_image_id,
diff --git a/dygraph/ppdet/utils/checkpoint.py b/dygraph/ppdet/utils/checkpoint.py
index 1f4562233..38a7dc01a 100644
--- a/dygraph/ppdet/utils/checkpoint.py
+++ b/dygraph/ppdet/utils/checkpoint.py
@@ -163,7 +163,7 @@ def load_pretrain_weight(model,
model.backbone.set_dict(param_state_dict)
else:
ignore_set = set()
- for name, weight in model_dict:
+ for name, weight in model_dict.items():
if name in param_state_dict:
if weight.shape != param_state_dict[name].shape:
param_state_dict.pop(name, None)
diff --git a/dygraph/tools/eval.py b/dygraph/tools/eval.py
index 998710bff..690cc5501 100755
--- a/dygraph/tools/eval.py
+++ b/dygraph/tools/eval.py
@@ -47,7 +47,10 @@ def parse_args():
help="Evaluation directory, default is current directory.")
parser.add_argument(
- '--json_eval', action='store_true', default=False, help='')
+ '--json_eval',
+ action='store_true',
+ default=False,
+ help='Whether to re eval with already exists bbox.json or mask.json')
parser.add_argument(
"--slim_config",
@@ -55,6 +58,12 @@ def parse_args():
type=str,
help="Configuration file of slim method.")
+ # TODO: bias should be unified
+ parser.add_argument(
+ "--bias",
+ action="store_true",
+ help="whether add bias or not while getting w and h")
+
args = parser.parse_args()
return args
@@ -77,6 +86,8 @@ def main():
FLAGS = parse_args()
cfg = load_config(FLAGS.config)
+ # TODO: bias should be unified
+ cfg['bias'] = 1 if FLAGS.bias else 0
merge_config(FLAGS.opt)
if FLAGS.slim_config:
slim_cfg = load_config(FLAGS.slim_config)
--
GitLab