Update YOLOv3_ENHANCEMENT.md (#206)

* update YOLOv3_ENHANCEMENT.md * update IoUloss and DropBlock configs for YOLOv3 related configs

Update YOLOv3_ENHANCEMENT.md (#206)
* update YOLOv3_ENHANCEMENT.md * update IoUloss and DropBlock configs for YOLOv3 related configs
791b8f41 · Yuan Gao · GitHub · 90a0e6c6 · 791b8f41 · 791b8f41
7 changed files
--- a/configs/dcn/yolov3_enhance_reader.yml
+++ b/configs/dcn/yolov3_enhance_reader.yml
+TrainReader:
+  inputs_def:
+    fields: ['image', 'gt_bbox', 'gt_class', 'gt_score']
+    num_max_boxes: 50
+  use_fine_grained_loss: true
+  dataset:
+    !COCODataSet
+    image_dir: train2017
+    anno_path: annotations/instances_train2017.json
+    dataset_dir: dataset/coco
+    with_background: false
+  sample_transforms:
+    - !DecodeImage
+      to_rgb: True
+    - !RandomCrop {}
+    - !RandomFlipImage
+      is_normalized: false
+    - !NormalizeBox {}
+    - !PadBox
+      num_max_boxes: 50
+    - !BboxXYXY2XYWH {}
+  batch_transforms:
+    - !RandomShape
+      sizes: [320, 352, 384, 416, 448, 480, 512, 544, 576, 608]
+      random_inter: True
+    - !NormalizeImage
+      mean: [0.485, 0.456, 0.406]
+      std: [0.229, 0.224, 0.225]
+      is_scale: False
+      is_channel_first: false
+    - !Permute
+      to_bgr: false
+      channel_first: True
+    # Gt2YoloTarget is only used when use_fine_grained_loss set as true,
+    # this operator will be deleted automatically if use_fine_grained_loss
+    # is set as false
+    - !Gt2YoloTarget
+      anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
+      anchors: [[10, 13], [16, 30], [33, 23],
+                [30, 61], [62, 45], [59, 119],
+                [116, 90], [156, 198], [373, 326]]
+      downsample_ratios: [32, 16, 8]
+  batch_size: 8
+  shuffle: true
+  drop_last: true
+  worker_num: 8
+  bufsize: 32
+  use_process: true
+EvalReader:
+  inputs_def:
+    image_shape: [3, 608, 608]
+    fields: ['image', 'im_size', 'im_id']
+    num_max_boxes: 50
+  dataset:
+    !COCODataSet
+    dataset_dir: dataset/coco
+    anno_path: annotations/instances_val2017.json
+    image_dir: val2017
+    with_background: false
+  sample_transforms:
+    - !DecodeImage
+      to_rgb: True
+      with_mixup: false
+    - !ResizeImage
+      interp: 2
+      target_size: 608
+    - !NormalizeImage
+      mean: [0.485, 0.456, 0.406]
+      std: [0.229, 0.224, 0.225]
+      is_scale: False
+      is_channel_first: false
+    - !Permute
+      to_bgr: false
+      channel_first: True
+  batch_size: 8
+  drop_empty: false
+  worker_num: 8
+  bufsize: 32
+TestReader:
+  inputs_def:
+    image_shape: [3, 608, 608]
+    fields: ['image', 'im_size', 'im_id']
+  dataset:
+    !ImageFolder
+      anno_path: annotations/instances_val2017.json
+      with_background: false
+  sample_transforms:
+    - !DecodeImage
+      to_rgb: True
+      with_mixup: false
+    - !ResizeImage
+      interp: 2
+      target_size: 608
+    - !NormalizeImage
+      mean: [0.485, 0.456, 0.406]
+      std: [0.229, 0.224, 0.225]
+      is_scale: False
+      is_channel_first: false
+    - !Permute
+      to_bgr: false
+      channel_first: True
+  batch_size: 1
--- a/configs/dcn/yolov3_r50vd_dcn_iouloss_obj365_pretrained_coco.yml
+++ b/configs/dcn/yolov3_r50vd_dcn_iouloss_obj365_pretrained_coco.yml
 architecture: YOLOv3
 use_gpu: true
-max_iters: 55000
+max_iters: 85000
 log_smooth_window: 20
 save_dir: output
 snapshot_iter: 10000
 metric: COCO
-pretrain_weights: https://paddlemodels.bj.bcebos.com/object_detection/ResNet50_vd_obj365_pretrained.tar
+pretrain_weights: https://paddlemodels.bj.bcebos.com/object_detection/ResNet50_vd_dcn_db_obj365_pretrained.tar
-weights: output/yolov3_r50vd_dcn_iouloss_obj365_pretrained_coco/model_final
+weights: output/yolov3_r50vd_dcn_db_iouloss_obj365_pretrained_coco/model_final
 num_classes: 80
 use_fine_grained_loss: true
@@ -39,6 +39,7 @@ YOLOv3Head:
    nms_top_k: 1000
    normalized: false
    score_threshold: 0.01
+  drop_block: true
 YOLOv3Loss:
  batch_size: 8
@@ -58,8 +59,8 @@ LearningRate:
  - !PiecewiseDecay
    gamma: 0.1
    milestones:
-    - 40000
+    - 55000
-    - 50000
+    - 75000
  - !LinearWarmup
    start_factor: 0.
    steps: 4000
@@ -72,4 +73,4 @@ OptimizerBuilder:
    factor: 0.0005
    type: L2
-_READER_: '../yolov3_reader.yml'
+_READER_: 'yolov3_enhance_reader.yml'
--- a/configs/dcn/yolov3_r50vd_dcn_db_obj365_pretrained_coco.yml
+++ b/configs/dcn/yolov3_r50vd_dcn_db_obj365_pretrained_coco.yml
+architecture: YOLOv3
+use_gpu: true
+max_iters: 85000
+log_smooth_window: 20
+save_dir: output
+snapshot_iter: 10000
+metric: COCO
+pretrain_weights: https://paddlemodels.bj.bcebos.com/object_detection/ResNet50_vd_dcn_db_obj365_pretrained.tar
+weights: output/yolov3_r50vd_dcn_db_obj365_pretrained_coco/model_final
+num_classes: 80
+use_fine_grained_loss: true
+YOLOv3:
+  backbone: ResNet
+  yolo_head: YOLOv3Head
+  use_fine_grained_loss: true
+ResNet:
+  norm_type: sync_bn
+  freeze_at: 0
+  freeze_norm: false
+  norm_decay: 0.
+  depth: 50
+  feature_maps: [3, 4, 5]
+  variant: d
+  dcn_v2_stages: [5]
+YOLOv3Head:
+  anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
+  anchors: [[10, 13], [16, 30], [33, 23],
+            [30, 61], [62, 45], [59, 119],
+            [116, 90], [156, 198], [373, 326]]
+  norm_decay: 0.
+  yolo_loss: YOLOv3Loss
+  nms:
+    background_label: -1
+    keep_top_k: 100
+    nms_threshold: 0.45
+    nms_top_k: 1000
+    normalized: false
+    score_threshold: 0.01
+  drop_block: true
+YOLOv3Loss:
+  batch_size: 8
+  ignore_thresh: 0.7
+  label_smooth: false
+  use_fine_grained_loss: true
+LearningRate:
+  base_lr: 0.001
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones:
+    - 55000
+    - 75000
+  - !LinearWarmup
+    start_factor: 0.
+    steps: 4000
+OptimizerBuilder:
+  optimizer:
+    momentum: 0.9
+    type: Momentum
+  regularizer:
+    factor: 0.0005
+    type: L2
+_READER_: 'yolov3_enhance_reader.yml'
--- a/configs/dcn/yolov3_r50vd_dcn_obj365_pretrained_coco.yml
+++ b/configs/dcn/yolov3_r50vd_dcn_obj365_pretrained_coco.yml
 architecture: YOLOv3
 use_gpu: true
-max_iters: 55000
+max_iters: 85000
 log_smooth_window: 20
 save_dir: output
 snapshot_iter: 10000
 metric: COCO
-pretrain_weights: https://paddlemodels.bj.bcebos.com/object_detection/ResNet50_vd_obj365_pretrained.tar
+pretrain_weights: https://paddlemodels.bj.bcebos.com/object_detection/ResNet50_vd_dcn_db_obj365_pretrained.tar
-weights: output/yolov3_r50vd_dcn_obj365_pretrained_coco/model_final
+weights: output/yolov3_r50vd_dcn_db_obj365_pretrained_coco/model_final
 num_classes: 80
-use_fine_grained_loss: false
+use_fine_grained_loss: true
 YOLOv3:
  backbone: ResNet
  yolo_head: YOLOv3Head
+  use_fine_grained_loss: true
 ResNet:
  norm_type: sync_bn
@@ -43,6 +44,7 @@ YOLOv3Loss:
  batch_size: 8
  ignore_thresh: 0.7
  label_smooth: false
+  use_fine_grained_loss: true
 LearningRate:
  base_lr: 0.001
@@ -50,8 +52,8 @@ LearningRate:
  - !PiecewiseDecay
    gamma: 0.1
    milestones:
-    - 40000
+    - 55000
-    - 50000
+    - 75000
  - !LinearWarmup
    start_factor: 0.
    steps: 4000
@@ -64,106 +66,4 @@ OptimizerBuilder:
    factor: 0.0005
    type: L2
-TrainReader:
+_READER_: 'yolov3_enhance_reader.yml'
-  inputs_def:
-    fields: ['image', 'gt_bbox', 'gt_class', 'gt_score']
-    num_max_boxes: 50
-  dataset:
-    !COCODataSet
-    image_dir: train2017
-    anno_path: annotations/instances_train2017.json
-    dataset_dir: dataset/coco
-    with_background: false
-  sample_transforms:
-    - !DecodeImage
-      to_rgb: True
-    - !RandomCrop {}
-    - !RandomFlipImage
-      is_normalized: false
-    - !NormalizeBox {}
-    - !PadBox
-      num_max_boxes: 50
-    - !BboxXYXY2XYWH {}
-  batch_transforms:
-    - !RandomShape
-      sizes: [320, 352, 384, 416, 448, 480, 512, 544, 576, 608]
-      random_inter: True
-    - !NormalizeImage
-      mean: [0.485, 0.456, 0.406]
-      std: [0.229, 0.224, 0.225]
-      is_scale: False
-      is_channel_first: false
-    - !Permute
-      to_bgr: false
-      channel_first: True
-    # Gt2YoloTarget is only used when use_fine_grained_loss set as true,
-    # this operator will be deleted automatically if use_fine_grained_loss
-    # is set as false
-    - !Gt2YoloTarget
-      anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
-      anchors: [[10, 13], [16, 30], [33, 23],
-                [30, 61], [62, 45], [59, 119],
-                [116, 90], [156, 198], [373, 326]]
-      downsample_ratios: [32, 16, 8]
-  batch_size: 8
-  shuffle: true
-  drop_last: true
-  worker_num: 8
-  bufsize: 32
-  use_process: true
-EvalReader:
-  inputs_def:
-    image_shape: [3, 608, 608]
-    fields: ['image', 'im_size', 'im_id']
-    num_max_boxes: 50
-  dataset:
-    !COCODataSet
-    dataset_dir: dataset/coco
-    anno_path: annotations/instances_val2017.json
-    image_dir: val2017
-    with_background: false
-  sample_transforms:
-    - !DecodeImage
-      to_rgb: True
-      with_mixup: false
-    - !ResizeImage
-      interp: 2
-      target_size: 608
-    - !NormalizeImage
-      mean: [0.485, 0.456, 0.406]
-      std: [0.229, 0.224, 0.225]
-      is_scale: False
-      is_channel_first: false
-    - !Permute
-      to_bgr: false
-      channel_first: True
-  batch_size: 8
-  drop_empty: false
-  worker_num: 8
-  bufsize: 32
-TestReader:
-  inputs_def:
-    image_shape: [3, 608, 608]
-    fields: ['image', 'im_size', 'im_id']
-  dataset:
-    !ImageFolder
-      anno_path: annotations/instances_val2017.json
-      with_background: false
-  sample_transforms:
-    - !DecodeImage
-      to_rgb: True
-      with_mixup: false
-    - !ResizeImage
-      interp: 2
-      target_size: 608
-    - !NormalizeImage
-      mean: [0.485, 0.456, 0.406]
-      std: [0.229, 0.224, 0.225]
-      is_scale: True
-      is_channel_first: false
-    - !Permute
-      to_bgr: false
-      channel_first: True
-  batch_size: 1
--- a/demo/dropblock.png
+++ b/demo/dropblock.png
--- a/docs/featured_model/YOLOv3_ENHANCEMENT.md
+++ b/docs/featured_model/YOLOv3_ENHANCEMENT.md
 # YOLOv3增强模型
 ---
 ## 简介
 [YOLOv3](https://arxiv.org/abs/1804.02767) 是由 [Joseph Redmon](https://arxiv.org/search/cs?searchtype=author&query=Redmon%2C+J) 和 [Ali Farhadi](https://arxiv.org/search/cs?searchtype=author&query=Farhadi%2C+A) 提出的单阶段检测器, 该检测
@@ -8,12 +9,22 @@
 PaddleDetection实现版本中使用了 [Bag of Freebies for Training Object Detection Neural Networks](https://arxiv.org/abs/1902.04103v3) 中提出的图像增强和label smooth等优化方法，精度优于darknet框架的实现版本，在COCO-2017数据集上，YOLOv3(DarkNet)达到`mAP(0.50:0.95)= 38.9`的精度，比darknet实现版本的精度(33.0)要高5.9。同时，在推断速度方面，基于Paddle预测库的加速方法，推断速度比darknet高30%。
-在此基础上，PaddleDetection对YOLOv3进一步改进，得到了更大的精度和速度优势。
+在此基础上，PaddleDetection对YOLOv3做了改进，进一步提升了速度和精度，最终COCO mAP可以达到43.2。
 ## 方法描述
-将YOLOv3骨架网络更换为ResNet50-vd，同时在最后一个Residual block中引入[Deformable convolution v2](https://arxiv.org/abs/1811.11168)(可变形卷积)替代原始卷积操作。另外，使用[object365数据集](https://www.objects365.org/download.html)训练得到的模型作为coco数据集上的预训练模型，进一步提高YOLOv3的精度。
+1. 将[YOLOv3](https://arxiv.org/pdf/1804.02767.pdf)骨架网络更换为[ResNet50-VD](https://arxiv.org/pdf/1812.01187.pdf)。ResNet50-VD网络相比原生的DarkNet53网络在速度和精度上都有一定的优势，且相较DarkNet53，ResNet系列更容易扩展，针对自己的业务场景可以选择ResNet18、34、101等不同结构作为检测模型的主干网络。
+2. 引入[Deformable Convolution v2](https://arxiv.org/abs/1811.11168)(可变形卷积)替代原始卷积操作，Deformable Convolution已经在多个视觉任务中广泛验证过其效果，在YOLOv3增强模型中考虑到速度与精度的平衡，我们仅使用Deformable Convolution替换了主干网络中Stage5部分的3x3卷积。
+3. 在FPN部分增加[DropBlock](https://arxiv.org/abs/1810.12890)模块，提高模型泛化能力。如下图（b）所示，Dropout操作是分类网络中广泛使用的增强模型泛化能力的重要手段之一。DropBlock算法相比于Dropout算法，在Drop特征的时候会集中Drop掉某一块区域，更适用于在检测任务中提高网络泛化能力。
+![DropBlock示意图](../images/dropblock.png)
+4. YOLOv3作为一阶段检测网络，在定位精度上相比Faster RCNN、Cascade RCNN等网络结构有着天然的劣势，增加[IoU Loss](https://arxiv.org/abs/1908.03851)分支，可以一定程度上提高BBox定位精度，缩小一阶段和两阶段检测网络的差距。
+5. 使用[Object365数据集](https://www.objects365.org/download.html)训练得到的模型作为COCO数据集上的预训练模型。Object365数据集包含约60万张图片以及365种类别，相比COCO数据集进行预训练可以进一步提高YOLOv3的精度。
 ## 使用方法
@@ -21,15 +32,17 @@ PaddleDetection实现版本中使用了 [Bag of Freebies for Training Object Det
 ```bash
 export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
-python tools/train.py -c configs/dcn/yolov3_r50vd_dcn.yml
+python tools/train.py -c configs/dcn/yolov3_r50vd_dcn_iouloss_obj365_pretrained_coco.yml
 ```
 更多模型参数请使用``python tools/train.py --help``查看，或参考[训练、评估及参数说明](../tutorials/GETTING_STARTED_cn.md)文档
 ### 模型效果
-|           模型         |     预训练模型     |    验证集 mAP   |        P4预测速度       |                         下载                           |
+|                   模型                   |                          预训练模型                          | 验证集 mAP |               P4预测速度               |                             下载                             |
-| :---------------------:|:-----------------: | :-------------: | :----------------------:|:-----------------------------------------------------: |
+| :--------------------------------------: | :----------------------------------------------------------: | :--------: | :------------------------------------: | :----------------------------------------------------------: |
-|      YOLOv3 DarkNet    |  [DarkNet pretrain](https://paddle-imagenet-models-name.bj.bcebos.com/DarkNet53_pretrained.tar)             | 38.9 | 原生：88.3ms<br>tensorRT-FP32: 42.5ms | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_darknet.tar) |
+|              YOLOv3 DarkNet              | [DarkNet pretrain](https://paddle-imagenet-models-name.bj.bcebos.com/DarkNet53_pretrained.tar) |    38.9    | 原生：88.3ms<br>tensorRT-FP32: 42.5ms  | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_darknet.tar) |
-| YOLOv3 ResNet50_vd dcn | [ImageNet pretrain](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vd_pretrained.tar)           | 39.1 | 原生：74.4ms<br>tensorRT-FP32: 35.2ms | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_r50vd_dcn_imagenet.tar) |
+|          YOLOv3 ResNet50_vd DCN          | [ImageNet pretrain](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vd_pretrained.tar) |    39.1    | 原生：74.4ms<br>tensorRT-FP32: 35.2ms  | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_r50vd_dcn_imagenet.tar) |
-| YOLOv3 ResNet50_vd dcn | [Object365 pretrain](https://paddlemodels.bj.bcebos.com/object_detection/ResNet50_vd_obj365_pretrained.tar) | 41.4 | 原生：74.4ms<br>tensorRT-FP32: 35.2ms | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_r50vd_dcn_obj365.tar) |
+|          YOLOv3 ResNet50_vd DCN          | [Object365 pretrain](https://paddlemodels.bj.bcebos.com/object_detection/ResNet50_vd_dcn_db_obj365_pretrained.tar) |    42.5    | 原生：74.4ms<br>tensorRT-FP32: 35.2ms  | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_r50vd_dcn_obj365_v2.tar) |
+|     YOLOv3 ResNet50_vd DCN DropBlock     | [Object365 pretrain](https://paddlemodels.bj.bcebos.com/object_detection/ResNet50_vd_dcn_db_obj365_pretrained.tar) |    42.8    | 原生：74.4ms<br/>tensorRT-FP32: 35.2ms | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_r50vd_dcn_db_obj365.tar) |
+| YOLOv3 ResNet50_vd DCN DropBlock IoULoss | [Object365 pretrain](https://paddlemodels.bj.bcebos.com/object_detection/ResNet50_vd_dcn_db_obj365_pretrained.tar) |    43.2    | 原生：74.4ms<br/>tensorRT-FP32: 35.2ms | [下载链接](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_r50vd_dcn_db_obj365.tar) |
--- a/docs/images/dropblock.png
+++ b/docs/images/dropblock.png