Add DiouLoss for YOLOv3. (#367)

7bdbc60d · hu jinda · GitHub · 6e84805b · 7bdbc60d · 7bdbc60d
3 changed files
--- a/configs/yolov3_darknet_voc_diouloss.yml
+++ b/configs/yolov3_darknet_voc_diouloss.yml
+architecture: YOLOv3
+use_gpu: true
+max_iters: 70000
+log_smooth_window: 20
+save_dir: output
+snapshot_iter: 2000
+metric: VOC
+map_type: 11point
+pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/DarkNet53_pretrained.tar
+# Checkpoints are saved under <save_dir>/<config file name>/, so the default
+# eval/infer weights must point at this config's own output directory rather
+# than at the baseline yolov3_darknet_voc run.
+weights: output/yolov3_darknet_voc_diouloss/model_final
+num_classes: 20
+# NOTE(review): the iou_loss configured under YOLOv3Loss is presumably only
+# consumed on the fine-grained loss path -- confirm whether this flag has to be
+# true for DiouLossYolo to actually take effect.
+use_fine_grained_loss: false
+
+YOLOv3:
+  backbone: DarkNet
+  yolo_head: YOLOv3Head
+
+DarkNet:
+  norm_type: sync_bn
+  norm_decay: 0.
+  depth: 53
+
+YOLOv3Head:
+  anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
+  anchors: [[10, 13], [16, 30], [33, 23],
+            [30, 61], [62, 45], [59, 119],
+            [116, 90], [156, 198], [373, 326]]
+  norm_decay: 0.
+  yolo_loss: YOLOv3Loss
+  nms:
+    background_label: -1
+    keep_top_k: 100
+    nms_threshold: 0.45
+    nms_top_k: 1000
+    normalized: false
+    score_threshold: 0.01
+
+YOLOv3Loss:
+  batch_size: 8
+  ignore_thresh: 0.7
+  label_smooth: false
+  iou_loss: DiouLossYolo  # registered class from ppdet/modeling/losses/diou_loss_yolo.py
+
+DiouLossYolo:
+  loss_weight: 5  # same value as the DiouLossYolo.__init__ default
+
+LearningRate:
+  base_lr: 0.001
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones:
+    - 55000
+    - 62000
+  - !LinearWarmup
+    start_factor: 0.
+    steps: 1000
+
+OptimizerBuilder:
+  optimizer:
+    momentum: 0.9
+    type: Momentum
+  regularizer:
+    factor: 0.0005
+    type: L2
+
+_READER_: 'yolov3_reader.yml'
+TrainReader:
+  inputs_def:
+    fields: ['image', 'gt_bbox', 'gt_class', 'gt_score']
+    num_max_boxes: 50
+  dataset:
+    !VOCDataSet
+    dataset_dir: dataset/voc
+    anno_path: trainval.txt
+    use_default_label: true
+    with_background: false
+
+EvalReader:
+  inputs_def:
+    fields: ['image', 'im_size', 'im_id', 'gt_bbox', 'gt_class', 'is_difficult']
+    num_max_boxes: 50
+  dataset:
+    !VOCDataSet
+    dataset_dir: dataset/voc
+    anno_path: test.txt
+    use_default_label: true
+    with_background: false
+
+TestReader:
+  dataset:
+    !ImageFolder
+    use_default_label: true
+    with_background: false
--- a/ppdet/modeling/losses/__init__.py
+++ b/ppdet/modeling/losses/__init__.py
@@ -20,6 +20,7 @@ from . import giou_loss
 from . import diou_loss
 from . import iou_loss
 from . import balanced_l1_loss
+from . import diou_loss_yolo

 from .yolo_loss import *
 from .smooth_l1_loss import *
@@ -27,3 +28,4 @@ from .giou_loss import *
 from .diou_loss import *
 from .iou_loss import *
 from .balanced_l1_loss import *
+from .diou_loss_yolo import *
--- a/ppdet/modeling/losses/diou_loss_yolo.py
+++ b/ppdet/modeling/losses/diou_loss_yolo.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import numpy as np
+from paddle.fluid.param_attr import ParamAttr
+from paddle.fluid.initializer import NumpyArrayInitializer
+
+from paddle import fluid
+from ppdet.core.workspace import register, serializable
+from .iou_loss import IouLoss
+
+__all__ = ['DiouLossYolo']
+
+
+@register
+@serializable
+class DiouLossYolo(IouLoss):
+    """
+    Distance-IoU Loss, see https://arxiv.org/abs/1911.08287
+    Args:
+        loss_weight (float): diou loss weight, default is 5
+        max_height (int): max height of input to support random shape input
+        max_width (int): max width of input to support random shape input
+    """
+
+    def __init__(self, loss_weight=5, max_height=608, max_width=608):
+        self._loss_weight = loss_weight
+        self._MAX_HI = max_height
+        self._MAX_WI = max_width
+
+    def __call__(self,
+                 x,
+                 y,
+                 w,
+                 h,
+                 tx,
+                 ty,
+                 tw,
+                 th,
+                 anchors,
+                 downsample_ratio,
+                 batch_size,
+                 eps=1.e-10):
+        '''
+        Args:
+            x  | y | w | h  ([Variables]): the output of yolov3 for encoded x|y|w|h
+            tx |ty |tw |th  ([Variables]): the target of yolov3 for encoded x|y|w|h
+            anchors ([float]): list of anchors for current output layer
+            downsample_ratio (float): the downsample ratio for current output layer
+            batch_size (int): training batch size
+            eps (float): the decimal to prevent the denominator eqaul zero
+        '''
+        x1, y1, x2, y2 = self._bbox_transform(
+            x, y, w, h, anchors, downsample_ratio, batch_size, False)
+        x1g, y1g, x2g, y2g = self._bbox_transform(
+            tx, ty, tw, th, anchors, downsample_ratio, batch_size, True)
+
+        #central coordinates
+        cx = (x1 + x2) / 2
+        cy = (y1 + y2) / 2
+        w = x2 - x1
+        h = y2 - y1
+
+        cxg = (x1g + x2g) / 2
+        cyg = (y1g + y2g) / 2
+        wg = x2g - x1g
+        hg = y2g - y1g
+
+        x2 = fluid.layers.elementwise_max(x1, x2)
+        y2 = fluid.layers.elementwise_max(y1, y2)
+        # A and B
+        xkis1 = fluid.layers.elementwise_max(x1, x1g)
+        ykis1 = fluid.layers.elementwise_max(y1, y1g)
+        xkis2 = fluid.layers.elementwise_min(x2, x2g)
+        ykis2 = fluid.layers.elementwise_min(y2, y2g)
+        # A or B
+        xc1 = fluid.layers.elementwise_min(x1, x1g)
+        yc1 = fluid.layers.elementwise_min(y1, y1g)
+        xc2 = fluid.layers.elementwise_max(x2, x2g)
+        yc2 = fluid.layers.elementwise_max(y2, y2g)
+
+        intsctk = (xkis2 - xkis1) * (ykis2 - ykis1)
+        intsctk = intsctk * fluid.layers.greater_than(
+            xkis2, xkis1) * fluid.layers.greater_than(ykis2, ykis1)
+        unionk = (x2 - x1) * (y2 - y1) + (x2g - x1g) * (y2g - y1g
+                                                        ) - intsctk + eps
+        iouk = intsctk / unionk
+
+        # diou_loss
+        dist_intersection = (cx - cxg) * (cx - cxg) + (cy - cyg) * (cy - cyg)
+        dist_union = (xc2 - xc1) * (xc2 - xc1) + (yc2 - yc1) * (yc2 - yc1)
+        diou_term = (dist_intersection + eps) / (dist_union + eps)
+
+        loss_diou = 1. - iouk + diou_term
+        loss_diou = loss_diou * self._loss_weight
+
+        return loss_diou