diff --git a/configs/yolov3_darknet_voc_diouloss.yml b/configs/yolov3_darknet_voc_diouloss.yml new file mode 100644 index 0000000000000000000000000000000000000000..52f5d0082238b45d3fa290c4c51e04bcd92c4d18 --- /dev/null +++ b/configs/yolov3_darknet_voc_diouloss.yml @@ -0,0 +1,94 @@ +architecture: YOLOv3 +use_gpu: true +max_iters: 70000 +log_smooth_window: 20 +save_dir: output +snapshot_iter: 2000 +metric: VOC +map_type: 11point +pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/DarkNet53_pretrained.tar +weights: output/yolov3_darknet_voc/model_final +num_classes: 20 +use_fine_grained_loss: false + +YOLOv3: + backbone: DarkNet + yolo_head: YOLOv3Head + +DarkNet: + norm_type: sync_bn + norm_decay: 0. + depth: 53 + +YOLOv3Head: + anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]] + anchors: [[10, 13], [16, 30], [33, 23], + [30, 61], [62, 45], [59, 119], + [116, 90], [156, 198], [373, 326]] + norm_decay: 0. + yolo_loss: YOLOv3Loss + nms: + background_label: -1 + keep_top_k: 100 + nms_threshold: 0.45 + nms_top_k: 1000 + normalized: false + score_threshold: 0.01 + +YOLOv3Loss: + batch_size: 8 + ignore_thresh: 0.7 + label_smooth: false + iou_loss: DiouLossYolo + +DiouLossYolo: + loss_weight: 5 + +LearningRate: + base_lr: 0.001 + schedulers: + - !PiecewiseDecay + gamma: 0.1 + milestones: + - 55000 + - 62000 + - !LinearWarmup + start_factor: 0. 
+ steps: 1000 + +OptimizerBuilder: + optimizer: + momentum: 0.9 + type: Momentum + regularizer: + factor: 0.0005 + type: L2 + +_READER_: 'yolov3_reader.yml' +TrainReader: + inputs_def: + fields: ['image', 'gt_bbox', 'gt_class', 'gt_score'] + num_max_boxes: 50 + dataset: + !VOCDataSet + dataset_dir: dataset/voc + anno_path: trainval.txt + use_default_label: true + with_background: false + +EvalReader: + inputs_def: + fields: ['image', 'im_size', 'im_id', 'gt_bbox', 'gt_class', 'is_difficult'] + num_max_boxes: 50 + dataset: + !VOCDataSet + dataset_dir: dataset/voc + anno_path: test.txt + use_default_label: true + with_background: false + +TestReader: + dataset: + !ImageFolder + use_default_label: true + with_background: false diff --git a/ppdet/modeling/losses/__init__.py b/ppdet/modeling/losses/__init__.py index eabac9bf86a28431647a590373a60db8aea1090c..e82e7fbf6e2ca5ace2576ae271b4fa04b403e65c 100644 --- a/ppdet/modeling/losses/__init__.py +++ b/ppdet/modeling/losses/__init__.py @@ -20,6 +20,7 @@ from . import giou_loss from . import diou_loss from . import iou_loss from . import balanced_l1_loss +from . import diou_loss_yolo from .yolo_loss import * from .smooth_l1_loss import * @@ -27,3 +28,4 @@ from .giou_loss import * from .diou_loss import * from .iou_loss import * from .balanced_l1_loss import * +from .diou_loss_yolo import * diff --git a/ppdet/modeling/losses/diou_loss_yolo.py b/ppdet/modeling/losses/diou_loss_yolo.py new file mode 100644 index 0000000000000000000000000000000000000000..c627c178e9df186b6e48997b8238884c94b69ba7 --- /dev/null +++ b/ppdet/modeling/losses/diou_loss_yolo.py @@ -0,0 +1,111 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.initializer import NumpyArrayInitializer

from paddle import fluid
from ppdet.core.workspace import register, serializable
from .iou_loss import IouLoss

__all__ = ['DiouLossYolo']


@register
@serializable
class DiouLossYolo(IouLoss):
    """
    Distance-IoU loss for YOLOv3, see https://arxiv.org/abs/1911.08287

    The loss is ``loss_weight * (1 - IoU + d^2 / c^2)``, where ``d`` is the
    distance between the centers of the predicted and target boxes and ``c``
    is the diagonal of the smallest box enclosing both.

    Args:
        loss_weight (float): diou loss weight, default is 5
        max_height (int): max height of input to support random shape input
        max_width (int): max width of input to support random shape input
    """

    def __init__(self, loss_weight=5, max_height=608, max_width=608):
        # NOTE(review): the attributes are assigned directly and the base
        # class initializer is not called; confirm IouLoss needs no other
        # state before relying on inherited helpers.
        self._loss_weight = loss_weight
        self._MAX_HI = max_height
        self._MAX_WI = max_width

    def __call__(self,
                 x,
                 y,
                 w,
                 h,
                 tx,
                 ty,
                 tw,
                 th,
                 anchors,
                 downsample_ratio,
                 batch_size,
                 eps=1.e-10):
        '''
        Build the weighted DIoU loss between predicted and target boxes.

        Args:
            x | y | w | h ([Variables]): the output of yolov3 for encoded x|y|w|h
            tx |ty |tw |th ([Variables]): the target of yolov3 for encoded x|y|w|h
            anchors ([float]): list of anchors for current output layer
            downsample_ratio (float): the downsample ratio for current output layer
            batch_size (int): training batch size
            eps (float): small constant that keeps the denominators from
                being equal to zero

        Returns:
            Variable: ``(1 - iou + diou_term) * loss_weight`` computed
                elementwise on the decoded boxes.
        '''
        # Decode the encoded predictions and targets into corner coordinates.
        # `_bbox_transform` is not defined in this class (presumably inherited
        # from IouLoss); its last argument is False for predictions and True
        # for targets.
        x1, y1, x2, y2 = self._bbox_transform(
            x, y, w, h, anchors, downsample_ratio, batch_size, False)
        x1g, y1g, x2g, y2g = self._bbox_transform(
            tx, ty, tw, th, anchors, downsample_ratio, batch_size, True)

        # Box centers; these use the corners as decoded, before the
        # x2 / y2 clamp applied just below.
        cx = (x1 + x2) / 2
        cy = (y1 + y2) / 2
        cxg = (x1g + x2g) / 2
        cyg = (y1g + y2g) / 2

        # Guarantee x2 >= x1 and y2 >= y1 for the predicted box.
        x2 = fluid.layers.elementwise_max(x1, x2)
        y2 = fluid.layers.elementwise_max(y1, y2)

        # Intersection rectangle of prediction and target.
        xkis1 = fluid.layers.elementwise_max(x1, x1g)
        ykis1 = fluid.layers.elementwise_max(y1, y1g)
        xkis2 = fluid.layers.elementwise_min(x2, x2g)
        ykis2 = fluid.layers.elementwise_min(y2, y2g)

        # Smallest rectangle enclosing both prediction and target.
        xc1 = fluid.layers.elementwise_min(x1, x1g)
        yc1 = fluid.layers.elementwise_min(y1, y1g)
        xc2 = fluid.layers.elementwise_max(x2, x2g)
        yc2 = fluid.layers.elementwise_max(y2, y2g)

        # Intersection area, multiplied by the two comparison results so it
        # is only kept where the rectangles really overlap on both axes.
        # NOTE(review): relies on the comparison outputs being multipliable
        # with the float area tensor - confirm for the targeted Paddle version.
        intsctk = (xkis2 - xkis1) * (ykis2 - ykis1)
        intsctk = intsctk * fluid.layers.greater_than(
            xkis2, xkis1) * fluid.layers.greater_than(ykis2, ykis1)
        unionk = (x2 - x1) * (y2 - y1) + (x2g - x1g) * (y2g - y1g
                                                        ) - intsctk + eps
        iouk = intsctk / unionk

        # DIoU penalty: squared center distance over squared diagonal of the
        # enclosing rectangle.
        center_dist_sq = (cx - cxg) * (cx - cxg) + (cy - cyg) * (cy - cyg)
        enclose_diag_sq = (xc2 - xc1) * (xc2 - xc1) + (yc2 - yc1) * (yc2 - yc1)
        diou_term = (center_dist_sq + eps) / (enclose_diag_sq + eps)

        loss_diou = 1. - iouk + diou_term
        loss_diou = loss_diou * self._loss_weight

        return loss_diou