# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from ppdet.core.workspace import register

from ..bbox_utils import decode_yolo, xywh2xyxy, batch_iou_similarity

__all__ = ['YOLOv3Loss']


def bbox_transform(pbox, anchor, downsample):
    """Decode YOLO box terms and convert them to corner format.

    Args:
        pbox: box terms forwarded unchanged to ``decode_yolo`` (callers in
            this file pass a list ``[x, y, w, h]``).
        anchor: anchors of the current detection level.
        downsample: downsample ratio of the current detection level.

    Returns:
        Whatever ``xywh2xyxy`` returns for the decoded boxes.
    """
    decoded = decode_yolo(pbox, anchor, downsample)
    return xywh2xyxy(decoded)


@register
class YOLOv3Loss(nn.Layer):
    # Training loss for YOLOv3-style heads. ``forward`` calls ``yolov3_loss``
    # once per detection level and accumulates the returned terms key by key.
    # Documented with comments (not a class docstring) so ``__doc__`` of the
    # registered class stays exactly as before.

    __inject__ = ['iou_loss', 'iou_aware_loss']
    __shared__ = ['num_classes']

    def __init__(self,
                 num_classes=80,
                 ignore_thresh=0.7,
                 label_smooth=False,
                 downsample=[32, 16, 8],
                 scale_x_y=1.,
                 iou_loss=None,
                 iou_aware_loss=None):
        """
        YOLOv3Loss layer

        Args:
            num_classes (int): number of foreground classes
            ignore_thresh (float): threshold to ignore confidence loss
            label_smooth (bool): whether to use label smoothing
            downsample (list): downsample ratio for each detection block
            scale_x_y (float): scale_x_y factor
            iou_loss (object): IoULoss instance
            iou_aware_loss (object): IouAwareLoss instance
        """
        super(YOLOv3Loss, self).__init__()
        self.num_classes = num_classes
        self.ignore_thresh = ignore_thresh
        self.label_smooth = label_smooth
        # Copy the ratios so this instance never aliases the shared default
        # list from the signature (or a list owned by the caller).
        self.downsample = list(downsample)
        self.scale_x_y = scale_x_y
        self.iou_loss = iou_loss
        self.iou_aware_loss = iou_aware_loss
        # One ``[x, y, w, h, obj, pcls]`` entry per level, filled by
        # ``yolov3_loss`` and emptied at the start of every ``forward``.
        self.distill_pairs = []

    def obj_loss(self, pbox, gbox, pobj, tobj, anchor, downsample):
        """Compute the per-location objectness loss for one level.

        Args:
            pbox: predicted box terms, passed to ``bbox_transform``.
            gbox: ground-truth boxes indexed as ``[:, :, 0:4]``; the first
                two values are treated as the box center and the last two
                as its size.
            pobj: objectness logits, flattened to ``(batch, -1)``.
            tobj: objectness targets, flattened to ``(batch, -1)``; values
                greater than zero mark positive locations and also weight
                the positive loss.
            anchor: anchors of this level.
            downsample: downsample ratio of this level.

        Returns:
            Unreduced loss of shape ``(batch, -1)``: the positive term plus
            the negative term restricted to locations whose best IoU with
            any ground-truth box is at most ``ignore_thresh``.
        """
        # Predicted boxes: decode to corners, then flatten to (batch, -1, 4).
        pbox = bbox_transform(pbox, anchor, downsample)
        pbox = paddle.concat(pbox, axis=-1)
        b = pbox.shape[0]
        pbox = pbox.reshape((b, -1, 4))

        # Ground-truth boxes: center/size -> min/max corners.
        center = gbox[:, :, 0:2]
        half_size = gbox[:, :, 2:4] * 0.5
        gbox = paddle.concat([center - half_size, center + half_size], axis=-1)

        iou = batch_iou_similarity(pbox, gbox)
        iou.stop_gradient = True
        iou_max = iou.max(2)  # [N, M1]
        # 1 where the best-matching ground truth is still a poor match.
        iou_mask = paddle.cast(iou_max <= self.ignore_thresh, dtype=pbox.dtype)
        iou_mask.stop_gradient = True

        pobj = pobj.reshape((b, -1))
        tobj = tobj.reshape((b, -1))
        obj_mask = paddle.cast(tobj > 0, dtype=pbox.dtype)
        obj_mask.stop_gradient = True

        loss_obj = F.binary_cross_entropy_with_logits(
            pobj, obj_mask, reduction='none')
        loss_obj_pos = (loss_obj * tobj)
        loss_obj_neg = (loss_obj * (1 - obj_mask) * iou_mask)
        return loss_obj_pos + loss_obj_neg

    def cls_loss(self, pcls, tcls):
        """Compute the unreduced classification loss.

        When ``label_smooth`` is enabled, targets greater than zero become
        ``1 - delta`` and all others become ``delta``, where
        ``delta = min(1 / num_classes, 1 / 40)``.

        Args:
            pcls: class logits.
            tcls: class targets with the same shape as ``pcls``.

        Returns:
            Element-wise binary cross entropy (with logits), not reduced.
        """
        if self.label_smooth:
            delta = min(1. / self.num_classes, 1. / 40)
            pos, neg = 1 - delta, delta
            # 1 for positive, 0 for negative
            tcls = pos * paddle.cast(
                tcls > 0., dtype=tcls.dtype) + neg * paddle.cast(
                    tcls <= 0., dtype=tcls.dtype)

        loss_cls = F.binary_cross_entropy_with_logits(
            pcls, tcls, reduction='none')
        return loss_cls

    def yolov3_loss(self, p, t, gt_box, anchor, downsample, scale=1.,
                    eps=1e-10):
        """Compute all loss terms for a single detection level.

        Args:
            p: 4-D head output. When ``iou_aware_loss`` is set, the first
                ``len(anchor)`` channels are split off as IoU predictions.
            t: 5-D target tensor; after moving axis 2 last, its channels are
                read as ``tx, ty, tw, th, tscale, tobj, tcls...``.
            gt_box: ground-truth boxes, forwarded to ``obj_loss``.
            anchor: anchors of this level.
            downsample: downsample ratio of this level.
            scale (float): scale applied to the sigmoid of x/y.
            eps (float): tolerance for treating ``scale`` as exactly 1.

        Returns:
            dict: ``loss_xy``, ``loss_wh``, ``loss_obj``, ``loss_cls`` and,
            when the matching loss object is configured, ``loss_iou`` /
            ``loss_iou_aware``. Each term is summed over axes 1-4 (over the
            last axis for ``loss_obj``) and then averaged.

        Side effects:
            Appends the raw ``[x, y, w, h, obj, pcls]`` slices (before the
            sigmoid/scale is applied to x/y) to ``self.distill_pairs``.
        """
        na = len(anchor)
        # Feature-map dims get their own names so they are not confused with
        # the predicted width/height tensors ``w``/``h`` sliced out below.
        b, _, feat_h, feat_w = p.shape
        if self.iou_aware_loss is not None:
            ioup, p = p[:, 0:na, :, :], p[:, na:, :, :]
            ioup = ioup.unsqueeze(-1)
        p = p.reshape((b, na, -1, feat_h, feat_w)).transpose((0, 1, 3, 4, 2))
        x, y = p[:, :, :, :, 0:1], p[:, :, :, :, 1:2]
        w, h = p[:, :, :, :, 2:3], p[:, :, :, :, 3:4]
        obj, pcls = p[:, :, :, :, 4:5], p[:, :, :, :, 5:]
        self.distill_pairs.append([x, y, w, h, obj, pcls])

        t = t.transpose((0, 1, 3, 4, 2))
        tx, ty = t[:, :, :, :, 0:1], t[:, :, :, :, 1:2]
        tw, th = t[:, :, :, :, 2:3], t[:, :, :, :, 3:4]
        tscale = t[:, :, :, :, 4:5]
        tobj, tcls = t[:, :, :, :, 5:6], t[:, :, :, :, 6:]

        # Box-regression terms are weighted by tscale at positive locations.
        tscale_obj = tscale * tobj
        loss = dict()

        x = scale * F.sigmoid(x) - 0.5 * (scale - 1.)
        y = scale * F.sigmoid(y) - 0.5 * (scale - 1.)

        if abs(scale - 1.) < eps:
            # With scale == 1, x/y are plain sigmoid outputs, so BCE applies.
            loss_x = F.binary_cross_entropy(x, tx, reduction='none')
            loss_y = F.binary_cross_entropy(y, ty, reduction='none')
            loss_xy = tscale_obj * (loss_x + loss_y)
        else:
            # With any other scale, x/y are no longer plain sigmoid outputs;
            # use L1 instead.
            loss_x = paddle.abs(x - tx)
            loss_y = paddle.abs(y - ty)
            loss_xy = tscale_obj * (loss_x + loss_y)

        loss_xy = loss_xy.sum([1, 2, 3, 4]).mean()

        loss_w = paddle.abs(w - tw)
        loss_h = paddle.abs(h - th)
        loss_wh = tscale_obj * (loss_w + loss_h)
        loss_wh = loss_wh.sum([1, 2, 3, 4]).mean()

        loss['loss_xy'] = loss_xy
        loss['loss_wh'] = loss_wh

        if self.iou_loss is not None:
            # warn: do not modify x, y, w, h in place
            box, tbox = [x, y, w, h], [tx, ty, tw, th]
            pbox = bbox_transform(box, anchor, downsample)
            gbox = bbox_transform(tbox, anchor, downsample)
            loss_iou = self.iou_loss(pbox, gbox)
            loss_iou = loss_iou * tscale_obj
            loss_iou = loss_iou.sum([1, 2, 3, 4]).mean()
            loss['loss_iou'] = loss_iou

        if self.iou_aware_loss is not None:
            box, tbox = [x, y, w, h], [tx, ty, tw, th]
            pbox = bbox_transform(box, anchor, downsample)
            gbox = bbox_transform(tbox, anchor, downsample)
            loss_iou_aware = self.iou_aware_loss(ioup, pbox, gbox)
            loss_iou_aware = loss_iou_aware * tobj
            loss_iou_aware = loss_iou_aware.sum([1, 2, 3, 4]).mean()
            loss['loss_iou_aware'] = loss_iou_aware

        box = [x, y, w, h]
        loss_obj = self.obj_loss(box, gt_box, obj, tobj, anchor, downsample)
        loss_obj = loss_obj.sum(-1).mean()
        loss['loss_obj'] = loss_obj
        loss_cls = self.cls_loss(pcls, tcls) * tobj
        loss_cls = loss_cls.sum([1, 2, 3, 4]).mean()
        loss['loss_cls'] = loss_cls
        return loss

    def forward(self, inputs, targets, anchors):
        """Accumulate the loss terms of every detection level.

        Args:
            inputs: per-level head outputs; each is cast to ``float32``.
            targets (dict): must contain ``'target{i}'`` for every index
                ``i`` of ``inputs`` and ``'gt_bbox'``.
            anchors: per-level anchors.

        Returns:
            dict: each term summed over the levels, plus ``'loss'`` holding
            the sum of all terms.

        Note:
            Levels are paired with ``zip``, so iteration stops at the
            shortest of ``inputs``, ``anchors`` and ``self.downsample``.
        """
        num_levels = len(inputs)
        gt_targets = [
            targets['target{}'.format(i)] for i in range(num_levels)
        ]
        gt_box = targets['gt_bbox']
        yolo_losses = dict()
        # Drop the entries recorded during the previous call.
        self.distill_pairs.clear()
        for x, t, anchor, downsample in zip(inputs, gt_targets, anchors,
                                            self.downsample):
            yolo_loss = self.yolov3_loss(
                x.astype('float32'), t, gt_box, anchor, downsample,
                self.scale_x_y)
            for k, v in yolo_loss.items():
                if k in yolo_losses:
                    yolo_losses[k] += v
                else:
                    yolo_losses[k] = v

        # Evaluated before the 'loss' key is inserted, so the total covers
        # only the individual terms (0 when there are none).
        yolo_losses['loss'] = sum(yolo_losses.values())
        return yolo_losses