# test_yolov3_loss_op.py
#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

15 16
from __future__ import division

17 18
import unittest
import numpy as np
19 20
from scipy.special import logit
from scipy.special import expit
21 22
from op_test import OpTest

23 24
from paddle.fluid import core

D
dengkaipeng 已提交
25

26 27
def l1loss(x, y):
    """Return the absolute difference ``|x - y|`` (element-wise for arrays)."""
    residual = x - y
    return abs(residual)
D
dengkaipeng 已提交
28 29


30
def sce(x, label):
    """Sigmoid cross-entropy between logit(s) ``x`` and target(s) ``label``.

    Mathematically this is
    ``-label * log(sigmoid(x)) - (1 - label) * log(1 - sigmoid(x))``.
    It is evaluated in the equivalent form
    ``max(x, 0) - x * label + log(1 + exp(-|x|))`` so that a saturated
    sigmoid never produces ``log(0)`` (which turned into ``0 * -inf = nan``
    for large ``|x|`` in the direct formula).

    Args:
        x: Logit, scalar or array.
        label: Target probability in ``[0, 1]``, scalar or array that
            broadcasts against ``x``.

    Returns:
        The loss, with the broadcast shape of ``x`` and ``label``.
    """
    # exp(-|x|) is always in (0, 1], so log1p stays finite and accurate.
    return np.maximum(x, 0.0) - x * label + np.log1p(np.exp(-np.abs(x)))
35 36


37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68
def sigmoid(x):
    """Logistic function ``1 / (1 + exp(-x))``, element-wise for arrays."""
    decay = np.exp(-1.0 * x)
    return 1.0 / (decay + 1.0)


def batch_xywh_box_iou(box1, box2):
    """Pairwise IoU between two batches of center-size boxes.

    Args:
        box1: Array indexed as ``[batch, i, (cx, cy, w, h)]``.
        box2: Array indexed as ``[batch, j, (cx, cy, w, h)]``.

    Returns:
        Array indexed as ``[batch, i, j]`` holding the IoU of ``box1[:, i]``
        with ``box2[:, j]``. The overlap extent along each axis is clipped
        to ``[0, 1]`` before the intersection area is formed.
    """

    def _edges(boxes):
        # Convert (cx, cy, w, h) into (left, right, top, bottom).
        half_w = boxes[:, :, 2] / 2
        half_h = boxes[:, :, 3] / 2
        return (boxes[:, :, 0] - half_w, boxes[:, :, 0] + half_w,
                boxes[:, :, 1] - half_h, boxes[:, :, 1] + half_h)

    l1, r1, t1, bt1 = _edges(box1)
    l2, r2, t2, bt2 = _edges(box2)

    # box1 is expanded along axis 2 and box2 along axis 1 so every pair of
    # boxes within a batch entry is compared.
    overlap_w = np.clip(
        np.minimum(r1[:, :, np.newaxis], r2[:, np.newaxis, :]) -
        np.maximum(l1[:, :, np.newaxis], l2[:, np.newaxis, :]), 0., 1.)
    overlap_h = np.clip(
        np.minimum(bt1[:, :, np.newaxis], bt2[:, np.newaxis, :]) -
        np.maximum(t1[:, :, np.newaxis], t2[:, np.newaxis, :]), 0., 1.)
    intersection = overlap_w * overlap_h

    area1 = (r1 - l1) * (bt1 - t1)
    area2 = (r2 - l2) * (bt2 - t2)
    total = area1[:, :, np.newaxis] + area2[:, np.newaxis, :] - intersection

    return intersection / total


69
def YOLOv3Loss(x, gtbox, gtlabel, gtscore, attrs):
    """NumPy reference implementation of the ``yolov3_loss`` computation.

    Args:
        x: Raw predictions of shape ``(n, mask_num * (5 + class_num), h, w)``;
            reshaped internally to ``(n, mask_num, h, w, 5 + class_num)``
            with the last axis laid out as ``(tx, ty, tw, th, obj, classes)``.
        gtbox: Ground-truth boxes indexed ``[n, b, (cx, cy, w, h)]``. The
            center is multiplied by the grid size to locate its cell, so the
            values are treated as normalized to the image. A box whose width
            and height sum to zero is skipped.
        gtlabel: Class index per ground-truth box, indexed ``[n, b]``.
        gtscore: Per-box weight applied to its loss terms, indexed ``[n, b]``.
        attrs: Dict with keys ``anchors`` (flat ``[w0, h0, w1, h1, ...]``),
            ``anchor_mask`` (list of anchor indices used by this head),
            ``class_num``, ``ignore_thresh``, ``downsample_ratio`` and
            ``use_label_smooth``.

    Returns:
        Tuple ``(loss, objness, gt_matches)``:
        ``loss`` is float32 of shape ``(n,)``;
        ``objness`` is float32 of shape ``(n, mask_num, h, w)`` holding
        ``-1`` for predictions whose best IoU exceeds ``ignore_thresh``,
        the box score at cells assigned to a ground-truth box and ``0``
        elsewhere;
        ``gt_matches`` is int32 of shape ``(n, b)`` holding the position in
        ``anchor_mask`` of the matched anchor, or ``-1`` when the box is
        skipped or its best anchor is not in ``anchor_mask``.
    """
    n, _, h, w = x.shape
    b = gtbox.shape[1]
    anchors = attrs['anchors']
    an_num = len(anchors) // 2
    anchor_mask = attrs['anchor_mask']
    mask_num = len(anchor_mask)
    class_num = attrs["class_num"]
    ignore_thresh = attrs['ignore_thresh']
    downsample_ratio = attrs['downsample_ratio']
    use_label_smooth = attrs['use_label_smooth']
    input_size = downsample_ratio * h
    x = x.reshape((n, mask_num, 5 + class_num, h, w)).transpose((0, 1, 3, 4, 2))
    loss = np.zeros((n)).astype('float32')

    # Class targets; smoothing moves 1/class_num of the mass off the hot class.
    label_pos = 1.0 - 1.0 / class_num if use_label_smooth else 1.0
    label_neg = 1.0 / class_num if use_label_smooth else 0.0

    # Decode predicted boxes into normalized (cx, cy, w, h).
    pred_box = x[:, :, :, :, :4].copy()
    grid_x = np.tile(np.arange(w).reshape((1, w)), (h, 1))
    grid_y = np.tile(np.arange(h).reshape((h, 1)), (1, w))
    pred_box[:, :, :, :, 0] = (grid_x + sigmoid(pred_box[:, :, :, :, 0])) / w
    pred_box[:, :, :, :, 1] = (grid_y + sigmoid(pred_box[:, :, :, :, 1])) / h

    mask_anchors = [(anchors[2 * m], anchors[2 * m + 1]) for m in anchor_mask]
    anchors_s = np.array(
        [(an_w / input_size, an_h / input_size) for an_w, an_h in mask_anchors])
    anchor_w = anchors_s[:, 0:1].reshape((1, mask_num, 1, 1))
    anchor_h = anchors_s[:, 1:2].reshape((1, mask_num, 1, 1))
    pred_box[:, :, :, :, 2] = np.exp(pred_box[:, :, :, :, 2]) * anchor_w
    pred_box[:, :, :, :, 3] = np.exp(pred_box[:, :, :, :, 3]) * anchor_h

    pred_box = pred_box.reshape((n, -1, 4))
    pred_obj = x[:, :, :, :, 4].reshape((n, -1))

    # Predictions overlapping any ground-truth box above the threshold are
    # marked -1 so they contribute no objectness loss below.
    objness = np.zeros(pred_box.shape[:2]).astype('float32')
    ious = batch_xywh_box_iou(pred_box, gtbox)
    ious_max = np.max(ious, axis=-1)
    objness = np.where(ious_max > ignore_thresh, -np.ones_like(objness),
                       objness)

    # Match each ground-truth box to the best anchor by shape only: both are
    # centered at the origin before the IoU is taken.
    gtbox_shift = gtbox.copy()
    gtbox_shift[:, :, 0] = 0
    gtbox_shift[:, :, 1] = 0

    anchors = [(anchors[2 * i], anchors[2 * i + 1]) for i in range(0, an_num)]
    anchors_s = np.array(
        [(an_w / input_size, an_h / input_size) for an_w, an_h in anchors])
    anchor_boxes = np.concatenate(
        [np.zeros_like(anchors_s), anchors_s], axis=-1)
    anchor_boxes = np.tile(anchor_boxes[np.newaxis, :, :], (n, 1, 1))
    ious = batch_xywh_box_iou(gtbox_shift, anchor_boxes)
    iou_matches = np.argmax(ious, axis=-1)

    gt_matches = iou_matches.copy()
    for i in range(n):
        for j in range(b):
            if gtbox[i, j, 2:].sum() == 0:
                gt_matches[i, j] = -1
                continue
            if iou_matches[i, j] not in anchor_mask:
                gt_matches[i, j] = -1
                continue
            an_idx = anchor_mask.index(iou_matches[i, j])
            gt_matches[i, j] = an_idx
            gi = int(gtbox[i, j, 0] * w)
            gj = int(gtbox[i, j, 1] * h)

            tx = gtbox[i, j, 0] * w - gi
            # The vertical offset is measured on the grid height, matching
            # how gj is derived above (previously scaled by w, which is only
            # correct for square feature maps).
            ty = gtbox[i, j, 1] * h - gj
            tw = np.log(gtbox[i, j, 2] * input_size / mask_anchors[an_idx][0])
            th = np.log(gtbox[i, j, 3] * input_size / mask_anchors[an_idx][1])
            # Smaller boxes get a larger location-loss weight.
            scale = (2.0 - gtbox[i, j, 2] * gtbox[i, j, 3]) * gtscore[i, j]
            loss[i] += sce(x[i, an_idx, gj, gi, 0], tx) * scale
            loss[i] += sce(x[i, an_idx, gj, gi, 1], ty) * scale
            loss[i] += l1loss(x[i, an_idx, gj, gi, 2], tw) * scale
            loss[i] += l1loss(x[i, an_idx, gj, gi, 3], th) * scale

            objness[i, an_idx * h * w + gj * w + gi] = gtscore[i, j]

            for label_idx in range(class_num):
                loss[i] += sce(x[i, an_idx, gj, gi, 5 + label_idx], label_pos
                               if label_idx == gtlabel[i, j] else
                               label_neg) * gtscore[i, j]

        # Objectness loss: positives weighted by their score, zeros as
        # negatives, and -1 (ignored) entries skipped.
        for j in range(mask_num * h * w):
            if objness[i, j] > 0:
                loss[i] += sce(pred_obj[i, j], 1.0) * objness[i, j]
            elif objness[i, j] == 0:
                loss[i] += sce(pred_obj[i, j], 0.0)

    return (loss, objness.reshape((n, mask_num, h, w)).astype('float32'),
            gt_matches.astype('int32'))
162 163


164 165 166 167
class TestYolov3LossOp(OpTest):
    """Checks the ``yolov3_loss`` op against the NumPy reference ``YOLOv3Loss``."""

    def setUp(self):
        """Draw random inputs and compute the expected outputs."""
        self.initTestCase()
        self.op_type = 'yolov3_loss'

        per_box_shape = self.gtbox_shape[:2]
        x = logit(np.random.uniform(0, 1, self.x_shape).astype('float32'))
        gtbox = np.random.random(size=self.gtbox_shape).astype('float32')
        gtlabel = np.random.randint(0, self.class_num, per_box_shape)
        gtscore = np.random.random(per_box_shape).astype('float32')
        # A random 0/1 mask zeroes out some boxes (and their labels).
        gtmask = np.random.randint(0, 2, per_box_shape)
        gtbox = gtbox * gtmask[:, :, np.newaxis]
        gtlabel = gtlabel * gtmask

        attr_names = ("anchors", "anchor_mask", "class_num", "ignore_thresh",
                      "downsample_ratio", "use_label_smooth")
        self.attrs = {name: getattr(self, name) for name in attr_names}

        self.inputs = {
            'X': x,
            'GTBox': gtbox.astype('float32'),
            'GTLabel': gtlabel.astype('int32'),
            'GTScore': gtscore.astype('float32'),
        }

        expected = YOLOv3Loss(x, gtbox, gtlabel, gtscore, self.attrs)
        self.outputs = dict(
            zip(('Loss', 'ObjectnessMask', "GTMatchMask"), expected))

    def test_check_output(self):
        """Compare the op's forward outputs with the reference on CPU."""
        self.check_output_with_place(core.CPUPlace(), atol=2e-3)

    def test_check_grad_ignore_gtbox(self):
        """Check the gradient of ``Loss`` with respect to ``X`` on CPU."""
        self.check_grad_with_place(
            core.CPUPlace(), ['X'],
            'Loss',
            no_grad_set={"GTBox", "GTLabel", "GTScore"},
            max_relative_error=0.2)

    def initTestCase(self):
        """Set the op attributes and tensor shapes used by ``setUp``."""
        self.anchors = [
            10, 13, 16, 30, 33, 23, 30, 61, 62, 45, 59, 119, 116, 90, 156, 198,
            373, 326
        ]
        self.anchor_mask = [0, 1, 2]
        self.class_num = 5
        self.ignore_thresh = 0.5
        self.downsample_ratio = 32
        channels = len(self.anchor_mask) * (5 + self.class_num)
        self.x_shape = (3, channels, 5, 5)
        self.gtbox_shape = (3, 5, 4)
        self.use_label_smooth = True


class TestYolov3LossWithoutLabelSmooth(TestYolov3LossOp):
    """Runs the base test case with label smoothing turned off."""

    def initTestCase(self):
        """Use the base configuration, then disable label smoothing.

        ``setUp`` only calls ``initTestCase``; without this override
        ``set_label_smooth`` was never invoked and this class repeated the
        base test with ``use_label_smooth = True``.
        """
        super(TestYolov3LossWithoutLabelSmooth, self).initTestCase()
        self.set_label_smooth()

    def set_label_smooth(self):
        """Disable label smoothing for this test case."""
        self.use_label_smooth = False
228 229 230 231


# Run the test cases through unittest when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()