test_yolov3_loss_op.py 10.2 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14
#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

15 16
from __future__ import division

17 18
import unittest
import numpy as np
19 20
from scipy.special import logit
from scipy.special import expit
21 22
from op_test import OpTest

23 24
from paddle.fluid import core

D
dengkaipeng 已提交
25

26 27
def l1loss(x, y):
    """Return the absolute difference ``|x - y|``.

    Works on anything that supports subtraction and ``abs`` (Python
    scalars as well as NumPy arrays, where the result is element-wise).
    """
    difference = x - y
    return abs(difference)
D
dengkaipeng 已提交
28 29


30
def sce(x, label):
    """Sigmoid cross-entropy between the logit ``x`` and the target ``label``.

    Mathematically this is
    ``-label * log(sigmoid(x)) - (1 - label) * log(1 - sigmoid(x))``.
    It is evaluated in the algebraically equivalent form
    ``max(x, 0) - x * label + log1p(exp(-|x|))`` so that saturated logits
    never produce ``log(0)`` (inf) or ``0 * log(0)`` (NaN).

    Args:
        x: Logit(s); a scalar or NumPy array.
        label: Target(s); a scalar or an array broadcastable against ``x``.

    Returns:
        The loss, with the broadcast shape of ``x`` and ``label``.
    """
    # exp(-|x|) is always in (0, 1], so neither exp nor log1p can overflow.
    softplus_tail = np.log1p(np.exp(-np.abs(x)))
    return np.maximum(x, 0.0) - x * label + softplus_tail
35 36


37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68
def sigmoid(x):
    """Return the logistic function ``1 / (1 + exp(-x))`` of ``x``.

    Accepts a scalar or a NumPy array (evaluated element-wise).
    """
    denominator = 1.0 + np.exp(x * -1.0)
    return 1.0 / denominator


def batch_xywh_box_iou(box1, box2):
    """Pairwise IoU between two batches of center-format boxes.

    Args:
        box1: Array indexed as ``[batch, box, 0:4]`` holding
            ``(center_x, center_y, width, height)``.
        box2: Array with the same layout; its batch dimension must match
            ``box1`` (the box counts may differ).

    Returns:
        Array of shape ``(batch, boxes_in_box1, boxes_in_box2)`` where entry
        ``[n, i, j]`` is the IoU of ``box1[n, i]`` with ``box2[n, j]``.
        The overlap width and height are each clipped to ``[0, 1]``.
    """

    def _edges(boxes):
        # Convert (cx, cy, w, h) into (left, right, top, bottom).
        center_x, center_y = boxes[:, :, 0], boxes[:, :, 1]
        half_w, half_h = boxes[:, :, 2] / 2, boxes[:, :, 3] / 2
        return (center_x - half_w, center_x + half_w, center_y - half_h,
                center_y + half_h)

    left1, right1, top1, bottom1 = _edges(box1)
    left2, right2, top2, bottom2 = _edges(box2)

    # Broadcast box1 along a new trailing axis and box2 along a new middle
    # axis so that every (box1, box2) pair within a batch item is compared.
    overlap_left = np.maximum(left1[:, :, np.newaxis], left2[:, np.newaxis, :])
    overlap_right = np.minimum(right1[:, :, np.newaxis],
                               right2[:, np.newaxis, :])
    overlap_top = np.maximum(top1[:, :, np.newaxis], top2[:, np.newaxis, :])
    overlap_bottom = np.minimum(bottom1[:, :, np.newaxis],
                                bottom2[:, np.newaxis, :])

    # Disjoint boxes give a negative extent; clipping turns that into 0.
    intersection = (np.clip(overlap_right - overlap_left, 0., 1.) *
                    np.clip(overlap_bottom - overlap_top, 0., 1.))

    area1 = (right1 - left1) * (bottom1 - top1)
    area2 = (right2 - left2) * (bottom2 - top2)
    union = area1[:, :, np.newaxis] + area2[:, np.newaxis, :] - intersection

    return intersection / union


69
def YOLOv3Loss(x, gtbox, gtlabel, gtscore, attrs):
    """NumPy reference implementation used to check the ``yolov3_loss`` op.

    Args:
        x: Network output of shape ``(n, c, h, w)``; it is reshaped to
            ``(n, mask_num, 5 + class_num, h, w)``, so ``c`` must equal
            ``mask_num * (5 + class_num)``. The last-axis layout after the
            transpose is ``[tx, ty, tw, th, objectness, class scores...]``.
        gtbox: Ground-truth boxes indexed ``[i, j, 0:4]`` as
            ``(center_x, center_y, width, height)``. A box whose width and
            height sum to zero is skipped.
            NOTE(review): coordinates are presumably normalized to [0, 1)
            (they are scaled by the grid size to get a cell index) -- confirm.
        gtlabel: Class index of each ground-truth box, indexed ``[i, j]``.
        gtscore: Per-box weight, indexed ``[i, j]``.
        attrs: Dict with keys ``anchors`` (flat ``[w0, h0, w1, h1, ...]``),
            ``anchor_mask``, ``class_num``, ``ignore_thresh``,
            ``downsample_ratio``, ``use_label_smooth`` and ``scale_x_y``.

    Returns:
        Tuple ``(loss, objness, gt_matches)``:
        ``loss`` is a float64 array of shape ``(n,)``;
        ``objness`` is a float64 array of shape ``(n, mask_num, h, w)``
        holding ``gtscore`` at matched cells, ``-1`` at cells whose best IoU
        exceeds ``ignore_thresh`` and ``0`` elsewhere;
        ``gt_matches`` is an int32 array of shape ``(n, b)`` holding the
        index into ``anchor_mask`` matched by each box, or ``-1``.
    """
    n, _, h, w = x.shape
    b = gtbox.shape[1]
    anchors = attrs['anchors']
    an_num = len(anchors) // 2
    anchor_mask = attrs['anchor_mask']
    mask_num = len(anchor_mask)
    class_num = attrs["class_num"]
    ignore_thresh = attrs['ignore_thresh']
    downsample_ratio = attrs['downsample_ratio']
    use_label_smooth = attrs['use_label_smooth']
    scale_x_y = attrs['scale_x_y']
    bias_x_y = -0.5 * (scale_x_y - 1.)
    input_size = downsample_ratio * h

    # Move the per-anchor attribute axis last: (n, mask_num, h, w, 5 + C).
    x = x.reshape((n, mask_num, 5 + class_num, h, w)).transpose((0, 1, 3, 4, 2))
    loss = np.zeros((n)).astype('float64')

    smooth_weight = min(1.0 / class_num, 1.0 / 40)
    label_pos = 1.0 - smooth_weight if use_label_smooth else 1.0
    label_neg = smooth_weight if use_label_smooth else 0.0

    # Decode the raw predictions into (cx, cy, w, h) boxes.
    pred_box = x[:, :, :, :, :4].copy()
    grid_x = np.tile(np.arange(w).reshape((1, w)), (h, 1))
    grid_y = np.tile(np.arange(h).reshape((h, 1)), (1, w))
    pred_box[:, :, :, :, 0] = (
        grid_x + sigmoid(pred_box[:, :, :, :, 0]) * scale_x_y + bias_x_y) / w
    pred_box[:, :, :, :, 1] = (
        grid_y + sigmoid(pred_box[:, :, :, :, 1]) * scale_x_y + bias_x_y) / h

    mask_anchors = [(anchors[2 * m], anchors[2 * m + 1]) for m in anchor_mask]
    anchors_s = np.array(
        [(an_w / input_size, an_h / input_size) for an_w, an_h in mask_anchors])
    anchor_w = anchors_s[:, 0:1].reshape((1, mask_num, 1, 1))
    anchor_h = anchors_s[:, 1:2].reshape((1, mask_num, 1, 1))
    pred_box[:, :, :, :, 2] = np.exp(pred_box[:, :, :, :, 2]) * anchor_w
    pred_box[:, :, :, :, 3] = np.exp(pred_box[:, :, :, :, 3]) * anchor_h

    pred_box = pred_box.reshape((n, -1, 4))
    pred_obj = x[:, :, :, :, 4].reshape((n, -1))

    # Cells whose best IoU with any ground-truth box exceeds the threshold
    # are marked -1; the objectness loop below skips them.
    objness = np.zeros(pred_box.shape[:2]).astype('float64')
    ious = batch_xywh_box_iou(pred_box, gtbox)
    ious_max = np.max(ious, axis=-1)
    objness = np.where(ious_max > ignore_thresh, -np.ones_like(objness),
                       objness)

    # Match each ground-truth box to an anchor by shape only: both are
    # centered at the origin before the IoU is taken.
    gtbox_shift = gtbox.copy()
    gtbox_shift[:, :, 0] = 0
    gtbox_shift[:, :, 1] = 0

    all_anchors = [(anchors[2 * i], anchors[2 * i + 1]) for i in range(an_num)]
    anchors_s = np.array(
        [(an_w / input_size, an_h / input_size) for an_w, an_h in all_anchors])
    anchor_boxes = np.concatenate(
        [np.zeros_like(anchors_s), anchors_s], axis=-1)
    anchor_boxes = np.tile(anchor_boxes[np.newaxis, :, :], (n, 1, 1))
    ious = batch_xywh_box_iou(gtbox_shift, anchor_boxes)
    iou_matches = np.argmax(ious, axis=-1)

    gt_matches = iou_matches.copy()
    for i in range(n):
        for j in range(b):
            # Zero-sized boxes and boxes whose best anchor is not in
            # anchor_mask contribute nothing.
            if gtbox[i, j, 2:].sum() == 0:
                gt_matches[i, j] = -1
                continue
            if iou_matches[i, j] not in anchor_mask:
                gt_matches[i, j] = -1
                continue
            an_idx = anchor_mask.index(iou_matches[i, j])
            gt_matches[i, j] = an_idx
            gi = int(gtbox[i, j, 0] * w)
            gj = int(gtbox[i, j, 1] * h)

            tx = gtbox[i, j, 0] * w - gi
            # The y offset is relative to the row index gj, which is derived
            # from h, so it must be scaled by h as well (not w).
            ty = gtbox[i, j, 1] * h - gj
            tw = np.log(gtbox[i, j, 2] * input_size / mask_anchors[an_idx][0])
            th = np.log(gtbox[i, j, 3] * input_size / mask_anchors[an_idx][1])
            # The weight (2 - w * h) is larger for smaller boxes.
            scale = (2.0 - gtbox[i, j, 2] * gtbox[i, j, 3]) * gtscore[i, j]
            loss[i] += sce(x[i, an_idx, gj, gi, 0], tx) * scale
            loss[i] += sce(x[i, an_idx, gj, gi, 1], ty) * scale
            loss[i] += l1loss(x[i, an_idx, gj, gi, 2], tw) * scale
            loss[i] += l1loss(x[i, an_idx, gj, gi, 3], th) * scale

            objness[i, an_idx * h * w + gj * w + gi] = gtscore[i, j]

            for label_idx in range(class_num):
                target = label_pos if label_idx == gtlabel[i, j] else label_neg
                loss[i] += sce(x[i, an_idx, gj, gi, 5 + label_idx],
                               target) * gtscore[i, j]

        # Objectness loss: positives are weighted by their score, negatives
        # count fully, and ignored cells (-1) are skipped.
        for j in range(mask_num * h * w):
            if objness[i, j] > 0:
                loss[i] += sce(pred_obj[i, j], 1.0) * objness[i, j]
            elif objness[i, j] == 0:
                loss[i] += sce(pred_obj[i, j], 0.0)

    return (loss, objness.reshape((n, mask_num, h, w)).astype('float64'),
            gt_matches.astype('int32'))
167 168


169 170 171 172
class TestYolov3LossOp(OpTest):
    """Check the ``yolov3_loss`` op against the NumPy ``YOLOv3Loss`` reference.

    Subclasses override ``initTestCase`` to vary the configuration.
    """

    def setUp(self):
        """Build random inputs, the op attributes and the expected outputs."""
        self.initTestCase()
        self.op_type = 'yolov3_loss'
        # Taking the logit of uniform samples makes sigmoid(x) uniform.
        x = logit(np.random.uniform(0, 1, self.x_shape).astype('float64'))
        gtbox = np.random.random(size=self.gtbox_shape).astype('float64')
        gtlabel = np.random.randint(0, self.class_num, self.gtbox_shape[:2])
        # Randomly zero out some boxes (and their labels); the reference
        # implementation skips boxes whose width and height are zero.
        gtmask = np.random.randint(0, 2, self.gtbox_shape[:2])
        gtbox = gtbox * gtmask[:, :, np.newaxis]
        gtlabel = gtlabel * gtmask

        self.attrs = {
            "anchors": self.anchors,
            "anchor_mask": self.anchor_mask,
            "class_num": self.class_num,
            "ignore_thresh": self.ignore_thresh,
            "downsample_ratio": self.downsample_ratio,
            "use_label_smooth": self.use_label_smooth,
            "scale_x_y": self.scale_x_y,
        }

        self.inputs = {
            'X': x,
            'GTBox': gtbox.astype('float64'),
            'GTLabel': gtlabel.astype('int32'),
        }

        # Without a GTScore input the reference uses a weight of 1 per box.
        gtscore = np.ones(self.gtbox_shape[:2]).astype('float64')
        if self.gtscore:
            gtscore = np.random.random(self.gtbox_shape[:2]).astype('float64')
            self.inputs['GTScore'] = gtscore

        loss, objness, gt_matches = YOLOv3Loss(x, gtbox, gtlabel, gtscore,
                                               self.attrs)
        self.outputs = {
            'Loss': loss,
            'ObjectnessMask': objness,
            "GTMatchMask": gt_matches
        }

    def test_check_output(self):
        """Compare the op outputs on CPU with the reference outputs."""
        place = core.CPUPlace()
        self.check_output_with_place(place, atol=2e-3)

    def test_check_grad_ignore_gtbox(self):
        """Check the gradient of ``Loss`` with respect to ``X`` on CPU."""
        place = core.CPUPlace()
        self.check_grad_with_place(place, ['X'], 'Loss', max_relative_error=0.2)

    def initTestCase(self):
        """Set the default configuration used by ``setUp``."""
        # Flat list of anchor sizes: [w0, h0, w1, h1, ...].
        self.anchors = [
            10, 13, 16, 30, 33, 23, 30, 61, 62, 45, 59, 119, 116, 90, 156, 198,
            373, 326
        ]
        self.anchor_mask = [0, 1, 2]
        self.class_num = 5
        self.ignore_thresh = 0.7
        self.downsample_ratio = 32
        self.x_shape = (3, len(self.anchor_mask) * (5 + self.class_num), 5, 5)
        self.gtbox_shape = (3, 5, 4)
        # Whether to feed a random GTScore input to the op.
        self.gtscore = True
        self.use_label_smooth = True
        self.scale_x_y = 1.
231 232 233


class TestYolov3LossWithoutLabelSmooth(TestYolov3LossOp):
    """Same configuration as ``TestYolov3LossOp`` with label smoothing off."""

    def initTestCase(self):
        """Reuse the base configuration and disable label smoothing."""
        super(TestYolov3LossWithoutLabelSmooth, self).initTestCase()
        self.use_label_smooth = False
248 249


250 251 252 253 254 255 256 257 258 259 260 261 262 263
class TestYolov3LossNoGTScore(TestYolov3LossOp):
    """Same configuration as ``TestYolov3LossOp`` without a GTScore input."""

    def initTestCase(self):
        """Reuse the base configuration and omit the GTScore input."""
        super(TestYolov3LossNoGTScore, self).initTestCase()
        self.gtscore = False


class TestYolov3LossWithScaleXY(TestYolov3LossOp):
    """Same configuration as ``TestYolov3LossOp`` with ``scale_x_y`` of 1.2."""

    def initTestCase(self):
        """Reuse the base configuration and change only ``scale_x_y``."""
        super(TestYolov3LossWithScaleXY, self).initTestCase()
        self.scale_x_y = 1.2
282 283


284 285
# Run every test case in this module when the file is executed directly.
if __name__ == "__main__":
    unittest.main()