# File: test_yolov3_loss_op.py
#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import division

import unittest
import numpy as np
from scipy.special import logit
from scipy.special import expit
from op_test import OpTest

import paddle
from paddle.fluid import core

def l1loss(x, y):
    """Return the absolute difference ``|x - y|``.

    Works on anything that supports subtraction and ``abs`` (Python
    numbers, NumPy scalars, NumPy arrays).
    """
    difference = x - y
    return abs(difference)


def sce(x, label):
    """Return the sigmoid cross-entropy of logit ``x`` against ``label``.

    Mathematically this is
    ``-label * log(sigmoid(x)) - (1 - label) * log(1 - sigmoid(x))``.

    The value is computed in the algebraically equivalent form
    ``max(x, 0) - x * label + log(1 + exp(-|x|))`` so that large-magnitude
    logits do not saturate the sigmoid to exactly 0 or 1, which would
    otherwise produce ``log(0)`` and hence ``inf`` or ``nan``.

    Args:
        x: logit(s); a scalar or a NumPy array.
        label: target value(s), broadcastable against ``x``.

    Returns:
        The element-wise loss as a NumPy scalar or array.
    """
    # exp(-|x|) is always in (0, 1], so neither exp nor log1p can overflow.
    softplus_tail = np.log1p(np.exp(-np.abs(x)))
    return np.maximum(x, 0.0) - x * label + softplus_tail


def sigmoid(x):
    """Apply the logistic function ``1 / (1 + exp(-x))`` element-wise."""
    negated = -1.0 * x
    denominator = 1.0 + np.exp(negated)
    return 1.0 / denominator


def batch_xywh_box_iou(box1, box2):
    """Compute pairwise IoU between two batches of centre/size boxes.

    Args:
        box1: array indexed as ``[batch, i, 4]`` holding
            ``(center_x, center_y, width, height)`` per box.
        box2: array indexed as ``[batch, j, 4]`` in the same layout.

    Returns:
        Array of shape ``[batch, i, j]`` with the IoU of every box in
        ``box1`` against every box in ``box2`` of the same batch entry.
        Intersection width and height are clipped to ``[0, 1]`` before the
        area is formed.
    """

    def _edges(boxes):
        # Convert (cx, cy, w, h) to (left, right, top, bottom).
        center_x = boxes[:, :, 0]
        center_y = boxes[:, :, 1]
        half_w = boxes[:, :, 2] / 2
        half_h = boxes[:, :, 3] / 2
        return (center_x - half_w, center_x + half_w, center_y - half_h,
                center_y + half_h)

    l1, r1, t1, b1 = _edges(box1)
    l2, r2, t2, b2 = _edges(box2)

    # Broadcast box1 along a new trailing axis and box2 along a new middle
    # axis so that every (i, j) pair is compared.
    overlap_left = np.maximum(l1[:, :, np.newaxis], l2[:, np.newaxis, :])
    overlap_right = np.minimum(r1[:, :, np.newaxis], r2[:, np.newaxis, :])
    overlap_top = np.maximum(t1[:, :, np.newaxis], t2[:, np.newaxis, :])
    overlap_bottom = np.minimum(b1[:, :, np.newaxis], b2[:, np.newaxis, :])

    overlap_w = np.clip(overlap_right - overlap_left, 0., 1.)
    overlap_h = np.clip(overlap_bottom - overlap_top, 0., 1.)
    intersection = overlap_w * overlap_h

    area1 = (r1 - l1) * (b1 - t1)
    area2 = (r2 - l2) * (b2 - t2)
    total = area1[:, :, np.newaxis] + area2[:, np.newaxis, :] - intersection

    return intersection / total


def YOLOv3Loss(x, gtbox, gtlabel, gtscore, attrs):
    """NumPy reference implementation of the YOLOv3 loss.

    Args:
        x: network output of shape ``(n, mask_num * (5 + class_num), h, w)``.
            Per anchor the channels are read as ``(tx, ty, tw, th, obj,
            class_0, ..., class_{class_num-1})``.
        gtbox: ground-truth boxes indexed ``[n, b, 4]`` as
            ``(center_x, center_y, width, height)``. Coordinates are scaled
            by the grid size below, so they are presumably normalised to
            ``[0, 1)`` -- confirm against the caller.
        gtlabel: class index of each ground-truth box, indexed ``[n, b]``.
        gtscore: per-box weight, indexed ``[n, b]``.
        attrs: dict providing ``anchors``, ``anchor_mask``, ``class_num``,
            ``ignore_thresh``, ``downsample_ratio``, ``use_label_smooth``
            and ``scale_x_y``.

    Returns:
        A tuple ``(loss, objness_mask, gt_match_mask)``:

        * ``loss``: float64 array of shape ``(n,)``.
        * ``objness_mask``: float64 array of shape ``(n, mask_num, h, w)``;
          ``-1`` marks ignored predictions, ``0`` negatives, and a positive
          value is the ``gtscore`` of the matched ground-truth box.
        * ``gt_match_mask``: int32 array of shape ``(n, b)`` holding the
          index (within ``anchor_mask``) matched to each ground-truth box,
          or ``-1`` when the box is empty or matched to an unmasked anchor.
    """
    n, c, h, w = x.shape
    b = gtbox.shape[1]
    anchors = attrs['anchors']
    an_num = len(anchors) // 2
    anchor_mask = attrs['anchor_mask']
    mask_num = len(anchor_mask)
    class_num = attrs["class_num"]
    ignore_thresh = attrs['ignore_thresh']
    downsample_ratio = attrs['downsample_ratio']
    use_label_smooth = attrs['use_label_smooth']
    scale_x_y = attrs['scale_x_y']
    bias_x_y = -0.5 * (scale_x_y - 1.)
    input_size = downsample_ratio * h

    # Rearrange to (n, anchor, row, col, channel) so the last axis holds the
    # 5 + class_num values of a single prediction.
    x = x.reshape((n, mask_num, 5 + class_num, h, w)).transpose((0, 1, 3, 4, 2))
    loss = np.zeros((n)).astype('float64')

    smooth_weight = min(1.0 / class_num, 1.0 / 40)
    label_pos = 1.0 - smooth_weight if use_label_smooth else 1.0
    label_neg = smooth_weight if use_label_smooth else 0.0

    # Decode the raw predictions into (cx, cy, w, h) boxes; centres are
    # divided by the grid size and sizes by the input size.
    pred_box = x[:, :, :, :, :4].copy()
    grid_x = np.tile(np.arange(w).reshape((1, w)), (h, 1))
    grid_y = np.tile(np.arange(h).reshape((h, 1)), (1, w))
    pred_box[:, :, :, :, 0] = (
        grid_x + sigmoid(pred_box[:, :, :, :, 0]) * scale_x_y + bias_x_y) / w
    pred_box[:, :, :, :, 1] = (
        grid_y + sigmoid(pred_box[:, :, :, :, 1]) * scale_x_y + bias_x_y) / h

    mask_anchors = []
    for m in anchor_mask:
        mask_anchors.append((anchors[2 * m], anchors[2 * m + 1]))
    anchors_s = np.array(
        [(an_w / input_size, an_h / input_size) for an_w, an_h in mask_anchors])
    anchor_w = anchors_s[:, 0:1].reshape((1, mask_num, 1, 1))
    anchor_h = anchors_s[:, 1:2].reshape((1, mask_num, 1, 1))
    pred_box[:, :, :, :, 2] = np.exp(pred_box[:, :, :, :, 2]) * anchor_w
    pred_box[:, :, :, :, 3] = np.exp(pred_box[:, :, :, :, 3]) * anchor_h

    pred_box = pred_box.reshape((n, -1, 4))
    pred_obj = x[:, :, :, :, 4].reshape((n, -1))

    # Predictions whose best IoU with any ground-truth box exceeds the
    # threshold are marked -1 and skipped by the objectness loss below.
    objness = np.zeros(pred_box.shape[:2]).astype('float64')
    ious = batch_xywh_box_iou(pred_box, gtbox)
    ious_max = np.max(ious, axis=-1)
    objness = np.where(ious_max > ignore_thresh, -np.ones_like(objness),
                       objness)

    # Match each ground-truth box to the anchor with the best IoU when both
    # are centred at the origin (only shapes are compared).
    gtbox_shift = gtbox.copy()
    gtbox_shift[:, :, 0] = 0
    gtbox_shift[:, :, 1] = 0

    anchors = [(anchors[2 * i], anchors[2 * i + 1]) for i in range(0, an_num)]
    anchors_s = np.array(
        [(an_w / input_size, an_h / input_size) for an_w, an_h in anchors])
    anchor_boxes = np.concatenate(
        [np.zeros_like(anchors_s), anchors_s], axis=-1)
    anchor_boxes = np.tile(anchor_boxes[np.newaxis, :, :], (n, 1, 1))
    ious = batch_xywh_box_iou(gtbox_shift, anchor_boxes)
    iou_matches = np.argmax(ious, axis=-1)

    gt_matches = iou_matches.copy()
    for i in range(n):
        for j in range(b):
            # A box with zero width and height is treated as padding.
            if gtbox[i, j, 2:].sum() == 0:
                gt_matches[i, j] = -1
                continue
            # The best anchor is not one of the anchors selected by the mask.
            if iou_matches[i, j] not in anchor_mask:
                gt_matches[i, j] = -1
                continue
            an_idx = anchor_mask.index(iou_matches[i, j])
            gt_matches[i, j] = an_idx

            # Grid cell containing the box centre.
            gi = int(gtbox[i, j, 0] * w)
            gj = int(gtbox[i, j, 1] * h)

            # Regression targets: offsets inside the cell and log size
            # ratios against the matched anchor. The y offset is scaled by
            # the grid height so it stays consistent with gj.
            tx = gtbox[i, j, 0] * w - gi
            ty = gtbox[i, j, 1] * h - gj
            tw = np.log(gtbox[i, j, 2] * input_size / mask_anchors[an_idx][0])
            th = np.log(gtbox[i, j, 3] * input_size / mask_anchors[an_idx][1])

            # Smaller boxes get a larger location-loss weight.
            scale = (2.0 - gtbox[i, j, 2] * gtbox[i, j, 3]) * gtscore[i, j]
            loss[i] += sce(x[i, an_idx, gj, gi, 0], tx) * scale
            loss[i] += sce(x[i, an_idx, gj, gi, 1], ty) * scale
            loss[i] += l1loss(x[i, an_idx, gj, gi, 2], tw) * scale
            loss[i] += l1loss(x[i, an_idx, gj, gi, 3], th) * scale

            objness[i, an_idx * h * w + gj * w + gi] = gtscore[i, j]

            for label_idx in range(class_num):
                loss[i] += sce(x[i, an_idx, gj, gi, 5 + label_idx], label_pos
                               if label_idx == gtlabel[i, j] else
                               label_neg) * gtscore[i, j]

        # Objectness loss: positives are weighted by their score, negatives
        # count fully, and ignored entries (-1) contribute nothing.
        for j in range(mask_num * h * w):
            if objness[i, j] > 0:
                loss[i] += sce(pred_obj[i, j], 1.0) * objness[i, j]
            elif objness[i, j] == 0:
                loss[i] += sce(pred_obj[i, j], 0.0)

    return (loss, objness.reshape((n, mask_num, h, w)).astype('float64'),
            gt_matches.astype('int32'))


class TestYolov3LossOp(OpTest):
    """Compare the ``yolov3_loss`` operator with the NumPy reference.

    Subclasses override ``initTestCase`` to vary the configuration.
    """

    def setUp(self):
        """Build random inputs and the expected outputs for the operator."""
        self.initTestCase()
        self.op_type = 'yolov3_loss'

        per_box_shape = self.gtbox_shape[:2]

        # The order of the random draws below is kept fixed: x, boxes,
        # labels, validity mask, then (optionally) scores.
        x = logit(np.random.uniform(0, 1, self.x_shape).astype('float64'))
        gtbox = np.random.random(size=self.gtbox_shape).astype('float64')
        gtlabel = np.random.randint(0, self.class_num, per_box_shape)
        gtmask = np.random.randint(0, 2, per_box_shape)
        # Zero out masked boxes and labels so they act as padding entries.
        gtbox = gtbox * gtmask[:, :, np.newaxis]
        gtlabel = gtlabel * gtmask

        # Every operator attribute mirrors the instance attribute of the
        # same name set in initTestCase.
        attr_names = ("anchors", "anchor_mask", "class_num", "ignore_thresh",
                      "downsample_ratio", "use_label_smooth", "scale_x_y")
        self.attrs = {name: getattr(self, name) for name in attr_names}

        self.inputs = {
            'X': x,
            'GTBox': gtbox.astype('float64'),
            'GTLabel': gtlabel.astype('int32'),
        }

        if self.gtscore:
            gtscore = np.random.random(per_box_shape).astype('float64')
            self.inputs['GTScore'] = gtscore
        else:
            # Without an explicit score input every box is weighted 1.
            gtscore = np.ones(per_box_shape).astype('float64')

        expected_loss, expected_objness, expected_matches = YOLOv3Loss(
            x, gtbox, gtlabel, gtscore, self.attrs)
        self.outputs = {
            'Loss': expected_loss,
            'ObjectnessMask': expected_objness,
            "GTMatchMask": expected_matches
        }

    def test_check_output(self):
        """Check the forward outputs on CPU."""
        self.check_output_with_place(core.CPUPlace(), atol=2e-3)

    def test_check_grad_ignore_gtbox(self):
        """Check the gradient of ``Loss`` with respect to ``X`` on CPU."""
        self.check_grad_with_place(
            core.CPUPlace(), ['X'], 'Loss', max_relative_error=0.2)

    def initTestCase(self):
        """Default configuration: label smoothing and GTScore enabled."""
        # Flat list of (width, height) anchor pairs.
        self.anchors = [
            10, 13,
            16, 30,
            33, 23,
            30, 61,
            62, 45,
            59, 119,
            116, 90,
            156, 198,
            373, 326,
        ]
        self.anchor_mask = [0, 1, 2]
        self.class_num = 5
        self.ignore_thresh = 0.7
        self.downsample_ratio = 32
        channels = len(self.anchor_mask) * (5 + self.class_num)
        self.x_shape = (3, channels, 5, 5)
        self.gtbox_shape = (3, 5, 4)
        self.gtscore = True
        self.use_label_smooth = True
        self.scale_x_y = 1.


class TestYolov3LossWithoutLabelSmooth(TestYolov3LossOp):
    """Same configuration as the base case, with label smoothing disabled."""

    def initTestCase(self):
        # Flat list of (width, height) anchor pairs.
        self.anchors = [
            10, 13,
            16, 30,
            33, 23,
            30, 61,
            62, 45,
            59, 119,
            116, 90,
            156, 198,
            373, 326,
        ]
        self.anchor_mask = [0, 1, 2]
        self.class_num = 5
        self.ignore_thresh = 0.7
        self.downsample_ratio = 32
        channels = len(self.anchor_mask) * (5 + self.class_num)
        self.x_shape = (3, channels, 5, 5)
        self.gtbox_shape = (3, 5, 4)
        self.gtscore = True
        self.use_label_smooth = False
        self.scale_x_y = 1.


class TestYolov3LossNoGTScore(TestYolov3LossOp):
    """Same configuration as the base case, without the GTScore input."""

    def initTestCase(self):
        # Flat list of (width, height) anchor pairs.
        self.anchors = [
            10, 13,
            16, 30,
            33, 23,
            30, 61,
            62, 45,
            59, 119,
            116, 90,
            156, 198,
            373, 326,
        ]
        self.anchor_mask = [0, 1, 2]
        self.class_num = 5
        self.ignore_thresh = 0.7
        self.downsample_ratio = 32
        channels = len(self.anchor_mask) * (5 + self.class_num)
        self.x_shape = (3, channels, 5, 5)
        self.gtbox_shape = (3, 5, 4)
        self.gtscore = False
        self.use_label_smooth = True
        self.scale_x_y = 1.


class TestYolov3LossWithScaleXY(TestYolov3LossOp):
    """Same configuration as the base case, with ``scale_x_y`` set to 1.2."""

    def initTestCase(self):
        # Flat list of (width, height) anchor pairs.
        self.anchors = [
            10, 13,
            16, 30,
            33, 23,
            30, 61,
            62, 45,
            59, 119,
            116, 90,
            156, 198,
            373, 326,
        ]
        self.anchor_mask = [0, 1, 2]
        self.class_num = 5
        self.ignore_thresh = 0.7
        self.downsample_ratio = 32
        channels = len(self.anchor_mask) * (5 + self.class_num)
        self.x_shape = (3, channels, 5, 5)
        self.gtbox_shape = (3, 5, 4)
        self.gtscore = True
        self.use_label_smooth = True
        self.scale_x_y = 1.2


class TestYolov3LossDygraph(unittest.TestCase):
    """Smoke test for ``paddle.vision.ops.yolo_loss`` in dygraph mode."""

    def test_dygraph(self):
        """Call ``yolo_loss`` on random tensors and check a result is returned."""
        class_num = 2
        paddle.disable_static()
        try:
            x = np.random.random([2, 14, 8, 8]).astype('float32')
            gt_box = np.random.random([2, 10, 4]).astype('float32')
            # Draw integer class ids in [0, class_num); casting uniform
            # [0, 1) floats to int32 would always produce label 0.
            gt_label = np.random.randint(0, class_num,
                                         [2, 10]).astype('int32')

            x = paddle.to_tensor(x)
            gt_box = paddle.to_tensor(gt_box)
            gt_label = paddle.to_tensor(gt_label)

            loss = paddle.vision.ops.yolo_loss(
                x,
                gt_box=gt_box,
                gt_label=gt_label,
                anchors=[10, 13, 16, 30],
                anchor_mask=[0, 1],
                class_num=class_num,
                ignore_thresh=0.7,
                downsample_ratio=8,
                use_label_smooth=True,
                scale_x_y=1.)
            self.assertIsNotNone(loss)
        finally:
            # Restore static mode even when the call above fails, so the
            # mode switch does not leak into other tests in this module.
            paddle.enable_static()


class TestYolov3LossStatic(unittest.TestCase):
    """Smoke test for ``paddle.vision.ops.yolo_loss`` in static-graph mode."""

    def test_static(self):
        """Build the op with and without ``gt_score`` and check both results."""
        x = paddle.static.data('x', [2, 14, 8, 8], 'float32')
        gt_box = paddle.static.data('gt_box', [2, 10, 4], 'float32')
        gt_label = paddle.static.data('gt_label', [2, 10], 'int32')
        gt_score = paddle.static.data('gt_score', [2, 10], 'float32')

        # Keyword arguments shared by both calls below.
        common_kwargs = dict(
            gt_box=gt_box,
            gt_label=gt_label,
            anchors=[10, 13, 16, 30],
            anchor_mask=[0, 1],
            class_num=2,
            ignore_thresh=0.7,
            downsample_ratio=8,
            use_label_smooth=True,
            scale_x_y=1.)

        # With an explicit per-box score input.
        loss = paddle.vision.ops.yolo_loss(
            x, gt_score=gt_score, **common_kwargs)
        self.assertIsNotNone(loss)

        # Without gt_score.
        loss = paddle.vision.ops.yolo_loss(x, **common_kwargs)
        self.assertIsNotNone(loss)


# Run every test case in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()