test_yolov3_loss_op.py 9.4 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14
#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

15 16
from __future__ import division

17 18
import unittest
import numpy as np
19 20
from scipy.special import logit
from scipy.special import expit
21 22
from op_test import OpTest

23 24
from paddle.fluid import core

D
dengkaipeng 已提交
25

26 27
def l1loss(x, y):
    """Return the absolute difference ``|x - y|`` (element-wise for arrays)."""
    delta = x - y
    return abs(delta)
D
dengkaipeng 已提交
28 29


30
def sce(x, label):
    """Return the sigmoid cross-entropy between logit ``x`` and target ``label``.

    Mathematically this is
    ``-label * log(sigmoid(x)) - (1 - label) * log(1 - sigmoid(x))``.

    It is evaluated in the algebraically equivalent form
    ``max(x, 0) - x * label + log(1 + exp(-|x|))``. That form never takes the
    logarithm of a saturated sigmoid, so large-magnitude logits yield a finite
    loss instead of ``inf``/``nan``.

    Args:
        x: logit(s); a scalar or a NumPy array.
        label: target value(s), broadcastable against ``x``.

    Returns:
        The element-wise loss, with the broadcast shape of ``x`` and ``label``.
    """
    # exp(-|x|) is always in (0, 1], so log1p cannot overflow or hit log(0).
    softplus_tail = np.log1p(np.exp(-np.abs(x)))
    return np.maximum(x, 0.0) - x * label + softplus_tail
35 36


37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68
def sigmoid(x):
    """Return the logistic function ``1 / (1 + exp(-x))`` of ``x``."""
    decay = np.exp(-1.0 * x)
    return 1.0 / (1.0 + decay)


def batch_xywh_box_iou(box1, box2):
    """Compute pairwise IoU between two batches of (x, y, w, h) boxes.

    Both inputs are indexed as ``[batch, box, 4]`` where the last axis holds
    the box centre ``(x, y)`` followed by its width and height.

    The overlap width and height are clipped to ``[0, 1]`` before the
    intersection area is formed.

    Returns:
        An array indexed ``[batch, i, j]`` holding the IoU of ``box1[:, i]``
        with ``box2[:, j]``.
    """

    def _edges(boxes):
        # Centre/size -> (left, right, top, bottom) edge coordinates.
        return (boxes[:, :, 0] - boxes[:, :, 2] / 2,
                boxes[:, :, 0] + boxes[:, :, 2] / 2,
                boxes[:, :, 1] - boxes[:, :, 3] / 2,
                boxes[:, :, 1] + boxes[:, :, 3] / 2)

    l1, r1, t1, bt1 = _edges(box1)
    l2, r2, t2, bt2 = _edges(box2)

    # box1 varies along axis 1 and box2 along axis 2 of every pairwise array.
    overlap_l = np.maximum(l1[:, :, np.newaxis], l2[:, np.newaxis, :])
    overlap_r = np.minimum(r1[:, :, np.newaxis], r2[:, np.newaxis, :])
    overlap_t = np.maximum(t1[:, :, np.newaxis], t2[:, np.newaxis, :])
    overlap_b = np.minimum(bt1[:, :, np.newaxis], bt2[:, np.newaxis, :])

    overlap_area = (np.clip(overlap_r - overlap_l, 0., 1.) *
                    np.clip(overlap_b - overlap_t, 0., 1.))

    area1 = (r1 - l1) * (bt1 - t1)
    area2 = (r2 - l2) * (bt2 - t2)
    total_area = (area1[:, :, np.newaxis] + area2[:, np.newaxis, :] -
                  overlap_area)

    return overlap_area / total_area


69
def YOLOv3Loss(x, gtbox, gtlabel, gtscore, attrs):
    """NumPy reference implementation of the YOLOv3 loss.

    Args:
        x: prediction array of shape
            ``(n, mask_num * (5 + class_num), h, w)``. For every masked
            anchor the channels are laid out as x, y, w, h, objectness and
            then ``class_num`` class logits.
        gtbox: ground-truth boxes indexed ``[n, b, 4]`` as
            (centre x, centre y, width, height), scaled so that multiplying
            by the grid size gives a cell index. Boxes whose width and
            height sum to 0 are treated as empty and skipped.
        gtlabel: class index of each ground-truth box, indexed ``[n, b]``.
        gtscore: per-box weight, indexed ``[n, b]``.
        attrs: dict with keys ``anchors`` (flat list of w, h pairs),
            ``anchor_mask``, ``class_num``, ``ignore_thresh``,
            ``downsample_ratio`` and ``use_label_smooth``.

    Returns:
        A tuple ``(loss, objness_mask, gt_match_mask)``:

        * ``loss``: float32 array of length ``n``, one summed loss per image.
        * ``objness_mask``: float32 array of shape ``(n, mask_num, h, w)``;
          ``gtscore`` at cells assigned to a ground-truth box, ``-1`` where
          the best IoU with any ground-truth box exceeds ``ignore_thresh``,
          ``0`` elsewhere.
        * ``gt_match_mask``: int32 array indexed ``[n, b]`` with the position
          in ``anchor_mask`` of the best-matching anchor, or ``-1`` for empty
          boxes and for boxes whose best anchor is not in ``anchor_mask``.
    """
    n, _, h, w = x.shape
    b = gtbox.shape[1]
    anchors = attrs['anchors']
    an_num = len(anchors) // 2
    anchor_mask = attrs['anchor_mask']
    mask_num = len(anchor_mask)
    class_num = attrs["class_num"]
    ignore_thresh = attrs['ignore_thresh']
    downsample_ratio = attrs['downsample_ratio']
    use_label_smooth = attrs['use_label_smooth']
    # Anchor sizes are normalised by this single size derived from the grid
    # height.
    input_size = downsample_ratio * h
    # (n, mask_num * (5 + class_num), h, w) -> (n, mask_num, h, w, 5 + class_num)
    x = x.reshape((n, mask_num, 5 + class_num, h, w)).transpose((0, 1, 3, 4, 2))
    loss = np.zeros((n)).astype('float32')

    # Class targets, optionally softened by label smoothing.
    label_pos = 1.0 - 1.0 / class_num if use_label_smooth else 1.0
    label_neg = 1.0 / class_num if use_label_smooth else 0.0

    # Decode the raw box predictions into normalised (x, y, w, h) boxes.
    pred_box = x[:, :, :, :, :4].copy()
    grid_x = np.tile(np.arange(w).reshape((1, w)), (h, 1))
    grid_y = np.tile(np.arange(h).reshape((h, 1)), (1, w))
    pred_box[:, :, :, :, 0] = (grid_x + sigmoid(pred_box[:, :, :, :, 0])) / w
    pred_box[:, :, :, :, 1] = (grid_y + sigmoid(pred_box[:, :, :, :, 1])) / h

    mask_anchors = []
    for m in anchor_mask:
        mask_anchors.append((anchors[2 * m], anchors[2 * m + 1]))
    anchors_s = np.array(
        [(an_w / input_size, an_h / input_size) for an_w, an_h in mask_anchors])
    anchor_w = anchors_s[:, 0:1].reshape((1, mask_num, 1, 1))
    anchor_h = anchors_s[:, 1:2].reshape((1, mask_num, 1, 1))
    pred_box[:, :, :, :, 2] = np.exp(pred_box[:, :, :, :, 2]) * anchor_w
    pred_box[:, :, :, :, 3] = np.exp(pred_box[:, :, :, :, 3]) * anchor_h

    pred_box = pred_box.reshape((n, -1, 4))
    pred_obj = x[:, :, :, :, 4].reshape((n, -1))

    # Mark predictions that overlap some ground-truth box by more than
    # ignore_thresh with -1 so they are excluded from the objectness loss.
    objness = np.zeros(pred_box.shape[:2]).astype('float32')
    ious = batch_xywh_box_iou(pred_box, gtbox)
    ious_max = np.max(ious, axis=-1)
    objness = np.where(ious_max > ignore_thresh, -np.ones_like(objness),
                       objness)

    # Match every ground-truth box to the anchor of most similar shape by
    # comparing both as boxes centred at the origin.
    gtbox_shift = gtbox.copy()
    gtbox_shift[:, :, 0] = 0
    gtbox_shift[:, :, 1] = 0

    all_anchors = [(anchors[2 * i], anchors[2 * i + 1])
                   for i in range(0, an_num)]
    anchors_s = np.array(
        [(an_w / input_size, an_h / input_size) for an_w, an_h in all_anchors])
    anchor_boxes = np.concatenate(
        [np.zeros_like(anchors_s), anchors_s], axis=-1)
    anchor_boxes = np.tile(anchor_boxes[np.newaxis, :, :], (n, 1, 1))
    ious = batch_xywh_box_iou(gtbox_shift, anchor_boxes)
    iou_matches = np.argmax(ious, axis=-1)
    gt_matches = iou_matches.copy()
    for i in range(n):
        for j in range(b):
            # Empty (zero-sized) boxes contribute nothing.
            if gtbox[i, j, 2:].sum() == 0:
                gt_matches[i, j] = -1
                continue
            # Boxes whose best anchor is not handled by this mask are skipped.
            if iou_matches[i, j] not in anchor_mask:
                gt_matches[i, j] = -1
                continue
            an_idx = anchor_mask.index(iou_matches[i, j])
            gt_matches[i, j] = an_idx
            gi = int(gtbox[i, j, 0] * w)
            gj = int(gtbox[i, j, 1] * h)

            tx = gtbox[i, j, 0] * w - gi
            # The row offset must use the grid height, matching how gj is
            # derived above.
            ty = gtbox[i, j, 1] * h - gj
            tw = np.log(gtbox[i, j, 2] * input_size / mask_anchors[an_idx][0])
            th = np.log(gtbox[i, j, 3] * input_size / mask_anchors[an_idx][1])
            # Smaller boxes get a larger location-loss weight.
            scale = (2.0 - gtbox[i, j, 2] * gtbox[i, j, 3]) * gtscore[i, j]
            loss[i] += sce(x[i, an_idx, gj, gi, 0], tx) * scale
            loss[i] += sce(x[i, an_idx, gj, gi, 1], ty) * scale
            loss[i] += l1loss(x[i, an_idx, gj, gi, 2], tw) * scale
            loss[i] += l1loss(x[i, an_idx, gj, gi, 3], th) * scale

            # This overrides any -1 "ignore" mark set for the cell earlier.
            objness[i, an_idx * h * w + gj * w + gi] = gtscore[i, j]

            for label_idx in range(class_num):
                loss[i] += sce(x[i, an_idx, gj, gi, 5 + label_idx], label_pos
                               if label_idx == gtlabel[i, j] else
                               label_neg) * gtscore[i, j]

        # Objectness loss: positives weighted by their score, zeros counted
        # as negatives, -1 entries ignored.
        for j in range(mask_num * h * w):
            if objness[i, j] > 0:
                loss[i] += sce(pred_obj[i, j], 1.0) * objness[i, j]
            elif objness[i, j] == 0:
                loss[i] += sce(pred_obj[i, j], 0.0)

    return (loss, objness.reshape((n, mask_num, h, w)).astype('float32'),
            gt_matches.astype('int32'))
162 163


164 165 166 167
class TestYolov3LossOp(OpTest):
    """Compares the ``yolov3_loss`` op with the NumPy reference ``YOLOv3Loss``."""

    def setUp(self):
        """Generate random inputs, op attributes and expected outputs."""
        self.initTestCase()
        self.op_type = 'yolov3_loss'

        per_box_shape = self.gtbox_shape[:2]
        x = logit(np.random.uniform(0, 1, self.x_shape).astype('float32'))
        gtbox = np.random.random(size=self.gtbox_shape).astype('float32')
        gtlabel = np.random.randint(0, self.class_num, per_box_shape)
        # Randomly blank out boxes (and their labels) so that empty
        # ground-truth entries are exercised as well.
        gtmask = np.random.randint(0, 2, per_box_shape)
        gtbox = gtbox * gtmask[:, :, np.newaxis]
        gtlabel = gtlabel * gtmask

        attr_names = ("anchors", "anchor_mask", "class_num", "ignore_thresh",
                      "downsample_ratio", "use_label_smooth")
        self.attrs = {name: getattr(self, name) for name in attr_names}

        self.inputs = {
            'X': x,
            'GTBox': gtbox.astype('float32'),
            'GTLabel': gtlabel.astype('int32'),
        }

        # GTScore is only passed to the op when the test case asks for it;
        # otherwise the reference uses a score of one for every box.
        if self.gtscore:
            gtscore = np.random.random(per_box_shape).astype('float32')
            self.inputs['GTScore'] = gtscore
        else:
            gtscore = np.ones(per_box_shape).astype('float32')

        expected = YOLOv3Loss(x, gtbox, gtlabel, gtscore, self.attrs)
        self.outputs = {
            'Loss': expected[0],
            'ObjectnessMask': expected[1],
            "GTMatchMask": expected[2],
        }

    def test_check_output(self):
        """Check the op outputs on a CPU place with an absolute tolerance."""
        self.check_output_with_place(core.CPUPlace(), atol=2e-3)

    def test_check_grad_ignore_gtbox(self):
        """Run the gradient check of 'Loss' with respect to 'X' on a CPU place."""
        self.check_grad_with_place(
            core.CPUPlace(), ['X'], 'Loss', max_relative_error=0.2)

    def initTestCase(self):
        """Set the configuration of this test case; subclasses override it."""
        self.anchors = [
            10, 13, 16, 30, 33, 23, 30, 61, 62, 45, 59, 119, 116, 90, 156, 198,
            373, 326
        ]
        self.anchor_mask = [0, 1, 2]
        self.class_num = 5
        self.ignore_thresh = 0.7
        self.downsample_ratio = 32
        channels = len(self.anchor_mask) * (5 + self.class_num)
        self.x_shape = (3, channels, 5, 5)
        self.gtbox_shape = (3, 5, 4)
        self.gtscore = True
        self.use_label_smooth = True


class TestYolov3LossWithoutLabelSmooth(TestYolov3LossOp):
    """Base configuration with label smoothing switched off."""

    def initTestCase(self):
        """Reuse the base configuration and disable label smoothing."""
        super(TestYolov3LossWithoutLabelSmooth, self).initTestCase()
        self.use_label_smooth = False
240 241


242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257
class TestYolov3LossNoGTScore(TestYolov3LossOp):
    """Base configuration without the optional ``GTScore`` input."""

    def initTestCase(self):
        """Reuse the base configuration and drop the ground-truth scores."""
        super(TestYolov3LossNoGTScore, self).initTestCase()
        self.gtscore = False


258 259
# Run every test case in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()