# test_yolov3_loss_op.py
#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

15 16
from __future__ import division

17 18
import unittest
import numpy as np
19 20
from scipy.special import logit
from scipy.special import expit
21 22
from op_test import OpTest

23 24
from paddle.fluid import core

25

26 27 28 29 30 31 32 33 34 35 36 37 38
def mse(x, y, weight):
    """Return the per-sample weighted sum of squared errors.

    All three arrays are flattened to ``(n, -1)`` with ``n = x.shape[0]``;
    the squared difference ``(y - x)**2`` is scaled element-wise by
    ``weight`` and summed over the flattened axis, giving shape ``(n,)``.
    """
    batch = x.shape[0]
    pred, target, scale = (arr.reshape((batch, -1))
                           for arr in (x, y, weight))
    squared_error = (target - pred)**2
    return (squared_error * scale).sum(axis=1)


def sce(x, label, weight):
    """Return the per-sample weighted sigmoid cross-entropy of logits ``x``.

    All three arrays are flattened to ``(n, -1)`` with ``n = x.shape[0]``.
    The element-wise loss
    ``-label * log(sigmoid(x)) - (1 - label) * log(1 - sigmoid(x))``
    is scaled by ``weight`` and summed over the flattened axis, giving an
    array of shape ``(n,)``.
    """
    n = x.shape[0]
    x = x.reshape((n, -1))
    label = label.reshape((n, -1))
    weight = weight.reshape((n, -1))

    # Algebraically identical to the textbook formula above, but evaluated
    # as max(x, 0) - x * label + log(1 + exp(-|x|)). Taking log(sigmoid(x))
    # directly saturates for large |x| (sigmoid rounds to exactly 0 or 1),
    # which yields -inf and then nan once multiplied by a zero label term.
    loss = np.maximum(x, 0) - x * label + np.log1p(np.exp(-np.abs(x)))
    return (loss * weight).sum(axis=1)
43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67


def box_iou(box1, box2):
    """Return the intersection-over-union of two boxes.

    Each box is an indexable ``(center_x, center_y, width, height)``.
    The result is ``intersection / union`` where
    ``union = area1 + area2 - intersection``; ``0.0`` is returned when the
    union is empty (both boxes have zero area).
    """
    b1_x1 = box1[0] - box1[2] / 2
    b1_x2 = box1[0] + box1[2] / 2
    b1_y1 = box1[1] - box1[3] / 2
    b1_y2 = box1[1] + box1[3] / 2
    b2_x1 = box2[0] - box2[2] / 2
    b2_x2 = box2[0] + box2[2] / 2
    b2_y1 = box2[1] - box2[3] / 2
    b2_y2 = box2[1] + box2[3] / 2

    b1_area = (b1_x2 - b1_x1) * (b1_y2 - b1_y1)
    b2_area = (b2_x2 - b2_x1) * (b2_y2 - b2_y1)

    inter_rect_x1 = max(b1_x1, b2_x1)
    inter_rect_y1 = max(b1_y1, b2_y1)
    inter_rect_x2 = min(b1_x2, b2_x2)
    inter_rect_y2 = min(b1_y2, b2_y2)
    # Clamp at zero so disjoint boxes contribute no (negative) overlap.
    inter_area = max(inter_rect_x2 - inter_rect_x1, 0) * max(
        inter_rect_y2 - inter_rect_y1, 0)

    # The overlap is counted once in each box area, so it is subtracted
    # (not added) to obtain the area of the union.
    union_area = b1_area + b2_area - inter_area
    if union_area <= 0:
        return 0.0
    return inter_area / union_area


68 69
def build_target(gtboxes, gtlabel, attrs, grid_size):
    """Build the numpy reference targets and masks for the YOLOv3 loss.

    Args:
        gtboxes: array of shape ``(n, b, 4)`` holding ``(x, y, w, h)`` per
            box. ``x``/``y`` are scaled by ``grid_size`` and ``w``/``h`` by
            ``attrs["input_size"]``. A box whose four entries sum to zero is
            skipped.
        gtlabel: array indexed as ``gtlabel[i, j]`` giving the class index
            of box ``j`` in sample ``i``.
        attrs: dict providing ``"ignore_thresh"``, ``"anchors"`` (flat list
            ``[w0, h0, w1, h1, ...]``), ``"class_num"`` and ``"input_size"``.
        grid_size: number of cells along each side of the prediction grid.

    Returns:
        Tuple ``(tx, ty, tw, th, tweight, tconf, tcls, obj_mask,
        noobj_mask)`` of float32 arrays. All have shape
        ``(n, an_num, grid_size, grid_size)`` except ``tcls``, which has an
        extra trailing ``class_num`` axis.
    """
    n, b, _ = gtboxes.shape
    ignore_thresh = attrs["ignore_thresh"]
    anchors = attrs["anchors"]
    class_num = attrs["class_num"]
    input_size = attrs["input_size"]
    an_num = len(anchors) // 2
    obj_mask = np.zeros((n, an_num, grid_size, grid_size)).astype('float32')
    noobj_mask = np.ones((n, an_num, grid_size, grid_size)).astype('float32')
    tx = np.zeros((n, an_num, grid_size, grid_size)).astype('float32')
    ty = np.zeros((n, an_num, grid_size, grid_size)).astype('float32')
    tw = np.zeros((n, an_num, grid_size, grid_size)).astype('float32')
    th = np.zeros((n, an_num, grid_size, grid_size)).astype('float32')
    tweight = np.zeros((n, an_num, grid_size, grid_size)).astype('float32')
    tconf = np.zeros((n, an_num, grid_size, grid_size)).astype('float32')
    tcls = np.zeros(
        (n, an_num, grid_size, grid_size, class_num)).astype('float32')

    for i in range(n):
        for j in range(b):
            if gtboxes[i, j, :].sum() == 0:
                continue

            gt_label = gtlabel[i, j]
            gx = gtboxes[i, j, 0] * grid_size
            gy = gtboxes[i, j, 1] * grid_size
            gw = gtboxes[i, j, 2] * input_size
            gh = gtboxes[i, j, 3] * input_size

            # Grid cell containing the box centre.
            gi = int(gx)
            gj = int(gy)

            # Compare shapes only: both boxes are centred on the origin.
            gtbox = [0, 0, gw, gh]
            max_iou = 0
            # Reset per box so a previous box's match can never leak in.
            best_an_index = -1
            for k in range(an_num):
                anchor_box = [0, 0, anchors[2 * k], anchors[2 * k + 1]]
                iou = box_iou(gtbox, anchor_box)
                if iou > max_iou:
                    max_iou = iou
                    best_an_index = k
                if iou > ignore_thresh:
                    # Exclude *this* anchor from the no-object loss; using
                    # the running best index here would make the result
                    # depend on the order of the anchors.
                    noobj_mask[i, k, gj, gi] = 0

            if best_an_index < 0:
                # No anchor overlaps the box (e.g. zero width or height),
                # so there is no anchor to assign targets to.
                continue

            obj_mask[i, best_an_index, gj, gi] = 1
            noobj_mask[i, best_an_index, gj, gi] = 0
            # Offsets of the centre inside its grid cell.
            tx[i, best_an_index, gj, gi] = gx - gi
            ty[i, best_an_index, gj, gi] = gy - gj
            # Log-scale size relative to the matched anchor.
            tw[i, best_an_index, gj, gi] = np.log(
                gw / anchors[2 * best_an_index])
            th[i, best_an_index, gj, gi] = np.log(
                gh / anchors[2 * best_an_index + 1])
            # Weight is 2 minus the area of the box as given in gtboxes.
            tweight[i, best_an_index, gj, gi] = 2.0 - gtboxes[
                i, j, 2] * gtboxes[i, j, 3]
            tconf[i, best_an_index, gj, gi] = 1
            tcls[i, best_an_index, gj, gi, gt_label] = 1

    return (tx, ty, tw, th, tweight, tconf, tcls, obj_mask, noobj_mask)
125 126


D
dengkaipeng 已提交
127
def YoloV3Loss(x, gtbox, gtlabel, attrs):
    """Numpy reference for the forward pass of the YOLOv3 loss.

    ``x`` has shape ``(n, an_num * (5 + class_num), h, w)``. It is split per
    anchor into x, y, w, h, confidence and class channels, compared against
    the targets from ``build_target`` and combined with the
    ``loss_weight_*`` entries of ``attrs``. Returns one loss value per
    sample.
    """
    batch, _, height, width = x.shape
    num_classes = attrs["class_num"]
    num_anchors = len(attrs['anchors']) // 2

    # (n, c, h, w) -> (n, an_num, h, w, 5 + class_num)
    pred = x.reshape((batch, num_anchors, 5 + num_classes, height, width))
    pred = pred.transpose((0, 1, 3, 4, 2))
    pred_x = pred[..., 0]
    pred_y = pred[..., 1]
    pred_w = pred[..., 2]
    pred_h = pred[..., 3]
    pred_conf = pred[..., 4]
    pred_cls = pred[..., 5:]

    targets = build_target(gtbox, gtlabel, attrs, pred.shape[2])
    tx, ty, tw, th, tweight, tconf, tcls, obj_mask, noobj_mask = targets

    # Box terms are weighted per object; class terms need the object mask
    # repeated along the class axis.
    obj_weight = obj_mask * tweight
    obj_mask_expand = np.tile(
        np.expand_dims(obj_mask, 4), (1, 1, 1, 1, int(attrs['class_num'])))

    loss_xy = sce(pred_x, tx, obj_weight) + sce(pred_y, ty, obj_weight)
    loss_wh = mse(pred_w, tw, obj_weight) + mse(pred_h, th, obj_weight)
    loss_conf_target = sce(pred_conf, tconf, obj_mask)
    loss_conf_notarget = sce(pred_conf, tconf, noobj_mask)
    loss_class = sce(pred_cls, tcls, obj_mask_expand)

    xy_term = attrs['loss_weight_xy'] * loss_xy
    wh_term = attrs['loss_weight_wh'] * loss_wh
    conf_target_term = attrs['loss_weight_conf_target'] * loss_conf_target
    conf_notarget_term = (
        attrs['loss_weight_conf_notarget'] * loss_conf_notarget)
    class_term = attrs['loss_weight_class'] * loss_class
    return (xy_term + wh_term + conf_target_term + conf_notarget_term +
            class_term)
166 167 168 169


class TestYolov3LossOp(OpTest):
    """Check the ``yolov3_loss`` op against the numpy reference on CPU."""

    # Attribute names shared by the test case and the op's attrs dict.
    _LOSS_WEIGHT_NAMES = (
        "loss_weight_xy",
        "loss_weight_wh",
        "loss_weight_conf_target",
        "loss_weight_conf_notarget",
        "loss_weight_class", )

    def setUp(self):
        """Create random inputs and the expected loss for the op."""
        # Defaults first; initTestCase() may override any of them.
        for weight_name in self._LOSS_WEIGHT_NAMES:
            setattr(self, weight_name, 1.0)
        self.initTestCase()
        self.op_type = 'yolov3_loss'

        # Logits whose sigmoid is uniformly distributed in (0, 1).
        probs = np.random.uniform(0, 1, self.x_shape).astype('float32')
        x = logit(probs)
        gtbox = np.random.random(size=self.gtbox_shape).astype('float32')
        label_shape = self.gtbox_shape[:2]
        gtlabel = np.random.randint(0, self.class_num,
                                    label_shape).astype('int32')

        op_attrs = {
            "anchors": self.anchors,
            "class_num": self.class_num,
            "ignore_thresh": self.ignore_thresh,
            "input_size": self.input_size,
        }
        for weight_name in self._LOSS_WEIGHT_NAMES:
            op_attrs[weight_name] = getattr(self, weight_name)
        self.attrs = op_attrs

        self.inputs = {'X': x, 'GTBox': gtbox, 'GTLabel': gtlabel}
        expected_loss = YoloV3Loss(x, gtbox, gtlabel, self.attrs)
        self.outputs = {'Loss': expected_loss}

    def test_check_output(self):
        """Compare the op's forward output with the reference loss."""
        self.check_output_with_place(core.CPUPlace(), atol=1e-3)

    def test_check_grad_ignore_gtbox(self):
        """Check the gradient w.r.t. X only; ground truth gets no grad."""
        self.check_grad_with_place(
            core.CPUPlace(), ['X'],
            'Loss',
            no_grad_set={"GTBox", "GTLabel"},
            max_relative_error=0.31)

    def initTestCase(self):
        """Set the shapes and op attributes used by this test case."""
        self.anchors = [12, 12]
        self.class_num = 5
        self.ignore_thresh = 0.3
        self.input_size = 416
        # Each anchor predicts x, y, w, h, confidence plus class scores.
        anchor_count = len(self.anchors) // 2
        channels = anchor_count * (5 + self.class_num)
        self.x_shape = (3, channels, 5, 5)
        self.gtbox_shape = (3, 5, 4)
        self.loss_weight_xy = 1.2
        self.loss_weight_wh = 0.8
        self.loss_weight_conf_target = 2.0
        self.loss_weight_conf_notarget = 1.0
        self.loss_weight_class = 1.5
221 222 223 224


# Run the test cases when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()