diff --git a/paddle/fluid/operators/detection/yolov3_loss_op.cc b/paddle/fluid/operators/detection/yolov3_loss_op.cc index 38eb43a3cc1529294fd3c3fab8e41c7fac2245e1..3b1d4d2a8052ca86d76ef12b140d83f66a837623 100644 --- a/paddle/fluid/operators/detection/yolov3_loss_op.cc +++ b/paddle/fluid/operators/detection/yolov3_loss_op.cc @@ -223,6 +223,15 @@ class Yolov3LossOpMaker : public framework::OpProtoAndCheckerMaker { loss = (loss_{xy} + loss_{wh}) * weight_{box} + loss_{conf} + loss_{class} $$ + + While :attr:`use_label_smooth` is set to be :attr:`True`, the classification + target will be smoothed when calculating classification loss, target of + positive samples will be smoothed to $$1.0 - 1.0/class_num$$ and target of + negetive samples will be smoothed to $$1.0/class_num$$. + + While :attr:`GTScore` is given, which means the mixup score of ground truth + boxes, all looses incured by a ground truth box will be multiplied by its + mixup score. )DOC"); } }; diff --git a/python/paddle/fluid/layers/detection.py b/python/paddle/fluid/layers/detection.py index cbedd70f857b3f767492826cda08ae1171d72bad..aa7b4a50be24fcecbfefeba710eb9cbec5ecd4ec 100644 --- a/python/paddle/fluid/layers/detection.py +++ b/python/paddle/fluid/layers/detection.py @@ -515,7 +515,9 @@ def yolov3_loss(x, class_num, ignore_thresh, downsample_ratio, - name=None): + name=None, + gtscore=None, + use_label_smooth=True): """ ${comment} @@ -534,27 +536,34 @@ def yolov3_loss(x, ignore_thresh (float): ${ignore_thresh_comment} downsample_ratio (int): ${downsample_ratio_comment} name (string): the name of yolov3 loss + gtscore (Variable): mixup score of ground truth boxes, shoud be in shape + of [N, B]. + use_label_smooth (bool): ${use_label_smooth_comment} Returns: - Variable: A 1-D tensor with shape [1], the value of yolov3 loss + Variable: A 1-D tensor with shape [N], the value of yolov3 loss Raises: TypeError: Input x of yolov3_loss must be Variable TypeError: Input gtbox of yolov3_loss must be Variable" TypeError: Input gtlabel of yolov3_loss must be Variable" + TypeError: Input gtscore of yolov3_loss must be Variable" TypeError: Attr anchors of yolov3_loss must be list or tuple TypeError: Attr class_num of yolov3_loss must be an integer TypeError: Attr ignore_thresh of yolov3_loss must be a float number + TypeError: Attr use_label_smooth of yolov3_loss must be a bool value Examples: .. code-block:: python x = fluid.layers.data(name='x', shape=[255, 13, 13], dtype='float32') - gtbox = fluid.layers.data(name='gtbox', shape=[6, 5], dtype='float32') - gtlabel = fluid.layers.data(name='gtlabel', shape=[6, 1], dtype='int32') + gtbox = fluid.layers.data(name='gtbox', shape=[6, 4], dtype='float32') + gtlabel = fluid.layers.data(name='gtlabel', shape=[6], dtype='int32') + gtscore = fluid.layers.data(name='gtlabel', shape=[6], dtype='int32') anchors = [10, 13, 16, 30, 33, 23, 30, 61, 62, 45, 59, 119, 116, 90, 156, 198, 373, 326] anchor_mask = [0, 1, 2] - loss = fluid.layers.yolov3_loss(x=x, gtbox=gtbox, gtlabel=gtlabel, anchors=anchors, + loss = fluid.layers.yolov3_loss(x=x, gtbox=gtbox, gtlabel=gtlabel, + gtscore=gtscore, anchors=anchors, anchor_mask=anchor_mask, class_num=80, ignore_thresh=0.7, downsample_ratio=32) """ @@ -566,6 +575,8 @@ def yolov3_loss(x, raise TypeError("Input gtbox of yolov3_loss must be Variable") if not isinstance(gtlabel, Variable): raise TypeError("Input gtlabel of yolov3_loss must be Variable") + if not isinstance(gtscore, Variable): + raise TypeError("Input gtscore of yolov3_loss must be Variable") if not isinstance(anchors, list) and not isinstance(anchors, tuple): raise TypeError("Attr anchors of yolov3_loss must be list or tuple") if not isinstance(anchor_mask, list) and not isinstance(anchor_mask, tuple): @@ -575,6 +586,9 @@ def yolov3_loss(x, if not isinstance(ignore_thresh, float): raise TypeError( "Attr ignore_thresh of yolov3_loss must be a float number") + if not isinstance(use_label_smooth, bool): + raise TypeError( + "Attr use_label_smooth of yolov3_loss must be a bool value") if name is None: loss = helper.create_variable_for_type_inference(dtype=x.dtype) @@ -585,21 +599,26 @@ def yolov3_loss(x, objectness_mask = helper.create_variable_for_type_inference(dtype='int32') gt_match_mask = helper.create_variable_for_type_inference(dtype='int32') + inputs = { + "X": x, + "GTBox": gtbox, + "GTLabel": gtlabel, + } + if gtscore: + inputs["GTScore"] = gtscore + attrs = { "anchors": anchors, "anchor_mask": anchor_mask, "class_num": class_num, "ignore_thresh": ignore_thresh, "downsample_ratio": downsample_ratio, + "use_label_smooth": use_label_smooth, } helper.append_op( type='yolov3_loss', - inputs={ - "X": x, - "GTBox": gtbox, - "GTLabel": gtlabel, - }, + inputs=inputs, outputs={ 'Loss': loss, 'ObjectnessMask': objectness_mask, diff --git a/python/paddle/fluid/tests/test_detection.py b/python/paddle/fluid/tests/test_detection.py index 6218db73459a2bb55d72545c738f88dbd8cce0f7..b202c0ffea2b33cd5708ea0b4bdb8ec7b97e531a 100644 --- a/python/paddle/fluid/tests/test_detection.py +++ b/python/paddle/fluid/tests/test_detection.py @@ -476,8 +476,16 @@ class TestYoloDetection(unittest.TestCase): x = layers.data(name='x', shape=[30, 7, 7], dtype='float32') gtbox = layers.data(name='gtbox', shape=[10, 4], dtype='float32') gtlabel = layers.data(name='gtlabel', shape=[10], dtype='int32') - loss = layers.yolov3_loss(x, gtbox, gtlabel, [10, 13, 30, 13], - [0, 1], 10, 0.7, 32) + gtscore = layers.data(name='gtscore', shape=[10], dtype='int32') + loss = layers.yolov3_loss( + x, + gtbox, + gtlabel, [10, 13, 30, 13], [0, 1], + 10, + 0.7, + 32, + gtscore=gtscore, + use_label_smooth=False) self.assertIsNotNone(loss) diff --git a/python/paddle/fluid/tests/unittests/test_yolov3_loss_op.py b/python/paddle/fluid/tests/unittests/test_yolov3_loss_op.py index 020c1139230a9177c4d7765367359d91839d7d46..b3d1ff8058e6a59378e9e55a91bc893b87e18f0e 100644 --- a/python/paddle/fluid/tests/unittests/test_yolov3_loss_op.py +++ b/python/paddle/fluid/tests/unittests/test_yolov3_loss_op.py @@ -23,8 +23,8 @@ from op_test import OpTest from paddle.fluid import core -def l2loss(x, y): - return 0.5 * (y - x) * (y - x) +def l1loss(x, y): + return abs(x - y) def sce(x, label): @@ -66,7 +66,7 @@ def batch_xywh_box_iou(box1, box2): return inter_area / union -def YOLOv3Loss(x, gtbox, gtlabel, attrs): +def YOLOv3Loss(x, gtbox, gtlabel, gtscore, attrs): n, c, h, w = x.shape b = gtbox.shape[1] anchors = attrs['anchors'] @@ -75,21 +75,21 @@ def YOLOv3Loss(x, gtbox, gtlabel, attrs): mask_num = len(anchor_mask) class_num = attrs["class_num"] ignore_thresh = attrs['ignore_thresh'] - downsample = attrs['downsample'] - input_size = downsample * h + downsample_ratio = attrs['downsample_ratio'] + use_label_smooth = attrs['use_label_smooth'] + input_size = downsample_ratio * h x = x.reshape((n, mask_num, 5 + class_num, h, w)).transpose((0, 1, 3, 4, 2)) loss = np.zeros((n)).astype('float32') + label_pos = 1.0 - 1.0 / class_num if use_label_smooth else 1.0 + label_neg = 1.0 / class_num if use_label_smooth else 0.0 + pred_box = x[:, :, :, :, :4].copy() grid_x = np.tile(np.arange(w).reshape((1, w)), (h, 1)) grid_y = np.tile(np.arange(h).reshape((h, 1)), (1, w)) pred_box[:, :, :, :, 0] = (grid_x + sigmoid(pred_box[:, :, :, :, 0])) / w pred_box[:, :, :, :, 1] = (grid_y + sigmoid(pred_box[:, :, :, :, 1])) / h - x[:, :, :, :, 5:] = np.where(x[:, :, :, :, 5:] < -0.5, x[:, :, :, :, 5:], - np.ones_like(x[:, :, :, :, 5:]) * 1.0 / - class_num) - mask_anchors = [] for m in anchor_mask: mask_anchors.append((anchors[2 * m], anchors[2 * m + 1])) @@ -138,21 +138,22 @@ def YOLOv3Loss(x, gtbox, gtlabel, attrs): ty = gtbox[i, j, 1] * w - gj tw = np.log(gtbox[i, j, 2] * input_size / mask_anchors[an_idx][0]) th = np.log(gtbox[i, j, 3] * input_size / mask_anchors[an_idx][1]) - scale = (2.0 - gtbox[i, j, 2] * gtbox[i, j, 3]) + scale = (2.0 - gtbox[i, j, 2] * gtbox[i, j, 3]) * gtscore[i, j] loss[i] += sce(x[i, an_idx, gj, gi, 0], tx) * scale loss[i] += sce(x[i, an_idx, gj, gi, 1], ty) * scale - loss[i] += l2loss(x[i, an_idx, gj, gi, 2], tw) * scale - loss[i] += l2loss(x[i, an_idx, gj, gi, 3], th) * scale + loss[i] += l1loss(x[i, an_idx, gj, gi, 2], tw) * scale + loss[i] += l1loss(x[i, an_idx, gj, gi, 3], th) * scale - objness[i, an_idx * h * w + gj * w + gi] = 1.0 + objness[i, an_idx * h * w + gj * w + gi] = gtscore[i, j] for label_idx in range(class_num): - loss[i] += sce(x[i, an_idx, gj, gi, 5 + label_idx], - float(label_idx == gtlabel[i, j])) + loss[i] += sce(x[i, an_idx, gj, gi, 5 + label_idx], label_pos + if label_idx == gtlabel[i, j] else + label_neg) * gtscore[i, j] for j in range(mask_num * h * w): if objness[i, j] > 0: - loss[i] += sce(pred_obj[i, j], 1.0) + loss[i] += sce(pred_obj[i, j], 1.0) * objness[i, j] elif objness[i, j] == 0: loss[i] += sce(pred_obj[i, j], 0.0) @@ -167,6 +168,7 @@ class TestYolov3LossOp(OpTest): x = logit(np.random.uniform(0, 1, self.x_shape).astype('float32')) gtbox = np.random.random(size=self.gtbox_shape).astype('float32') gtlabel = np.random.randint(0, self.class_num, self.gtbox_shape[:2]) + gtscore = np.random.random(self.gtbox_shape[:2]).astype('float32') gtmask = np.random.randint(0, 2, self.gtbox_shape[:2]) gtbox = gtbox * gtmask[:, :, np.newaxis] gtlabel = gtlabel * gtmask @@ -176,15 +178,18 @@ class TestYolov3LossOp(OpTest): "anchor_mask": self.anchor_mask, "class_num": self.class_num, "ignore_thresh": self.ignore_thresh, - "downsample": self.downsample, + "downsample_ratio": self.downsample_ratio, + "use_label_smooth": self.use_label_smooth, } self.inputs = { 'X': x, 'GTBox': gtbox.astype('float32'), 'GTLabel': gtlabel.astype('int32'), + 'GTScore': gtscore.astype('float32') } - loss, objness, gt_matches = YOLOv3Loss(x, gtbox, gtlabel, self.attrs) + loss, objness, gt_matches = YOLOv3Loss(x, gtbox, gtlabel, gtscore, + self.attrs) self.outputs = { 'Loss': loss, 'ObjectnessMask': objness, @@ -193,24 +198,33 @@ class TestYolov3LossOp(OpTest): def test_check_output(self): place = core.CPUPlace() - self.check_output_with_place(place, atol=1e-3) + self.check_output_with_place(place, atol=2e-3) def test_check_grad_ignore_gtbox(self): place = core.CPUPlace() self.check_grad_with_place( place, ['X'], 'Loss', - no_grad_set=set(["GTBox", "GTLabel"]), - max_relative_error=0.3) + no_grad_set=set(["GTBox", "GTLabel", "GTScore"]), + max_relative_error=0.2) def initTestCase(self): - self.anchors = [10, 13, 16, 30, 33, 23] - self.anchor_mask = [1, 2] + self.anchors = [ + 10, 13, 16, 30, 33, 23, 30, 61, 62, 45, 59, 119, 116, 90, 156, 198, + 373, 326 + ] + self.anchor_mask = [0, 1, 2] self.class_num = 5 self.ignore_thresh = 0.5 - self.downsample = 32 + self.downsample_ratio = 32 self.x_shape = (3, len(self.anchor_mask) * (5 + self.class_num), 5, 5) self.gtbox_shape = (3, 5, 4) + self.use_label_smooth = True + + +class TestYolov3LossWithoutLabelSmooth(TestYolov3LossOp): + def set_label_smooth(self): + self.use_label_smooth = False if __name__ == "__main__":