diff --git a/paddle/fluid/operators/yolov3_loss_op.h b/paddle/fluid/operators/yolov3_loss_op.h
index f601651f0602fc0f00bacb6c0bb05b85e02ab115..5cb48b7cdfb06b6490a0a5ee36d08b9f59764f8e 100644
--- a/paddle/fluid/operators/yolov3_loss_op.h
+++ b/paddle/fluid/operators/yolov3_loss_op.h
@@ -156,47 +156,29 @@ static void CalcBoxLocationLossGrad(T* input_grad, const T loss, const T* input,
 
 template <typename T>
 static inline void CalcLabelLoss(T* loss, const T* input, const int index,
-                                 const int label, const T score,
-                                 const int class_num, const int stride,
-                                 const bool use_label_smooth) {
-  if (use_label_smooth) {
-    for (int i = 0; i < class_num; i++) {
-      T pred = input[index + i * stride] < -0.5 ? input[index + i * stride]
-                                                : 1.0 / class_num;
-      loss[0] += SCE<T>(pred, (i == label) ? score : 0.0);
-    }
-  } else {
-    for (int i = 0; i < class_num; i++) {
-      T pred = input[index + i * stride];
-      loss[0] += SCE<T>(pred, (i == label) ? score : 0.0);
-    }
+                                 const int label, const int class_num,
+                                 const int stride, const T pos, const T neg) {
+  for (int i = 0; i < class_num; i++) {
+    T pred = input[index + i * stride];
+    loss[0] += SCE<T>(pred, (i == label) ? pos : neg);
   }
 }
 
 template <typename T>
 static inline void CalcLabelLossGrad(T* input_grad, const T loss,
                                      const T* input, const int index,
-                                     const int label, const T score,
-                                     const int class_num, const int stride,
-                                     const bool use_label_smooth) {
-  if (use_label_smooth) {
-    for (int i = 0; i < class_num; i++) {
-      T pred = input[index + i * stride] < -0.5 ? input[index + i * stride]
-                                                : 1.0 / class_num;
-      input_grad[index + i * stride] =
-          SCEGrad<T>(pred, (i == label) ? score : 0.0) * loss;
-    }
-  } else {
-    for (int i = 0; i < class_num; i++) {
-      T pred = input[index + i * stride];
-      input_grad[index + i * stride] =
-          SCEGrad<T>(pred, (i == label) ? score : 0.0) * loss;
-    }
+                                     const int label, const int class_num,
+                                     const int stride, const T pos,
+                                     const T neg) {
+  for (int i = 0; i < class_num; i++) {
+    T pred = input[index + i * stride];
+    input_grad[index + i * stride] =
+        SCEGrad<T>(pred, (i == label) ? pos : neg) * loss;
   }
 }
 
 template <typename T>
-static inline void CalcObjnessLoss(T* loss, const T* input, const int* objness,
+static inline void CalcObjnessLoss(T* loss, const T* input, const T* objness,
                                    const int n, const int an_num, const int h,
                                    const int w, const int stride,
                                    const int an_stride) {
@@ -204,9 +186,9 @@ static inline void CalcObjnessLoss(T* loss, const T* input, const int* objness,
     for (int j = 0; j < an_num; j++) {
       for (int k = 0; k < h; k++) {
         for (int l = 0; l < w; l++) {
-          int obj = objness[k * w + l];
-          if (obj >= 0) {
-            loss[i] += SCE<T>(input[k * w + l], static_cast<T>(obj));
+          T obj = objness[k * w + l];
+          if (obj > -0.5) {
+            loss[i] += SCE<T>(input[k * w + l], obj);
           }
         }
       }
@@ -218,7 +200,7 @@ static inline void CalcObjnessLoss(T* loss, const T* input, const int* objness,
 
 template <typename T>
 static inline void CalcObjnessLossGrad(T* input_grad, const T* loss,
-                                       const T* input, const int* objness,
+                                       const T* input, const T* objness,
                                        const int n, const int an_num,
                                        const int h, const int w,
                                        const int stride, const int an_stride) {
@@ -226,10 +208,9 @@ static inline void CalcObjnessLossGrad(T* input_grad, const T* loss,
     for (int j = 0; j < an_num; j++) {
      for (int k = 0; k < h; k++) {
        for (int l = 0; l < w; l++) {
-          int obj = objness[k * w + l];
-          if (obj >= 0) {
-            input_grad[k * w + l] =
-                SCEGrad<T>(input[k * w + l], static_cast<T>(obj)) * loss[i];
+          T obj = objness[k * w + l];
+          if (obj > -0.5) {
+            input_grad[k * w + l] = SCEGrad<T>(input[k * w + l], obj) * loss[i];
           }
         }
       }
@@ -285,15 +266,22 @@ class Yolov3LossKernel : public framework::OpKernel<T> {
     const int stride = h * w;
     const int an_stride = (class_num + 5) * stride;
 
+    T label_pos = 1.0;
+    T label_neg = 0.0;
+    if (use_label_smooth) {
+      label_pos = 1.0 - 1.0 / static_cast<T>(class_num);
+      label_neg = 1.0 / static_cast<T>(class_num);
+    }
+
     const T* input_data = input->data<T>();
     const T* gt_box_data = gt_box->data<T>();
     const int* gt_label_data = gt_label->data<int>();
     const T* gt_score_data = gt_score->data<T>();
     T* loss_data = loss->mutable_data<T>({n}, ctx.GetPlace());
     memset(loss_data, 0, loss->numel() * sizeof(T));
-    int* obj_mask_data =
-        objness_mask->mutable_data<int>({n, mask_num, h, w}, ctx.GetPlace());
-    memset(obj_mask_data, 0, objness_mask->numel() * sizeof(int));
+    T* obj_mask_data =
+        objness_mask->mutable_data<T>({n, mask_num, h, w}, ctx.GetPlace());
+    memset(obj_mask_data, 0, objness_mask->numel() * sizeof(T));
     int* gt_match_mask_data =
         gt_match_mask->mutable_data<int>({n, b}, ctx.GetPlace());
 
@@ -327,7 +315,7 @@ class Yolov3LossKernel : public framework::OpKernel<T> {
 
             if (best_iou > ignore_thresh) {
               int obj_idx = (i * mask_num + j) * stride + k * w + l;
-              obj_mask_data[obj_idx] = -1;
+              obj_mask_data[obj_idx] = static_cast<T>(-1.0);
             }
             // TODO(dengkaipeng): all losses should be calculated if best IoU
             // is bigger then truth thresh should be calculated here, but
@@ -374,15 +362,15 @@ class Yolov3LossKernel : public framework::OpKernel<T> {
           CalcBoxLocationLoss<T>(loss_data + i, input_data, gt, anchors, best_n,
                                  box_idx, gi, gj, h, input_size, stride);
 
+          T score = gt_score_data[i * b + t];
           int obj_idx = (i * mask_num + mask_idx) * stride + gj * w + gi;
-          obj_mask_data[obj_idx] = 1;
+          obj_mask_data[obj_idx] = score;
 
           int label = gt_label_data[i * b + t];
-          T score = gt_score_data[i * b + t];
           int label_idx = GetEntryIndex(i, mask_idx, gj * w + gi, mask_num,
                                         an_stride, stride, 5);
-          CalcLabelLoss<T>(loss_data + i, input_data, label_idx, label, score,
-                           class_num, stride, use_label_smooth);
+          CalcLabelLoss<T>(loss_data + i, input_data, label_idx, label,
+                           class_num, stride, label_pos, label_neg);
         }
       }
     }
@@ -399,7 +387,6 @@ class Yolov3LossGradKernel : public framework::OpKernel<T> {
     auto* input = ctx.Input<Tensor>("X");
     auto* gt_box = ctx.Input<Tensor>("GTBox");
     auto* gt_label = ctx.Input<Tensor>("GTLabel");
-    auto* gt_score = ctx.Input<Tensor>("GTScore");
     auto* input_grad = ctx.Output<Tensor>(framework::GradVarName("X"));
     auto* loss_grad = ctx.Input<Tensor>(framework::GradVarName("Loss"));
     auto* objness_mask = ctx.Input<Tensor>("ObjectnessMask");
@@ -421,12 +408,18 @@ class Yolov3LossGradKernel : public framework::OpKernel<T> {
     const int stride = h * w;
     const int an_stride = (class_num + 5) * stride;
 
+    T label_pos = 1.0;
+    T label_neg = 0.0;
+    if (use_label_smooth) {
+      label_pos = 1.0 - 1.0 / static_cast<T>(class_num);
+      label_neg = 1.0 / static_cast<T>(class_num);
+    }
+
     const T* input_data = input->data<T>();
     const T* gt_box_data = gt_box->data<T>();
     const int* gt_label_data = gt_label->data<int>();
-    const T* gt_score_data = gt_score->data<T>();
     const T* loss_grad_data = loss_grad->data<T>();
-    const int* obj_mask_data = objness_mask->data<int>();
+    const T* obj_mask_data = objness_mask->data<T>();
     const int* gt_match_mask_data = gt_match_mask->data<int>();
     T* input_grad_data =
         input_grad->mutable_data<T>({n, c, h, w}, ctx.GetPlace());
@@ -447,12 +440,11 @@ class Yolov3LossGradKernel : public framework::OpKernel<T> {
               anchor_mask[mask_idx], box_idx, gi, gj, h, input_size, stride);
 
           int label = gt_label_data[i * b + t];
-          T score = gt_score_data[i * b + t];
           int label_idx = GetEntryIndex(i, mask_idx, gj * w + gi, mask_num,
                                         an_stride, stride, 5);
           CalcLabelLossGrad<T>(input_grad_data, loss_grad_data[i], input_data,
-                               label_idx, label, score, class_num, stride,
-                               use_label_smooth);
+                               label_idx, label, class_num, stride, label_pos,
+                               label_neg);
         }
       }
     }
diff --git a/python/paddle/fluid/tests/unittests/test_yolov3_loss_op.py b/python/paddle/fluid/tests/unittests/test_yolov3_loss_op.py
index 79c953bbd1fcf5d3ae2f49fcd15a332a71d4dcba..426a64f7a24c3895023c607586bd4ecc9b3260cf 100644
--- a/python/paddle/fluid/tests/unittests/test_yolov3_loss_op.py
+++ b/python/paddle/fluid/tests/unittests/test_yolov3_loss_op.py
@@ -81,6 +81,9 @@ def YOLOv3Loss(x, gtbox, gtlabel, gtscore, attrs):
     x = x.reshape((n, mask_num, 5 + class_num, h, w)).transpose((0, 1, 3, 4, 2))
     loss = np.zeros((n)).astype('float32')
 
+    label_pos = 1.0 - 1.0 / class_num if use_label_smooth else 1.0
+    label_neg = 1.0 / class_num if use_label_smooth else 0.0
+
     pred_box = x[:, :, :, :, :4].copy()
     grid_x = np.tile(np.arange(w).reshape((1, w)), (h, 1))
     grid_y = np.tile(np.arange(h).reshape((h, 1)), (1, w))
@@ -103,7 +106,7 @@ def YOLOv3Loss(x, gtbox, gtlabel, gtscore, attrs):
 
     pred_box = pred_box.reshape((n, -1, 4))
     pred_obj = x[:, :, :, :, 4].reshape((n, -1))
-    objness = np.zeros(pred_box.shape[:2])
+    objness = np.zeros(pred_box.shape[:2]).astype('float32')
     ious = batch_xywh_box_iou(pred_box, gtbox)
     ious_max = np.max(ious, axis=-1)
     objness = np.where(ious_max > ignore_thresh, -np.ones_like(objness),
@@ -145,17 +148,17 @@ def YOLOv3Loss(x, gtbox, gtlabel, gtscore, attrs):
             loss[i] += l1loss(x[i, an_idx, gj, gi, 2], tw) * scale
             loss[i] += l1loss(x[i, an_idx, gj, gi, 3], th) * scale
 
-            objness[i, an_idx * h * w + gj * w + gi] = 1
+            objness[i, an_idx * h * w + gj * w + gi] = gtscore[i, j]
 
             for label_idx in range(class_num):
-                loss[i] += sce(x[i, an_idx, gj, gi, 5 + label_idx],
-                               int(label_idx == gtlabel[i, j]) * gtscore[i, j])
+                loss[i] += sce(x[i, an_idx, gj, gi, 5 + label_idx], label_pos
+                               if label_idx == gtlabel[i, j] else label_neg)
 
         for j in range(mask_num * h * w):
             if objness[i, j] >= 0:
                 loss[i] += sce(pred_obj[i, j], objness[i, j])
 
-    return (loss, objness.reshape((n, mask_num, h, w)).astype('int32'), \
+    return (loss, objness.reshape((n, mask_num, h, w)).astype('float32'), \
             gt_matches.astype('int32'))
 
 
@@ -220,9 +223,9 @@ class TestYolov3LossOp(OpTest):
         self.use_label_smooth = True
 
 
-class TestYolov3LossWithLabelSmooth(TestYolov3LossOp):
+class TestYolov3LossWithoutLabelSmooth(TestYolov3LossOp):
     def set_label_smooth(self):
-        self.use_label_smooth = True
+        self.use_label_smooth = False
 
 
 if __name__ == "__main__":
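
For reference, a minimal numpy sketch (not part of the patch) of the label_pos/label_neg targets the hunks above introduce, paired with a sigmoid cross entropy; the sce formula here is assumed to be the standard stable sigmoid-cross-entropy-with-logits, and class_targets is a hypothetical helper name used only for illustration.

# Illustrative sketch only; assumes sce is the usual sigmoid cross entropy on
# logits. class_targets is a hypothetical helper, not part of the patch.
import numpy as np


def sce(x, label):
    # Numerically stable sigmoid cross entropy with logits.
    return np.maximum(x, 0.0) - x * label + np.log(1.0 + np.exp(-np.abs(x)))


def class_targets(gt_label, class_num, use_label_smooth):
    # Mirrors label_pos/label_neg above: smoothing moves the one-hot target
    # from {0, 1} to {1/class_num, 1 - 1/class_num}.
    label_pos = 1.0 - 1.0 / class_num if use_label_smooth else 1.0
    label_neg = 1.0 / class_num if use_label_smooth else 0.0
    target = np.full((class_num, ), label_neg, dtype='float32')
    target[gt_label] = label_pos
    return target


# Example: 4 classes, ground truth class 2, smoothing on ->
# target is [0.25, 0.25, 0.75, 0.25].
logits = np.array([0.3, -1.2, 2.0, 0.1], dtype='float32')
loss = sce(logits, class_targets(2, 4, True)).sum()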