提交 c945ffa7 编写于 作者: D dengkaipeng

fix label_smooth and mixup score

上级 20200e12
...@@ -156,47 +156,29 @@ static void CalcBoxLocationLossGrad(T* input_grad, const T loss, const T* input, ...@@ -156,47 +156,29 @@ static void CalcBoxLocationLossGrad(T* input_grad, const T loss, const T* input,
template <typename T> template <typename T>
static inline void CalcLabelLoss(T* loss, const T* input, const int index, static inline void CalcLabelLoss(T* loss, const T* input, const int index,
const int label, const T score, const int label, const int class_num,
const int class_num, const int stride, const int stride, const T pos, const T neg) {
const bool use_label_smooth) {
if (use_label_smooth) {
for (int i = 0; i < class_num; i++) {
T pred = input[index + i * stride] < -0.5 ? input[index + i * stride]
: 1.0 / class_num;
loss[0] += SCE<T>(pred, (i == label) ? score : 0.0);
}
} else {
for (int i = 0; i < class_num; i++) { for (int i = 0; i < class_num; i++) {
T pred = input[index + i * stride]; T pred = input[index + i * stride];
loss[0] += SCE<T>(pred, (i == label) ? score : 0.0); loss[0] += SCE<T>(pred, (i == label) ? pos : neg);
}
} }
} }
template <typename T> template <typename T>
static inline void CalcLabelLossGrad(T* input_grad, const T loss, static inline void CalcLabelLossGrad(T* input_grad, const T loss,
const T* input, const int index, const T* input, const int index,
const int label, const T score, const int label, const int class_num,
const int class_num, const int stride, const int stride, const T pos,
const bool use_label_smooth) { const T neg) {
if (use_label_smooth) {
for (int i = 0; i < class_num; i++) {
T pred = input[index + i * stride] < -0.5 ? input[index + i * stride]
: 1.0 / class_num;
input_grad[index + i * stride] =
SCEGrad<T>(pred, (i == label) ? score : 0.0) * loss;
}
} else {
for (int i = 0; i < class_num; i++) { for (int i = 0; i < class_num; i++) {
T pred = input[index + i * stride]; T pred = input[index + i * stride];
input_grad[index + i * stride] = input_grad[index + i * stride] =
SCEGrad<T>(pred, (i == label) ? score : 0.0) * loss; SCEGrad<T>(pred, (i == label) ? pos : neg) * loss;
}
} }
} }
template <typename T> template <typename T>
static inline void CalcObjnessLoss(T* loss, const T* input, const int* objness, static inline void CalcObjnessLoss(T* loss, const T* input, const T* objness,
const int n, const int an_num, const int h, const int n, const int an_num, const int h,
const int w, const int stride, const int w, const int stride,
const int an_stride) { const int an_stride) {
...@@ -204,9 +186,9 @@ static inline void CalcObjnessLoss(T* loss, const T* input, const int* objness, ...@@ -204,9 +186,9 @@ static inline void CalcObjnessLoss(T* loss, const T* input, const int* objness,
for (int j = 0; j < an_num; j++) { for (int j = 0; j < an_num; j++) {
for (int k = 0; k < h; k++) { for (int k = 0; k < h; k++) {
for (int l = 0; l < w; l++) { for (int l = 0; l < w; l++) {
int obj = objness[k * w + l]; T obj = objness[k * w + l];
if (obj >= 0) { if (obj > -0.5) {
loss[i] += SCE<T>(input[k * w + l], static_cast<T>(obj)); loss[i] += SCE<T>(input[k * w + l], obj);
} }
} }
} }
...@@ -218,7 +200,7 @@ static inline void CalcObjnessLoss(T* loss, const T* input, const int* objness, ...@@ -218,7 +200,7 @@ static inline void CalcObjnessLoss(T* loss, const T* input, const int* objness,
template <typename T> template <typename T>
static inline void CalcObjnessLossGrad(T* input_grad, const T* loss, static inline void CalcObjnessLossGrad(T* input_grad, const T* loss,
const T* input, const int* objness, const T* input, const T* objness,
const int n, const int an_num, const int n, const int an_num,
const int h, const int w, const int h, const int w,
const int stride, const int an_stride) { const int stride, const int an_stride) {
...@@ -226,10 +208,9 @@ static inline void CalcObjnessLossGrad(T* input_grad, const T* loss, ...@@ -226,10 +208,9 @@ static inline void CalcObjnessLossGrad(T* input_grad, const T* loss,
for (int j = 0; j < an_num; j++) { for (int j = 0; j < an_num; j++) {
for (int k = 0; k < h; k++) { for (int k = 0; k < h; k++) {
for (int l = 0; l < w; l++) { for (int l = 0; l < w; l++) {
int obj = objness[k * w + l]; T obj = objness[k * w + l];
if (obj >= 0) { if (obj > -0.5) {
input_grad[k * w + l] = input_grad[k * w + l] = SCEGrad<T>(input[k * w + l], obj) * loss[i];
SCEGrad<T>(input[k * w + l], static_cast<T>(obj)) * loss[i];
} }
} }
} }
...@@ -285,15 +266,22 @@ class Yolov3LossKernel : public framework::OpKernel<T> { ...@@ -285,15 +266,22 @@ class Yolov3LossKernel : public framework::OpKernel<T> {
const int stride = h * w; const int stride = h * w;
const int an_stride = (class_num + 5) * stride; const int an_stride = (class_num + 5) * stride;
T label_pos = 1.0;
T label_neg = 0.0;
if (use_label_smooth) {
label_pos = 1.0 - 1.0 / static_cast<T>(class_num);
label_neg = 1.0 / static_cast<T>(class_num);
}
const T* input_data = input->data<T>(); const T* input_data = input->data<T>();
const T* gt_box_data = gt_box->data<T>(); const T* gt_box_data = gt_box->data<T>();
const int* gt_label_data = gt_label->data<int>(); const int* gt_label_data = gt_label->data<int>();
const T* gt_score_data = gt_score->data<T>(); const T* gt_score_data = gt_score->data<T>();
T* loss_data = loss->mutable_data<T>({n}, ctx.GetPlace()); T* loss_data = loss->mutable_data<T>({n}, ctx.GetPlace());
memset(loss_data, 0, loss->numel() * sizeof(T)); memset(loss_data, 0, loss->numel() * sizeof(T));
int* obj_mask_data = T* obj_mask_data =
objness_mask->mutable_data<int>({n, mask_num, h, w}, ctx.GetPlace()); objness_mask->mutable_data<T>({n, mask_num, h, w}, ctx.GetPlace());
memset(obj_mask_data, 0, objness_mask->numel() * sizeof(int)); memset(obj_mask_data, 0, objness_mask->numel() * sizeof(T));
int* gt_match_mask_data = int* gt_match_mask_data =
gt_match_mask->mutable_data<int>({n, b}, ctx.GetPlace()); gt_match_mask->mutable_data<int>({n, b}, ctx.GetPlace());
...@@ -327,7 +315,7 @@ class Yolov3LossKernel : public framework::OpKernel<T> { ...@@ -327,7 +315,7 @@ class Yolov3LossKernel : public framework::OpKernel<T> {
if (best_iou > ignore_thresh) { if (best_iou > ignore_thresh) {
int obj_idx = (i * mask_num + j) * stride + k * w + l; int obj_idx = (i * mask_num + j) * stride + k * w + l;
obj_mask_data[obj_idx] = -1; obj_mask_data[obj_idx] = static_cast<T>(-1.0);
} }
// TODO(dengkaipeng): all losses should be calculated if best IoU // TODO(dengkaipeng): all losses should be calculated if best IoU
// is bigger than truth thresh should be calculated here, but // is bigger than truth thresh should be calculated here, but
...@@ -374,15 +362,15 @@ class Yolov3LossKernel : public framework::OpKernel<T> { ...@@ -374,15 +362,15 @@ class Yolov3LossKernel : public framework::OpKernel<T> {
CalcBoxLocationLoss<T>(loss_data + i, input_data, gt, anchors, best_n, CalcBoxLocationLoss<T>(loss_data + i, input_data, gt, anchors, best_n,
box_idx, gi, gj, h, input_size, stride); box_idx, gi, gj, h, input_size, stride);
T score = gt_score_data[i * b + t];
int obj_idx = (i * mask_num + mask_idx) * stride + gj * w + gi; int obj_idx = (i * mask_num + mask_idx) * stride + gj * w + gi;
obj_mask_data[obj_idx] = 1; obj_mask_data[obj_idx] = score;
int label = gt_label_data[i * b + t]; int label = gt_label_data[i * b + t];
T score = gt_score_data[i * b + t];
int label_idx = GetEntryIndex(i, mask_idx, gj * w + gi, mask_num, int label_idx = GetEntryIndex(i, mask_idx, gj * w + gi, mask_num,
an_stride, stride, 5); an_stride, stride, 5);
CalcLabelLoss<T>(loss_data + i, input_data, label_idx, label, score, CalcLabelLoss<T>(loss_data + i, input_data, label_idx, label,
class_num, stride, use_label_smooth); class_num, stride, label_pos, label_neg);
} }
} }
} }
...@@ -399,7 +387,6 @@ class Yolov3LossGradKernel : public framework::OpKernel<T> { ...@@ -399,7 +387,6 @@ class Yolov3LossGradKernel : public framework::OpKernel<T> {
auto* input = ctx.Input<Tensor>("X"); auto* input = ctx.Input<Tensor>("X");
auto* gt_box = ctx.Input<Tensor>("GTBox"); auto* gt_box = ctx.Input<Tensor>("GTBox");
auto* gt_label = ctx.Input<Tensor>("GTLabel"); auto* gt_label = ctx.Input<Tensor>("GTLabel");
auto* gt_score = ctx.Input<Tensor>("GTScore");
auto* input_grad = ctx.Output<Tensor>(framework::GradVarName("X")); auto* input_grad = ctx.Output<Tensor>(framework::GradVarName("X"));
auto* loss_grad = ctx.Input<Tensor>(framework::GradVarName("Loss")); auto* loss_grad = ctx.Input<Tensor>(framework::GradVarName("Loss"));
auto* objness_mask = ctx.Input<Tensor>("ObjectnessMask"); auto* objness_mask = ctx.Input<Tensor>("ObjectnessMask");
...@@ -421,12 +408,18 @@ class Yolov3LossGradKernel : public framework::OpKernel<T> { ...@@ -421,12 +408,18 @@ class Yolov3LossGradKernel : public framework::OpKernel<T> {
const int stride = h * w; const int stride = h * w;
const int an_stride = (class_num + 5) * stride; const int an_stride = (class_num + 5) * stride;
T label_pos = 1.0;
T label_neg = 0.0;
if (use_label_smooth) {
label_pos = 1.0 - 1.0 / static_cast<T>(class_num);
label_neg = 1.0 / static_cast<T>(class_num);
}
const T* input_data = input->data<T>(); const T* input_data = input->data<T>();
const T* gt_box_data = gt_box->data<T>(); const T* gt_box_data = gt_box->data<T>();
const int* gt_label_data = gt_label->data<int>(); const int* gt_label_data = gt_label->data<int>();
const T* gt_score_data = gt_score->data<T>();
const T* loss_grad_data = loss_grad->data<T>(); const T* loss_grad_data = loss_grad->data<T>();
const int* obj_mask_data = objness_mask->data<int>(); const T* obj_mask_data = objness_mask->data<T>();
const int* gt_match_mask_data = gt_match_mask->data<int>(); const int* gt_match_mask_data = gt_match_mask->data<int>();
T* input_grad_data = T* input_grad_data =
input_grad->mutable_data<T>({n, c, h, w}, ctx.GetPlace()); input_grad->mutable_data<T>({n, c, h, w}, ctx.GetPlace());
...@@ -447,12 +440,11 @@ class Yolov3LossGradKernel : public framework::OpKernel<T> { ...@@ -447,12 +440,11 @@ class Yolov3LossGradKernel : public framework::OpKernel<T> {
anchor_mask[mask_idx], box_idx, gi, gj, h, input_size, stride); anchor_mask[mask_idx], box_idx, gi, gj, h, input_size, stride);
int label = gt_label_data[i * b + t]; int label = gt_label_data[i * b + t];
T score = gt_score_data[i * b + t];
int label_idx = GetEntryIndex(i, mask_idx, gj * w + gi, mask_num, int label_idx = GetEntryIndex(i, mask_idx, gj * w + gi, mask_num,
an_stride, stride, 5); an_stride, stride, 5);
CalcLabelLossGrad<T>(input_grad_data, loss_grad_data[i], input_data, CalcLabelLossGrad<T>(input_grad_data, loss_grad_data[i], input_data,
label_idx, label, score, class_num, stride, label_idx, label, class_num, stride, label_pos,
use_label_smooth); label_neg);
} }
} }
} }
......
...@@ -81,6 +81,9 @@ def YOLOv3Loss(x, gtbox, gtlabel, gtscore, attrs): ...@@ -81,6 +81,9 @@ def YOLOv3Loss(x, gtbox, gtlabel, gtscore, attrs):
x = x.reshape((n, mask_num, 5 + class_num, h, w)).transpose((0, 1, 3, 4, 2)) x = x.reshape((n, mask_num, 5 + class_num, h, w)).transpose((0, 1, 3, 4, 2))
loss = np.zeros((n)).astype('float32') loss = np.zeros((n)).astype('float32')
label_pos = 1.0 - 1.0 / class_num if use_label_smooth else 1.0
label_neg = 1.0 / class_num if use_label_smooth else 0.0
pred_box = x[:, :, :, :, :4].copy() pred_box = x[:, :, :, :, :4].copy()
grid_x = np.tile(np.arange(w).reshape((1, w)), (h, 1)) grid_x = np.tile(np.arange(w).reshape((1, w)), (h, 1))
grid_y = np.tile(np.arange(h).reshape((h, 1)), (1, w)) grid_y = np.tile(np.arange(h).reshape((h, 1)), (1, w))
...@@ -103,7 +106,7 @@ def YOLOv3Loss(x, gtbox, gtlabel, gtscore, attrs): ...@@ -103,7 +106,7 @@ def YOLOv3Loss(x, gtbox, gtlabel, gtscore, attrs):
pred_box = pred_box.reshape((n, -1, 4)) pred_box = pred_box.reshape((n, -1, 4))
pred_obj = x[:, :, :, :, 4].reshape((n, -1)) pred_obj = x[:, :, :, :, 4].reshape((n, -1))
objness = np.zeros(pred_box.shape[:2]) objness = np.zeros(pred_box.shape[:2]).astype('float32')
ious = batch_xywh_box_iou(pred_box, gtbox) ious = batch_xywh_box_iou(pred_box, gtbox)
ious_max = np.max(ious, axis=-1) ious_max = np.max(ious, axis=-1)
objness = np.where(ious_max > ignore_thresh, -np.ones_like(objness), objness = np.where(ious_max > ignore_thresh, -np.ones_like(objness),
...@@ -145,17 +148,17 @@ def YOLOv3Loss(x, gtbox, gtlabel, gtscore, attrs): ...@@ -145,17 +148,17 @@ def YOLOv3Loss(x, gtbox, gtlabel, gtscore, attrs):
loss[i] += l1loss(x[i, an_idx, gj, gi, 2], tw) * scale loss[i] += l1loss(x[i, an_idx, gj, gi, 2], tw) * scale
loss[i] += l1loss(x[i, an_idx, gj, gi, 3], th) * scale loss[i] += l1loss(x[i, an_idx, gj, gi, 3], th) * scale
objness[i, an_idx * h * w + gj * w + gi] = 1 objness[i, an_idx * h * w + gj * w + gi] = gtscore[i, j]
for label_idx in range(class_num): for label_idx in range(class_num):
loss[i] += sce(x[i, an_idx, gj, gi, 5 + label_idx], loss[i] += sce(x[i, an_idx, gj, gi, 5 + label_idx], label_pos
int(label_idx == gtlabel[i, j]) * gtscore[i, j]) if label_idx == gtlabel[i, j] else label_neg)
for j in range(mask_num * h * w): for j in range(mask_num * h * w):
if objness[i, j] >= 0: if objness[i, j] >= 0:
loss[i] += sce(pred_obj[i, j], objness[i, j]) loss[i] += sce(pred_obj[i, j], objness[i, j])
return (loss, objness.reshape((n, mask_num, h, w)).astype('int32'), \ return (loss, objness.reshape((n, mask_num, h, w)).astype('float32'), \
gt_matches.astype('int32')) gt_matches.astype('int32'))
...@@ -220,9 +223,9 @@ class TestYolov3LossOp(OpTest): ...@@ -220,9 +223,9 @@ class TestYolov3LossOp(OpTest):
self.use_label_smooth = True self.use_label_smooth = True
class TestYolov3LossWithLabelSmooth(TestYolov3LossOp): class TestYolov3LossWithoutLabelSmooth(TestYolov3LossOp):
def set_label_smooth(self): def set_label_smooth(self):
self.use_label_smooth = True self.use_label_smooth = False
if __name__ == "__main__": if __name__ == "__main__":
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册