diff --git a/paddle/fluid/operators/yolov3_loss_op.cc b/paddle/fluid/operators/yolov3_loss_op.cc index c76767dfdd464769ff8962a0512fd6a7705bef6c..3bd0db8b592bdceba3dab670394434ae6872eda1 100644 --- a/paddle/fluid/operators/yolov3_loss_op.cc +++ b/paddle/fluid/operators/yolov3_loss_op.cc @@ -34,11 +34,12 @@ class Yolov3LossOp : public framework::OperatorWithKernel { auto dim_gtbox = ctx->GetInputDim("GTBox"); auto dim_gtlabel = ctx->GetInputDim("GTLabel"); auto anchors = ctx->Attrs().Get>("anchors"); + int anchor_num = anchors.size() / 2; auto class_num = ctx->Attrs().Get("class_num"); PADDLE_ENFORCE_EQ(dim_x.size(), 4, "Input(X) should be a 4-D tensor."); PADDLE_ENFORCE_EQ(dim_x[2], dim_x[3], "Input(X) dim[3] and dim[4] should be euqal."); - PADDLE_ENFORCE_EQ(dim_x[1], anchors.size() / 2 * (5 + class_num), + PADDLE_ENFORCE_EQ(dim_x[1], anchor_num * (5 + class_num), "Input(X) dim[1] should be equal to (anchor_number * (5 " "+ class_num))."); PADDLE_ENFORCE_EQ(dim_gtbox.size(), 3, @@ -105,20 +106,6 @@ class Yolov3LossOpMaker : public framework::OpProtoAndCheckerMaker { .SetDefault(406); AddAttr("ignore_thresh", "The ignore threshold to ignore confidence loss."); - AddAttr("loss_weight_xy", "The weight of x, y location loss.") - .SetDefault(1.0); - AddAttr("loss_weight_wh", "The weight of w, h location loss.") - .SetDefault(1.0); - AddAttr( - "loss_weight_conf_target", - "The weight of confidence score loss in locations with target object.") - .SetDefault(1.0); - AddAttr("loss_weight_conf_notarget", - "The weight of confidence score loss in locations without " - "target object.") - .SetDefault(1.0); - AddAttr("loss_weight_class", "The weight of classification loss.") - .SetDefault(1.0); AddComment(R"DOC( This operator generate yolov3 loss by given predict result and ground truth boxes. 
diff --git a/paddle/fluid/operators/yolov3_loss_op.h b/paddle/fluid/operators/yolov3_loss_op.h index d0064a81902b81e6ab2605bb7dd1af5f164ec73d..5de5b4efc797c6c5e95a8f5da7b440f30ca04909 100644 --- a/paddle/fluid/operators/yolov3_loss_op.h +++ b/paddle/fluid/operators/yolov3_loss_op.h @@ -164,48 +164,50 @@ static inline void CalcSCEGradWithWeight(const T* loss_grad, Tensor* grad, } } -template -static void SplitPredResult(const Tensor& input, Tensor* pred_conf, - Tensor* pred_class, Tensor* pred_x, Tensor* pred_y, - Tensor* pred_w, Tensor* pred_h, - const int anchor_num, const int class_num) { - const int n = input.dims()[0]; - const int h = input.dims()[2]; - const int w = input.dims()[3]; - const int box_attr_num = 5 + class_num; - - auto input_t = EigenTensor::From(input); - auto pred_conf_t = EigenTensor::From(*pred_conf); - auto pred_class_t = EigenTensor::From(*pred_class); - auto pred_x_t = EigenTensor::From(*pred_x); - auto pred_y_t = EigenTensor::From(*pred_y); - auto pred_w_t = EigenTensor::From(*pred_w); - auto pred_h_t = EigenTensor::From(*pred_h); - - for (int i = 0; i < n; i++) { - for (int an_idx = 0; an_idx < anchor_num; an_idx++) { - for (int j = 0; j < h; j++) { - for (int k = 0; k < w; k++) { - pred_x_t(i, an_idx, j, k) = input_t(i, box_attr_num * an_idx, j, k); - pred_y_t(i, an_idx, j, k) = - input_t(i, box_attr_num * an_idx + 1, j, k); - pred_w_t(i, an_idx, j, k) = - input_t(i, box_attr_num * an_idx + 2, j, k); - pred_h_t(i, an_idx, j, k) = - input_t(i, box_attr_num * an_idx + 3, j, k); - - pred_conf_t(i, an_idx, j, k) = - input_t(i, box_attr_num * an_idx + 4, j, k); - - for (int c = 0; c < class_num; c++) { - pred_class_t(i, an_idx, j, k, c) = - input_t(i, box_attr_num * an_idx + 5 + c, j, k); - } - } - } - } - } -} +// template +// static void SplitPredResult(const Tensor& input, Tensor* pred_conf, +// Tensor* pred_class, Tensor* pred_x, Tensor* +// pred_y, +// Tensor* pred_w, Tensor* pred_h, +// const int anchor_num, const int class_num) { +// 
const int n = input.dims()[0]; +// const int h = input.dims()[2]; +// const int w = input.dims()[3]; +// const int box_attr_num = 5 + class_num; +// +// auto input_t = EigenTensor::From(input); +// auto pred_conf_t = EigenTensor::From(*pred_conf); +// auto pred_class_t = EigenTensor::From(*pred_class); +// auto pred_x_t = EigenTensor::From(*pred_x); +// auto pred_y_t = EigenTensor::From(*pred_y); +// auto pred_w_t = EigenTensor::From(*pred_w); +// auto pred_h_t = EigenTensor::From(*pred_h); +// +// for (int i = 0; i < n; i++) { +// for (int an_idx = 0; an_idx < anchor_num; an_idx++) { +// for (int j = 0; j < h; j++) { +// for (int k = 0; k < w; k++) { +// pred_x_t(i, an_idx, j, k) = input_t(i, box_attr_num * an_idx, j, +// k); +// pred_y_t(i, an_idx, j, k) = +// input_t(i, box_attr_num * an_idx + 1, j, k); +// pred_w_t(i, an_idx, j, k) = +// input_t(i, box_attr_num * an_idx + 2, j, k); +// pred_h_t(i, an_idx, j, k) = +// input_t(i, box_attr_num * an_idx + 3, j, k); +// +// pred_conf_t(i, an_idx, j, k) = +// input_t(i, box_attr_num * an_idx + 4, j, k); +// +// for (int c = 0; c < class_num; c++) { +// pred_class_t(i, an_idx, j, k, c) = +// input_t(i, box_attr_num * an_idx + 5 + c, j, k); +// } +// } +// } +// } +// } +// } template static T CalcBoxIoU(std::vector box1, std::vector box2) { @@ -235,7 +237,7 @@ template static void PreProcessGTBox(const Tensor& gt_box, const Tensor& gt_label, const float ignore_thresh, std::vector anchors, const int input_size, const int grid_size, - Tensor* obj_mask, Tensor* noobj_mask, Tensor* tx, + Tensor* conf_mask, Tensor* obj_mask, Tensor* tx, Tensor* ty, Tensor* tw, Tensor* th, Tensor* tweight, Tensor* tconf, Tensor* tclass) { const int n = gt_box.dims()[0]; @@ -243,8 +245,8 @@ static void PreProcessGTBox(const Tensor& gt_box, const Tensor& gt_label, const int anchor_num = anchors.size() / 2; auto gt_box_t = EigenTensor::From(gt_box); auto gt_label_t = EigenTensor::From(gt_label); - auto obj_mask_t = 
EigenTensor::From(*obj_mask).setConstant(0); - auto noobj_mask_t = EigenTensor::From(*noobj_mask).setConstant(1); + auto conf_mask_t = EigenTensor::From(*conf_mask).setConstant(1.0); + auto obj_mask_t = EigenTensor::From(*obj_mask).setConstant(0.0); auto tx_t = EigenTensor::From(*tx).setConstant(0.0); auto ty_t = EigenTensor::From(*ty).setConstant(0.0); auto tw_t = EigenTensor::From(*tw).setConstant(0.0); @@ -280,11 +282,11 @@ static void PreProcessGTBox(const Tensor& gt_box, const Tensor& gt_label, best_an_index = an_idx; } if (iou > ignore_thresh) { - noobj_mask_t(i, an_idx, gj, gi) = static_cast(0.0); + conf_mask_t(i, an_idx, gj, gi) = static_cast(0.0); } } + conf_mask_t(i, best_an_index, gj, gi) = static_cast(1.0); obj_mask_t(i, best_an_index, gj, gi) = static_cast(1.0); - noobj_mask_t(i, best_an_index, gj, gi) = static_cast(0.0); tx_t(i, best_an_index, gj, gi) = gx - gi; ty_t(i, best_an_index, gj, gi) = gy - gj; tw_t(i, best_an_index, gj, gi) = log(gw / anchors[2 * best_an_index]); @@ -298,53 +300,194 @@ static void PreProcessGTBox(const Tensor& gt_box, const Tensor& gt_label, } template -static void AddAllGradToInputGrad( - Tensor* grad, const Tensor& grad_x, const Tensor& grad_y, - const Tensor& grad_w, const Tensor& grad_h, const Tensor& grad_conf_target, - const Tensor& grad_conf_notarget, const Tensor& grad_class, - const int class_num, const float loss_weight_xy, const float loss_weight_wh, - const float loss_weight_conf_target, const float loss_weight_conf_notarget, - const float loss_weight_class) { - const int n = grad_x.dims()[0]; - const int an_num = grad_x.dims()[1]; - const int h = grad_x.dims()[2]; - const int w = grad_x.dims()[3]; - const int attr_num = class_num + 5; - auto grad_t = EigenTensor::From(*grad).setConstant(0.0); - auto grad_x_t = EigenTensor::From(grad_x); - auto grad_y_t = EigenTensor::From(grad_y); - auto grad_w_t = EigenTensor::From(grad_w); - auto grad_h_t = EigenTensor::From(grad_h); - auto grad_conf_target_t = 
EigenTensor::From(grad_conf_target); - auto grad_conf_notarget_t = EigenTensor::From(grad_conf_notarget); - auto grad_class_t = EigenTensor::From(grad_class); +static T SCE(T x, T label) { + return (x > 0 ? x : 0.0) - x * label + std::log(1.0 + std::exp(-std::abs(x))); +} + +template +static T L1Loss(T x, T y) { + return std::abs(y - x); +} + +template +static T SCEGrad(T x, T label) { + return 1.0 / (1.0 + std::exp(-x)) - label; +} + +template +static T L1LossGrad(T x, T y) { + return x > y ? 1.0 : -1.0; +} + +template +static void CalcSCE(T* loss_data, const T* input, const T* target, + const T* weight, const T* mask, const int n, + const int an_num, const int grid_num, const int class_num, + const int num) { + for (int i = 0; i < n; i++) { + for (int j = 0; j < an_num; j++) { + for (int k = 0; k < grid_num; k++) { + int sub_idx = k * num; + for (int l = 0; l < num; l++) { + loss_data[i] += SCE(input[l * grid_num + k], target[sub_idx + l]) * + weight[k] * mask[k]; + } + } + input += (class_num + 5) * grid_num; + target += grid_num * num; + weight += grid_num; + mask += grid_num; + } + } +} +template +static void CalcSCEGrad(T* input_grad, const T* loss_grad, const T* input, + const T* target, const T* weight, const T* mask, + const int n, const int an_num, const int grid_num, + const int class_num, const int num) { for (int i = 0; i < n; i++) { for (int j = 0; j < an_num; j++) { - for (int k = 0; k < h; k++) { - for (int l = 0; l < w; l++) { - grad_t(i, j * attr_num, k, l) = grad_x_t(i, j, k, l) * loss_weight_xy; - grad_t(i, j * attr_num + 1, k, l) = - grad_y_t(i, j, k, l) * loss_weight_xy; - grad_t(i, j * attr_num + 2, k, l) = - grad_w_t(i, j, k, l) * loss_weight_wh; - grad_t(i, j * attr_num + 3, k, l) = - grad_h_t(i, j, k, l) * loss_weight_wh; - grad_t(i, j * attr_num + 4, k, l) = - grad_conf_target_t(i, j, k, l) * loss_weight_conf_target; - grad_t(i, j * attr_num + 4, k, l) += - grad_conf_notarget_t(i, j, k, l) * loss_weight_conf_notarget; - - for (int c = 0; 
c < class_num; c++) { - grad_t(i, j * attr_num + 5 + c, k, l) = - grad_class_t(i, j, k, l, c) * loss_weight_class; - } + for (int k = 0; k < grid_num; k++) { + int sub_idx = k * num; + for (int l = 0; l < num; l++) { + input_grad[l * grid_num + k] = + SCEGrad(input[l * grid_num + k], target[sub_idx + l]) * + weight[k] * mask[k] * loss_grad[i]; } } + input_grad += (class_num + 5) * grid_num; + input += (class_num + 5) * grid_num; + target += grid_num * num; + weight += grid_num; + mask += grid_num; + } + } +} + +template +static void CalcL1Loss(T* loss_data, const T* input, const T* target, + const T* weight, const T* mask, const int n, + const int an_num, const int grid_num, + const int class_num) { + for (int i = 0; i < n; i++) { + for (int j = 0; j < an_num; j++) { + for (int k = 0; k < grid_num; k++) { + loss_data[i] += L1Loss(input[k], target[k]) * weight[k] * mask[k]; + } + input += (class_num + 5) * grid_num; + target += grid_num; + weight += grid_num; + mask += grid_num; + } + } +} + +template +static void CalcL1LossGrad(T* input_grad, const T* loss_grad, const T* input, + const T* target, const T* weight, const T* mask, + const int n, const int an_num, const int grid_num, + const int class_num) { + for (int i = 0; i < n; i++) { + for (int j = 0; j < an_num; j++) { + for (int k = 0; k < grid_num; k++) { + input_grad[k] = L1LossGrad(input[k], target[k]) * weight[k] * + mask[k] * loss_grad[i]; + } + input_grad += (class_num + 5) * grid_num; + input += (class_num + 5) * grid_num; + target += grid_num; + weight += grid_num; + mask += grid_num; } } } +template +static void CalcYolov3Loss(T* loss_data, const Tensor& input, const Tensor& tx, + const Tensor& ty, const Tensor& tw, const Tensor& th, + const Tensor& tweight, const Tensor& tconf, + const Tensor& tclass, const Tensor& conf_mask, + const Tensor& obj_mask) { + const T* input_data = input.data(); + const T* tx_data = tx.data(); + const T* ty_data = ty.data(); + const T* tw_data = tw.data(); + const T* 
th_data = th.data(); + const T* tweight_data = tweight.data(); + const T* tconf_data = tconf.data(); + const T* tclass_data = tclass.data(); + const T* conf_mask_data = conf_mask.data(); + const T* obj_mask_data = obj_mask.data(); + + const int n = tclass.dims()[0]; + const int an_num = tclass.dims()[1]; + const int h = tclass.dims()[2]; + const int w = tclass.dims()[3]; + const int class_num = tclass.dims()[4]; + const int grid_num = h * w; + + CalcSCE(loss_data, input_data, tx_data, tweight_data, obj_mask_data, n, + an_num, grid_num, class_num, 1); + CalcSCE(loss_data, input_data + grid_num, ty_data, tweight_data, + obj_mask_data, n, an_num, grid_num, class_num, 1); + CalcL1Loss(loss_data, input_data + 2 * grid_num, tw_data, tweight_data, + obj_mask_data, n, an_num, grid_num, class_num); + CalcL1Loss(loss_data, input_data + 3 * grid_num, th_data, tweight_data, + obj_mask_data, n, an_num, grid_num, class_num); + CalcSCE(loss_data, input_data + 4 * grid_num, tconf_data, conf_mask_data, + conf_mask_data, n, an_num, grid_num, class_num, 1); + CalcSCE(loss_data, input_data + 5 * grid_num, tclass_data, obj_mask_data, + obj_mask_data, n, an_num, grid_num, class_num, class_num); +} + +template +static void CalcYolov3LossGrad(T* input_grad_data, const Tensor& loss_grad, + const Tensor& input, const Tensor& tx, + const Tensor& ty, const Tensor& tw, + const Tensor& th, const Tensor& tweight, + const Tensor& tconf, const Tensor& tclass, + const Tensor& conf_mask, + const Tensor& obj_mask) { + const T* loss_grad_data = loss_grad.data(); + const T* input_data = input.data(); + const T* tx_data = tx.data(); + const T* ty_data = ty.data(); + const T* tw_data = tw.data(); + const T* th_data = th.data(); + const T* tweight_data = tweight.data(); + const T* tconf_data = tconf.data(); + const T* tclass_data = tclass.data(); + const T* conf_mask_data = conf_mask.data(); + const T* obj_mask_data = obj_mask.data(); + + const int n = tclass.dims()[0]; + const int an_num = 
tclass.dims()[1]; + const int h = tclass.dims()[2]; + const int w = tclass.dims()[3]; + const int class_num = tclass.dims()[4]; + const int grid_num = h * w; + + CalcSCEGrad(input_grad_data, loss_grad_data, input_data, tx_data, + tweight_data, obj_mask_data, n, an_num, grid_num, class_num, + 1); + CalcSCEGrad(input_grad_data + grid_num, loss_grad_data, + input_data + grid_num, ty_data, tweight_data, obj_mask_data, n, + an_num, grid_num, class_num, 1); + CalcL1LossGrad(input_grad_data + 2 * grid_num, loss_grad_data, + input_data + 2 * grid_num, tw_data, tweight_data, + obj_mask_data, n, an_num, grid_num, class_num); + CalcL1LossGrad(input_grad_data + 3 * grid_num, loss_grad_data, + input_data + 3 * grid_num, th_data, tweight_data, + obj_mask_data, n, an_num, grid_num, class_num); + CalcSCEGrad(input_grad_data + 4 * grid_num, loss_grad_data, + input_data + 4 * grid_num, tconf_data, conf_mask_data, + conf_mask_data, n, an_num, grid_num, class_num, 1); + CalcSCEGrad(input_grad_data + 5 * grid_num, loss_grad_data, + input_data + 5 * grid_num, tclass_data, obj_mask_data, + obj_mask_data, n, an_num, grid_num, class_num, class_num); +} + template class Yolov3LossKernel : public framework::OpKernel { public: @@ -357,33 +500,16 @@ class Yolov3LossKernel : public framework::OpKernel { int class_num = ctx.Attr("class_num"); int input_size = ctx.Attr("input_size"); float ignore_thresh = ctx.Attr("ignore_thresh"); - float loss_weight_xy = ctx.Attr("loss_weight_xy"); - float loss_weight_wh = ctx.Attr("loss_weight_wh"); - float loss_weight_conf_target = ctx.Attr("loss_weight_conf_target"); - float loss_weight_conf_notarget = - ctx.Attr("loss_weight_conf_notarget"); - float loss_weight_class = ctx.Attr("loss_weight_class"); const int n = input->dims()[0]; const int h = input->dims()[2]; const int w = input->dims()[3]; const int an_num = anchors.size() / 2; - Tensor pred_x, pred_y, pred_w, pred_h; - Tensor pred_conf, pred_class; - pred_x.mutable_data({n, an_num, h, w}, 
ctx.GetPlace()); - pred_y.mutable_data({n, an_num, h, w}, ctx.GetPlace()); - pred_w.mutable_data({n, an_num, h, w}, ctx.GetPlace()); - pred_h.mutable_data({n, an_num, h, w}, ctx.GetPlace()); - pred_conf.mutable_data({n, an_num, h, w}, ctx.GetPlace()); - pred_class.mutable_data({n, an_num, h, w, class_num}, ctx.GetPlace()); - SplitPredResult(*input, &pred_conf, &pred_class, &pred_x, &pred_y, - &pred_w, &pred_h, an_num, class_num); - - Tensor obj_mask, noobj_mask; + Tensor conf_mask, obj_mask; Tensor tx, ty, tw, th, tweight, tconf, tclass; + conf_mask.mutable_data({n, an_num, h, w}, ctx.GetPlace()); obj_mask.mutable_data({n, an_num, h, w}, ctx.GetPlace()); - noobj_mask.mutable_data({n, an_num, h, w}, ctx.GetPlace()); tx.mutable_data({n, an_num, h, w}, ctx.GetPlace()); ty.mutable_data({n, an_num, h, w}, ctx.GetPlace()); tw.mutable_data({n, an_num, h, w}, ctx.GetPlace()); @@ -392,35 +518,13 @@ class Yolov3LossKernel : public framework::OpKernel { tconf.mutable_data({n, an_num, h, w}, ctx.GetPlace()); tclass.mutable_data({n, an_num, h, w, class_num}, ctx.GetPlace()); PreProcessGTBox(*gt_box, *gt_label, ignore_thresh, anchors, input_size, - h, &obj_mask, &noobj_mask, &tx, &ty, &tw, &th, &tweight, + h, &conf_mask, &obj_mask, &tx, &ty, &tw, &th, &tweight, &tconf, &tclass); - Tensor obj_weight; - obj_weight.mutable_data({n, an_num, h, w}, ctx.GetPlace()); - auto obj_weight_t = EigenTensor::From(obj_weight); - auto obj_mask_t = EigenTensor::From(obj_mask); - auto tweight_t = EigenTensor::From(tweight); - obj_weight_t = obj_mask_t * tweight_t; - - Tensor obj_mask_expand; - obj_mask_expand.mutable_data({n, an_num, h, w, class_num}, - ctx.GetPlace()); - auto obj_mask_expand_t = EigenTensor::From(obj_mask_expand); - obj_mask_expand_t = obj_mask_t.reshape(Array5(n, an_num, h, w, 1)) - .broadcast(Array5(1, 1, 1, 1, class_num)); - T* loss_data = loss->mutable_data({n}, ctx.GetPlace()); memset(loss_data, 0, n * sizeof(T)); - CalcSCEWithWeight(pred_x, tx, obj_weight, loss_weight_xy, 
loss_data); - CalcSCEWithWeight(pred_y, ty, obj_weight, loss_weight_xy, loss_data); - CalcL1LossWithWeight(pred_w, tw, obj_weight, loss_weight_wh, loss_data); - CalcL1LossWithWeight(pred_h, th, obj_weight, loss_weight_wh, loss_data); - CalcSCEWithWeight(pred_conf, tconf, obj_mask, loss_weight_conf_target, - loss_data); - CalcSCEWithWeight(pred_conf, tconf, noobj_mask, - loss_weight_conf_notarget, loss_data); - CalcSCEWithWeight(pred_class, tclass, obj_mask_expand, loss_weight_class, - loss_data); + CalcYolov3Loss(loss_data, *input, tx, ty, tw, th, tweight, tconf, tclass, + conf_mask, obj_mask); } }; @@ -436,14 +540,7 @@ class Yolov3LossGradKernel : public framework::OpKernel { float ignore_thresh = ctx.Attr("ignore_thresh"); auto* input_grad = ctx.Output(framework::GradVarName("X")); auto* loss_grad = ctx.Input(framework::GradVarName("Loss")); - const T* loss_grad_data = loss_grad->data(); int input_size = ctx.Attr("input_size"); - float loss_weight_xy = ctx.Attr("loss_weight_xy"); - float loss_weight_wh = ctx.Attr("loss_weight_wh"); - float loss_weight_conf_target = ctx.Attr("loss_weight_conf_target"); - float loss_weight_conf_notarget = - ctx.Attr("loss_weight_conf_notarget"); - float loss_weight_class = ctx.Attr("loss_weight_class"); const int n = input->dims()[0]; const int c = input->dims()[1]; @@ -451,21 +548,10 @@ class Yolov3LossGradKernel : public framework::OpKernel { const int w = input->dims()[3]; const int an_num = anchors.size() / 2; - Tensor pred_x, pred_y, pred_w, pred_h; - Tensor pred_conf, pred_class; - pred_x.mutable_data({n, an_num, h, w}, ctx.GetPlace()); - pred_y.mutable_data({n, an_num, h, w}, ctx.GetPlace()); - pred_w.mutable_data({n, an_num, h, w}, ctx.GetPlace()); - pred_h.mutable_data({n, an_num, h, w}, ctx.GetPlace()); - pred_conf.mutable_data({n, an_num, h, w}, ctx.GetPlace()); - pred_class.mutable_data({n, an_num, h, w, class_num}, ctx.GetPlace()); - SplitPredResult(*input, &pred_conf, &pred_class, &pred_x, &pred_y, - &pred_w, &pred_h, 
an_num, class_num); - - Tensor obj_mask, noobj_mask; + Tensor conf_mask, obj_mask; Tensor tx, ty, tw, th, tweight, tconf, tclass; + conf_mask.mutable_data({n, an_num, h, w}, ctx.GetPlace()); obj_mask.mutable_data({n, an_num, h, w}, ctx.GetPlace()); - noobj_mask.mutable_data({n, an_num, h, w}, ctx.GetPlace()); tx.mutable_data({n, an_num, h, w}, ctx.GetPlace()); ty.mutable_data({n, an_num, h, w}, ctx.GetPlace()); tw.mutable_data({n, an_num, h, w}, ctx.GetPlace()); @@ -474,51 +560,13 @@ class Yolov3LossGradKernel : public framework::OpKernel { tconf.mutable_data({n, an_num, h, w}, ctx.GetPlace()); tclass.mutable_data({n, an_num, h, w, class_num}, ctx.GetPlace()); PreProcessGTBox(*gt_box, *gt_label, ignore_thresh, anchors, input_size, - h, &obj_mask, &noobj_mask, &tx, &ty, &tw, &th, &tweight, + h, &conf_mask, &obj_mask, &tx, &ty, &tw, &th, &tweight, &tconf, &tclass); - Tensor obj_weight; - obj_weight.mutable_data({n, an_num, h, w}, ctx.GetPlace()); - auto obj_weight_t = EigenTensor::From(obj_weight); - auto obj_mask_t = EigenTensor::From(obj_mask); - auto tweight_t = EigenTensor::From(tweight); - obj_weight_t = obj_mask_t * tweight_t; - - Tensor obj_mask_expand; - obj_mask_expand.mutable_data({n, an_num, h, w, class_num}, - ctx.GetPlace()); - auto obj_mask_expand_t = EigenTensor::From(obj_mask_expand); - obj_mask_expand_t = obj_mask_t.reshape(Array5(n, an_num, h, w, 1)) - .broadcast(Array5(1, 1, 1, 1, class_num)); - - Tensor grad_x, grad_y, grad_w, grad_h; - Tensor grad_conf_target, grad_conf_notarget, grad_class; - grad_x.mutable_data({n, an_num, h, w}, ctx.GetPlace()); - grad_y.mutable_data({n, an_num, h, w}, ctx.GetPlace()); - grad_w.mutable_data({n, an_num, h, w}, ctx.GetPlace()); - grad_h.mutable_data({n, an_num, h, w}, ctx.GetPlace()); - grad_conf_target.mutable_data({n, an_num, h, w}, ctx.GetPlace()); - grad_conf_notarget.mutable_data({n, an_num, h, w}, ctx.GetPlace()); - grad_class.mutable_data({n, an_num, h, w, class_num}, ctx.GetPlace()); - 
CalcSCEGradWithWeight(loss_grad_data, &grad_x, pred_x, tx, obj_weight); - CalcSCEGradWithWeight(loss_grad_data, &grad_y, pred_y, ty, obj_weight); - CalcL1LossGradWithWeight(loss_grad_data, &grad_w, pred_w, tw, - obj_weight); - CalcL1LossGradWithWeight(loss_grad_data, &grad_h, pred_h, th, - obj_weight); - CalcSCEGradWithWeight(loss_grad_data, &grad_conf_target, pred_conf, - tconf, obj_mask); - CalcSCEGradWithWeight(loss_grad_data, &grad_conf_notarget, pred_conf, - tconf, noobj_mask); - CalcSCEGradWithWeight(loss_grad_data, &grad_class, pred_class, tclass, - obj_mask_expand); - - input_grad->mutable_data({n, c, h, w}, ctx.GetPlace()); - AddAllGradToInputGrad(input_grad, grad_x, grad_y, grad_w, grad_h, - grad_conf_target, grad_conf_notarget, grad_class, - class_num, loss_weight_xy, loss_weight_wh, - loss_weight_conf_target, loss_weight_conf_notarget, - loss_weight_class); + T* input_grad_data = + input_grad->mutable_data({n, c, h, w}, ctx.GetPlace()); + CalcYolov3LossGrad(input_grad_data, *loss_grad, *input, tx, ty, tw, th, + tweight, tconf, tclass, conf_mask, obj_mask); } }; diff --git a/python/paddle/fluid/layers/detection.py b/python/paddle/fluid/layers/detection.py index 5fb4588e0b977d185a7d70d11527a73a50277253..caa9b1c3d4723cfab690e42221c3437e70705ef2 100644 --- a/python/paddle/fluid/layers/detection.py +++ b/python/paddle/fluid/layers/detection.py @@ -416,11 +416,6 @@ def yolov3_loss(x, class_num, ignore_thresh, input_size, - loss_weight_xy=None, - loss_weight_wh=None, - loss_weight_conf_target=None, - loss_weight_conf_notarget=None, - loss_weight_class=None, name=None): """ ${comment} @@ -438,11 +433,6 @@ def yolov3_loss(x, class_num (int): ${class_num_comment} ignore_thresh (float): ${ignore_thresh_comment} input_size (int): ${input_size_comment} - loss_weight_xy (float|None): ${loss_weight_xy_comment} - loss_weight_wh (float|None): ${loss_weight_wh_comment} - loss_weight_conf_target (float|None): ${loss_weight_conf_target_comment} - loss_weight_conf_notarget 
(float|None): ${loss_weight_conf_notarget_comment} - loss_weight_class (float|None): ${loss_weight_class_comment} name (string): the name of yolov3 loss Returns: @@ -495,18 +485,18 @@ def yolov3_loss(x, "input_size": input_size, } - if loss_weight_xy is not None and isinstance(loss_weight_xy, float): - self.attrs['loss_weight_xy'] = loss_weight_xy - if loss_weight_wh is not None and isinstance(loss_weight_wh, float): - self.attrs['loss_weight_wh'] = loss_weight_wh - if loss_weight_conf_target is not None and isinstance( - loss_weight_conf_target, float): - self.attrs['loss_weight_conf_target'] = loss_weight_conf_target - if loss_weight_conf_notarget is not None and isinstance( - loss_weight_conf_notarget, float): - self.attrs['loss_weight_conf_notarget'] = loss_weight_conf_notarget - if loss_weight_class is not None and isinstance(loss_weight_class, float): - self.attrs['loss_weight_class'] = loss_weight_class + # if loss_weight_xy is not None and isinstance(loss_weight_xy, float): + # self.attrs['loss_weight_xy'] = loss_weight_xy + # if loss_weight_wh is not None and isinstance(loss_weight_wh, float): + # self.attrs['loss_weight_wh'] = loss_weight_wh + # if loss_weight_conf_target is not None and isinstance( + # loss_weight_conf_target, float): + # self.attrs['loss_weight_conf_target'] = loss_weight_conf_target + # if loss_weight_conf_notarget is not None and isinstance( + # loss_weight_conf_notarget, float): + # self.attrs['loss_weight_conf_notarget'] = loss_weight_conf_notarget + # if loss_weight_class is not None and isinstance(loss_weight_class, float): + # self.attrs['loss_weight_class'] = loss_weight_class helper.append_op( type='yolov3_loss', diff --git a/python/paddle/fluid/tests/unittests/op_test.py b/python/paddle/fluid/tests/unittests/op_test.py index 9cf398f18f953f96908959da0c5ed72fd7f43722..0fe836683b029698b670bbb9f9bb258c2f3b68a0 100644 --- a/python/paddle/fluid/tests/unittests/op_test.py +++ b/python/paddle/fluid/tests/unittests/op_test.py @@ -470,8 
+470,6 @@ class OpTest(unittest.TestCase): ] analytic_grads = self._get_gradient(inputs_to_check, place, output_names, no_grad_set) - # print(numeric_grads[0][0, 4, :, :]) - # print(analytic_grads[0][0, 4, :, :]) self._assert_is_close(numeric_grads, analytic_grads, inputs_to_check, max_relative_error, diff --git a/python/paddle/fluid/tests/unittests/test_yolov3_loss_op.py b/python/paddle/fluid/tests/unittests/test_yolov3_loss_op.py index e218031286fc221dab294cfca70ca81a86285856..cf7e2c52893ea4c2fb80ca24bb11d553e3dc19da 100644 --- a/python/paddle/fluid/tests/unittests/test_yolov3_loss_op.py +++ b/python/paddle/fluid/tests/unittests/test_yolov3_loss_op.py @@ -80,8 +80,8 @@ def build_target(gtboxes, gtlabel, attrs, grid_size): class_num = attrs["class_num"] input_size = attrs["input_size"] an_num = len(anchors) // 2 + conf_mask = np.ones((n, an_num, grid_size, grid_size)).astype('float32') obj_mask = np.zeros((n, an_num, grid_size, grid_size)).astype('float32') - noobj_mask = np.ones((n, an_num, grid_size, grid_size)).astype('float32') tx = np.zeros((n, an_num, grid_size, grid_size)).astype('float32') ty = np.zeros((n, an_num, grid_size, grid_size)).astype('float32') tw = np.zeros((n, an_num, grid_size, grid_size)).astype('float32') @@ -114,10 +114,10 @@ def build_target(gtboxes, gtlabel, attrs, grid_size): max_iou = iou best_an_index = k if iou > ignore_thresh: - noobj_mask[i, best_an_index, gj, gi] = 0 + conf_mask[i, k, gj, gi] = 0 + conf_mask[i, best_an_index, gj, gi] = 1 obj_mask[i, best_an_index, gj, gi] = 1 - noobj_mask[i, best_an_index, gj, gi] = 0 tx[i, best_an_index, gj, gi] = gx - gi ty[i, best_an_index, gj, gi] = gy - gj tw[i, best_an_index, gj, gi] = np.log(gw / anchors[2 * @@ -129,7 +129,7 @@ def build_target(gtboxes, gtlabel, attrs, grid_size): tconf[i, best_an_index, gj, gi] = 1 tcls[i, best_an_index, gj, gi, gt_label] = 1 - return (tx, ty, tw, th, tweight, tconf, tcls, obj_mask, noobj_mask) + return (tx, ty, tw, th, tweight, tconf, tcls, 
conf_mask, obj_mask) def YoloV3Loss(x, gtbox, gtlabel, attrs): @@ -144,11 +144,9 @@ def YoloV3Loss(x, gtbox, gtlabel, attrs): pred_conf = x[:, :, :, :, 4] pred_cls = x[:, :, :, :, 5:] - tx, ty, tw, th, tweight, tconf, tcls, obj_mask, noobj_mask = build_target( + tx, ty, tw, th, tweight, tconf, tcls, conf_mask, obj_mask = build_target( gtbox, gtlabel, attrs, x.shape[2]) - # print("obj_mask: ", obj_mask[0, 0, :, :]) - # print("noobj_mask: ", noobj_mask[0, 0, :, :]) obj_weight = obj_mask * tweight obj_mask_expand = np.tile( np.expand_dims(obj_mask, 4), (1, 1, 1, 1, int(attrs['class_num']))) @@ -156,30 +154,19 @@ def YoloV3Loss(x, gtbox, gtlabel, attrs): loss_y = sce(pred_y, ty, obj_weight) loss_w = l1loss(pred_w, tw, obj_weight) loss_h = l1loss(pred_h, th, obj_weight) - loss_conf_target = sce(pred_conf, tconf, obj_mask) - loss_conf_notarget = sce(pred_conf, tconf, noobj_mask) + loss_obj = sce(pred_conf, tconf, conf_mask) loss_class = sce(pred_cls, tcls, obj_mask_expand) - # print("loss_xy: ", loss_x + loss_y) - # print("loss_wh: ", loss_w + loss_h) - # print("loss_conf_target: ", loss_conf_target) - # print("loss_conf_notarget: ", loss_conf_notarget) - # print("loss_class: ", loss_class) + # print("python loss_xy: ", loss_x + loss_y) + # print("python loss_wh: ", loss_w + loss_h) + # print("python loss_obj: ", loss_obj) + # print("python loss_class: ", loss_class) - return attrs['loss_weight_xy'] * (loss_x + loss_y) \ - + attrs['loss_weight_wh'] * (loss_w + loss_h) \ - + attrs['loss_weight_conf_target'] * loss_conf_target \ - + attrs['loss_weight_conf_notarget'] * loss_conf_notarget \ - + attrs['loss_weight_class'] * loss_class + return loss_x + loss_y + loss_w + loss_h + loss_obj + loss_class class TestYolov3LossOp(OpTest): def setUp(self): - self.loss_weight_xy = 1.0 - self.loss_weight_wh = 1.0 - self.loss_weight_conf_target = 1.0 - self.loss_weight_conf_notarget = 1.0 - self.loss_weight_class = 1.0 self.initTestCase() self.op_type = 'yolov3_loss' x = 
logit(np.random.uniform(0, 1, self.x_shape).astype('float32')) @@ -192,11 +179,6 @@ class TestYolov3LossOp(OpTest): "class_num": self.class_num, "ignore_thresh": self.ignore_thresh, "input_size": self.input_size, - "loss_weight_xy": self.loss_weight_xy, - "loss_weight_wh": self.loss_weight_wh, - "loss_weight_conf_target": self.loss_weight_conf_target, - "loss_weight_conf_notarget": self.loss_weight_conf_notarget, - "loss_weight_class": self.loss_weight_class, } self.inputs = {'X': x, 'GTBox': gtbox, 'GTLabel': gtlabel} @@ -215,17 +197,12 @@ class TestYolov3LossOp(OpTest): max_relative_error=0.31) def initTestCase(self): - self.anchors = [12, 12] + self.anchors = [12, 12, 11, 13] self.class_num = 5 - self.ignore_thresh = 0.3 + self.ignore_thresh = 0.5 self.input_size = 416 self.x_shape = (3, len(self.anchors) // 2 * (5 + self.class_num), 5, 5) self.gtbox_shape = (3, 5, 4) - self.loss_weight_xy = 1.2 - self.loss_weight_wh = 0.8 - self.loss_weight_conf_target = 2.0 - self.loss_weight_conf_notarget = 1.0 - self.loss_weight_class = 1.5 if __name__ == "__main__":