提交 0c4acc83 编写于 作者: D dengkaipeng

improve yolo loss implementation. test=develop

上级 2fbfef2e
......@@ -34,11 +34,12 @@ class Yolov3LossOp : public framework::OperatorWithKernel {
auto dim_gtbox = ctx->GetInputDim("GTBox");
auto dim_gtlabel = ctx->GetInputDim("GTLabel");
auto anchors = ctx->Attrs().Get<std::vector<int>>("anchors");
int anchor_num = anchors.size() / 2;
auto class_num = ctx->Attrs().Get<int>("class_num");
PADDLE_ENFORCE_EQ(dim_x.size(), 4, "Input(X) should be a 4-D tensor.");
PADDLE_ENFORCE_EQ(dim_x[2], dim_x[3],
"Input(X) dim[3] and dim[4] should be euqal.");
PADDLE_ENFORCE_EQ(dim_x[1], anchors.size() / 2 * (5 + class_num),
PADDLE_ENFORCE_EQ(dim_x[1], anchor_num * (5 + class_num),
"Input(X) dim[1] should be equal to (anchor_number * (5 "
"+ class_num)).");
PADDLE_ENFORCE_EQ(dim_gtbox.size(), 3,
......@@ -105,20 +106,6 @@ class Yolov3LossOpMaker : public framework::OpProtoAndCheckerMaker {
.SetDefault(406);
AddAttr<float>("ignore_thresh",
"The ignore threshold to ignore confidence loss.");
AddAttr<float>("loss_weight_xy", "The weight of x, y location loss.")
.SetDefault(1.0);
AddAttr<float>("loss_weight_wh", "The weight of w, h location loss.")
.SetDefault(1.0);
AddAttr<float>(
"loss_weight_conf_target",
"The weight of confidence score loss in locations with target object.")
.SetDefault(1.0);
AddAttr<float>("loss_weight_conf_notarget",
"The weight of confidence score loss in locations without "
"target object.")
.SetDefault(1.0);
AddAttr<float>("loss_weight_class", "The weight of classification loss.")
.SetDefault(1.0);
AddComment(R"DOC(
This operator generate yolov3 loss by given predict result and ground
truth boxes.
......
......@@ -164,48 +164,50 @@ static inline void CalcSCEGradWithWeight(const T* loss_grad, Tensor* grad,
}
}
// Copies the channel-interleaved prediction tensor `input` into one tensor
// per box attribute.
//
// `input` is read with four indices (sample, channel, row, column). For
// anchor `a` the channels [a * (5 + class_num), (a + 1) * (5 + class_num))
// hold, in order: x, y, w, h, confidence, and then `class_num` class scores.
// The x/y/w/h/confidence outputs are written with four indices
// (sample, anchor, row, column); `pred_class` takes a fifth, trailing class
// index. All output tensors must already be allocated by the caller.
template <typename T>
static void SplitPredResult(const Tensor& input, Tensor* pred_conf,
                            Tensor* pred_class, Tensor* pred_x, Tensor* pred_y,
                            Tensor* pred_w, Tensor* pred_h,
                            const int anchor_num, const int class_num) {
  const int batch = input.dims()[0];
  const int rows = input.dims()[2];
  const int cols = input.dims()[3];
  const int attrs_per_anchor = 5 + class_num;

  auto src = EigenTensor<T, 4>::From(input);
  auto conf_out = EigenTensor<T, 4>::From(*pred_conf);
  auto class_out = EigenTensor<T, 5>::From(*pred_class);
  auto x_out = EigenTensor<T, 4>::From(*pred_x);
  auto y_out = EigenTensor<T, 4>::From(*pred_y);
  auto w_out = EigenTensor<T, 4>::From(*pred_w);
  auto h_out = EigenTensor<T, 4>::From(*pred_h);

  for (int b = 0; b < batch; ++b) {
    for (int a = 0; a < anchor_num; ++a) {
      // First channel belonging to anchor `a`.
      const int base = attrs_per_anchor * a;
      for (int r = 0; r < rows; ++r) {
        for (int col = 0; col < cols; ++col) {
          x_out(b, a, r, col) = src(b, base, r, col);
          y_out(b, a, r, col) = src(b, base + 1, r, col);
          w_out(b, a, r, col) = src(b, base + 2, r, col);
          h_out(b, a, r, col) = src(b, base + 3, r, col);
          conf_out(b, a, r, col) = src(b, base + 4, r, col);

          // Class scores follow the five box attributes.
          for (int cls = 0; cls < class_num; ++cls) {
            class_out(b, a, r, col, cls) = src(b, base + 5 + cls, r, col);
          }
        }
      }
    }
  }
}
// template <typename T>
// static void SplitPredResult(const Tensor& input, Tensor* pred_conf,
// Tensor* pred_class, Tensor* pred_x, Tensor*
// pred_y,
// Tensor* pred_w, Tensor* pred_h,
// const int anchor_num, const int class_num) {
// const int n = input.dims()[0];
// const int h = input.dims()[2];
// const int w = input.dims()[3];
// const int box_attr_num = 5 + class_num;
//
// auto input_t = EigenTensor<T, 4>::From(input);
// auto pred_conf_t = EigenTensor<T, 4>::From(*pred_conf);
// auto pred_class_t = EigenTensor<T, 5>::From(*pred_class);
// auto pred_x_t = EigenTensor<T, 4>::From(*pred_x);
// auto pred_y_t = EigenTensor<T, 4>::From(*pred_y);
// auto pred_w_t = EigenTensor<T, 4>::From(*pred_w);
// auto pred_h_t = EigenTensor<T, 4>::From(*pred_h);
//
// for (int i = 0; i < n; i++) {
// for (int an_idx = 0; an_idx < anchor_num; an_idx++) {
// for (int j = 0; j < h; j++) {
// for (int k = 0; k < w; k++) {
// pred_x_t(i, an_idx, j, k) = input_t(i, box_attr_num * an_idx, j,
// k);
// pred_y_t(i, an_idx, j, k) =
// input_t(i, box_attr_num * an_idx + 1, j, k);
// pred_w_t(i, an_idx, j, k) =
// input_t(i, box_attr_num * an_idx + 2, j, k);
// pred_h_t(i, an_idx, j, k) =
// input_t(i, box_attr_num * an_idx + 3, j, k);
//
// pred_conf_t(i, an_idx, j, k) =
// input_t(i, box_attr_num * an_idx + 4, j, k);
//
// for (int c = 0; c < class_num; c++) {
// pred_class_t(i, an_idx, j, k, c) =
// input_t(i, box_attr_num * an_idx + 5 + c, j, k);
// }
// }
// }
// }
// }
// }
template <typename T>
static T CalcBoxIoU(std::vector<T> box1, std::vector<T> box2) {
......@@ -235,7 +237,7 @@ template <typename T>
static void PreProcessGTBox(const Tensor& gt_box, const Tensor& gt_label,
const float ignore_thresh, std::vector<int> anchors,
const int input_size, const int grid_size,
Tensor* obj_mask, Tensor* noobj_mask, Tensor* tx,
Tensor* conf_mask, Tensor* obj_mask, Tensor* tx,
Tensor* ty, Tensor* tw, Tensor* th, Tensor* tweight,
Tensor* tconf, Tensor* tclass) {
const int n = gt_box.dims()[0];
......@@ -243,8 +245,8 @@ static void PreProcessGTBox(const Tensor& gt_box, const Tensor& gt_label,
const int anchor_num = anchors.size() / 2;
auto gt_box_t = EigenTensor<T, 3>::From(gt_box);
auto gt_label_t = EigenTensor<int, 2>::From(gt_label);
auto obj_mask_t = EigenTensor<T, 4>::From(*obj_mask).setConstant(0);
auto noobj_mask_t = EigenTensor<T, 4>::From(*noobj_mask).setConstant(1);
auto conf_mask_t = EigenTensor<T, 4>::From(*conf_mask).setConstant(1.0);
auto obj_mask_t = EigenTensor<T, 4>::From(*obj_mask).setConstant(0.0);
auto tx_t = EigenTensor<T, 4>::From(*tx).setConstant(0.0);
auto ty_t = EigenTensor<T, 4>::From(*ty).setConstant(0.0);
auto tw_t = EigenTensor<T, 4>::From(*tw).setConstant(0.0);
......@@ -280,11 +282,11 @@ static void PreProcessGTBox(const Tensor& gt_box, const Tensor& gt_label,
best_an_index = an_idx;
}
if (iou > ignore_thresh) {
noobj_mask_t(i, an_idx, gj, gi) = static_cast<T>(0.0);
conf_mask_t(i, an_idx, gj, gi) = static_cast<T>(0.0);
}
}
conf_mask_t(i, best_an_index, gj, gi) = static_cast<T>(1.0);
obj_mask_t(i, best_an_index, gj, gi) = static_cast<T>(1.0);
noobj_mask_t(i, best_an_index, gj, gi) = static_cast<T>(0.0);
tx_t(i, best_an_index, gj, gi) = gx - gi;
ty_t(i, best_an_index, gj, gi) = gy - gj;
tw_t(i, best_an_index, gj, gi) = log(gw / anchors[2 * best_an_index]);
......@@ -298,53 +300,194 @@ static void PreProcessGTBox(const Tensor& gt_box, const Tensor& gt_label,
}
template <typename T>
static void AddAllGradToInputGrad(
Tensor* grad, const Tensor& grad_x, const Tensor& grad_y,
const Tensor& grad_w, const Tensor& grad_h, const Tensor& grad_conf_target,
const Tensor& grad_conf_notarget, const Tensor& grad_class,
const int class_num, const float loss_weight_xy, const float loss_weight_wh,
const float loss_weight_conf_target, const float loss_weight_conf_notarget,
const float loss_weight_class) {
const int n = grad_x.dims()[0];
const int an_num = grad_x.dims()[1];
const int h = grad_x.dims()[2];
const int w = grad_x.dims()[3];
const int attr_num = class_num + 5;
auto grad_t = EigenTensor<T, 4>::From(*grad).setConstant(0.0);
auto grad_x_t = EigenTensor<T, 4>::From(grad_x);
auto grad_y_t = EigenTensor<T, 4>::From(grad_y);
auto grad_w_t = EigenTensor<T, 4>::From(grad_w);
auto grad_h_t = EigenTensor<T, 4>::From(grad_h);
auto grad_conf_target_t = EigenTensor<T, 4>::From(grad_conf_target);
auto grad_conf_notarget_t = EigenTensor<T, 4>::From(grad_conf_notarget);
auto grad_class_t = EigenTensor<T, 5>::From(grad_class);
// Sigmoid cross-entropy between the logit `x` and the target `label`,
// written as max(x, 0) - x * label + log(1 + exp(-|x|)) so that exp() is
// only ever evaluated on a non-positive argument.
static T SCE(T x, T label) {
return (x > 0 ? x : 0.0) - x * label + std::log(1.0 + std::exp(-std::abs(x)));
}
// Absolute error |y - x| between a prediction `x` and its target `y`.
template <typename T>
static T L1Loss(T x, T y) {
  const T diff = y - x;
  return std::abs(diff);
}
// Derivative of the sigmoid cross-entropy with respect to the logit `x`:
// sigmoid(x) - label.
template <typename T>
static T SCEGrad(T x, T label) {
  const auto sigmoid = 1.0 / (1.0 + std::exp(-x));
  return sigmoid - label;
}
// Derivative of L1Loss(x, y) = |y - x| with respect to the prediction `x`.
//
// Returns +1 when x > y and -1 when x < y. When the two compare equal (or
// are unordered, e.g. NaN) it returns 0: the loss is already at its minimum
// there, so reporting a non-zero slope would push a perfect prediction away
// from its target.
template <typename T>
static T L1LossGrad(T x, T y) {
  if (x > y) {
    return static_cast<T>(1.0);
  }
  if (x < y) {
    return static_cast<T>(-1.0);
  }
  return static_cast<T>(0.0);
}
template <typename T>
static void CalcSCE(T* loss_data, const T* input, const T* target,
const T* weight, const T* mask, const int n,
const int an_num, const int grid_num, const int class_num,
const int num) {
for (int i = 0; i < n; i++) {
for (int j = 0; j < an_num; j++) {
for (int k = 0; k < h; k++) {
for (int l = 0; l < w; l++) {
grad_t(i, j * attr_num, k, l) = grad_x_t(i, j, k, l) * loss_weight_xy;
grad_t(i, j * attr_num + 1, k, l) =
grad_y_t(i, j, k, l) * loss_weight_xy;
grad_t(i, j * attr_num + 2, k, l) =
grad_w_t(i, j, k, l) * loss_weight_wh;
grad_t(i, j * attr_num + 3, k, l) =
grad_h_t(i, j, k, l) * loss_weight_wh;
grad_t(i, j * attr_num + 4, k, l) =
grad_conf_target_t(i, j, k, l) * loss_weight_conf_target;
grad_t(i, j * attr_num + 4, k, l) +=
grad_conf_notarget_t(i, j, k, l) * loss_weight_conf_notarget;
for (int k = 0; k < grid_num; k++) {
int sub_idx = k * num;
for (int l = 0; l < num; l++) {
loss_data[i] += SCE<T>(input[l * grid_num + k], target[sub_idx + l]) *
weight[k] * mask[k];
}
}
input += (class_num + 5) * grid_num;
target += grid_num * num;
weight += grid_num;
mask += grid_num;
}
}
}
for (int c = 0; c < class_num; c++) {
grad_t(i, j * attr_num + 5 + c, k, l) =
grad_class_t(i, j, k, l, c) * loss_weight_class;
template <typename T>
static void CalcSCEGrad(T* input_grad, const T* loss_grad, const T* input,
const T* target, const T* weight, const T* mask,
const int n, const int an_num, const int grid_num,
const int class_num, const int num) {
for (int i = 0; i < n; i++) {
for (int j = 0; j < an_num; j++) {
for (int k = 0; k < grid_num; k++) {
int sub_idx = k * num;
for (int l = 0; l < num; l++) {
input_grad[l * grid_num + k] =
SCEGrad<T>(input[l * grid_num + k], target[sub_idx + l]) *
weight[k] * mask[k] * loss_grad[i];
}
}
input_grad += (class_num + 5) * grid_num;
input += (class_num + 5) * grid_num;
target += grid_num * num;
weight += grid_num;
mask += grid_num;
}
}
}
// Adds the weighted, masked L1 loss of one attribute plane to the
// per-sample loss.
//
// `input` points at the attribute plane for anchor 0 of sample 0;
// successive anchors are (class_num + 5) * grid_num elements apart.
// `target`, `weight` and `mask` hold one value per grid cell per anchor.
// `loss_data[sample]` is accumulated into (+=), never reset here.
template <typename T>
static void CalcL1Loss(T* loss_data, const T* input, const T* target,
                       const T* weight, const T* mask, const int n,
                       const int an_num, const int grid_num,
                       const int class_num) {
  for (int sample = 0; sample < n; ++sample) {
    for (int anchor = 0; anchor < an_num; ++anchor) {
      for (int cell = 0; cell < grid_num; ++cell) {
        const T err = L1Loss<T>(input[cell], target[cell]);
        loss_data[sample] += err * weight[cell] * mask[cell];
      }

      // Advance every cursor to the next anchor.
      input += (class_num + 5) * grid_num;
      target += grid_num;
      weight += grid_num;
      mask += grid_num;
    }
  }
}
// Writes the gradient of the weighted, masked L1 loss for one attribute
// plane of every (sample, anchor) pair.
//
// `input` / `input_grad` point at the attribute plane for anchor 0 of
// sample 0; successive anchors are (class_num + 5) * grid_num elements
// apart. `target`, `weight` and `mask` hold one value per grid cell per
// anchor. Each gradient is scaled by weight * mask and by the upstream
// gradient `loss_grad[sample]`; entries of `input_grad` are overwritten.
template <typename T>
static void CalcL1LossGrad(T* input_grad, const T* loss_grad, const T* input,
                           const T* target, const T* weight, const T* mask,
                           const int n, const int an_num, const int grid_num,
                           const int class_num) {
  for (int sample = 0; sample < n; ++sample) {
    for (int anchor = 0; anchor < an_num; ++anchor) {
      for (int cell = 0; cell < grid_num; ++cell) {
        const T slope = L1LossGrad<T>(input[cell], target[cell]);
        input_grad[cell] =
            slope * weight[cell] * mask[cell] * loss_grad[sample];
      }

      // Advance every cursor to the next anchor.
      input_grad += (class_num + 5) * grid_num;
      input += (class_num + 5) * grid_num;
      target += grid_num;
      weight += grid_num;
      mask += grid_num;
    }
  }
}
template <typename T>
static void CalcYolov3Loss(T* loss_data, const Tensor& input, const Tensor& tx,
const Tensor& ty, const Tensor& tw, const Tensor& th,
const Tensor& tweight, const Tensor& tconf,
const Tensor& tclass, const Tensor& conf_mask,
const Tensor& obj_mask) {
const T* input_data = input.data<T>();
const T* tx_data = tx.data<T>();
const T* ty_data = ty.data<T>();
const T* tw_data = tw.data<T>();
const T* th_data = th.data<T>();
const T* tweight_data = tweight.data<T>();
const T* tconf_data = tconf.data<T>();
const T* tclass_data = tclass.data<T>();
const T* conf_mask_data = conf_mask.data<T>();
const T* obj_mask_data = obj_mask.data<T>();
const int n = tclass.dims()[0];
const int an_num = tclass.dims()[1];
const int h = tclass.dims()[2];
const int w = tclass.dims()[3];
const int class_num = tclass.dims()[4];
const int grid_num = h * w;
CalcSCE<T>(loss_data, input_data, tx_data, tweight_data, obj_mask_data, n,
an_num, grid_num, class_num, 1);
CalcSCE<T>(loss_data, input_data + grid_num, ty_data, tweight_data,
obj_mask_data, n, an_num, grid_num, class_num, 1);
CalcL1Loss<T>(loss_data, input_data + 2 * grid_num, tw_data, tweight_data,
obj_mask_data, n, an_num, grid_num, class_num);
CalcL1Loss<T>(loss_data, input_data + 3 * grid_num, th_data, tweight_data,
obj_mask_data, n, an_num, grid_num, class_num);
CalcSCE<T>(loss_data, input_data + 4 * grid_num, tconf_data, conf_mask_data,
conf_mask_data, n, an_num, grid_num, class_num, 1);
CalcSCE<T>(loss_data, input_data + 5 * grid_num, tclass_data, obj_mask_data,
obj_mask_data, n, an_num, grid_num, class_num, class_num);
}
// Fills `input_grad_data` with the gradient of the YOLOv3 loss with respect
// to the raw prediction tensor `input`, scaled per sample by `loss_grad`.
//
// The sizes (samples, anchors, rows, columns, classes) are taken from the
// five dimensions of `tclass`. `input_grad_data` is addressed exactly like
// `input`: attribute plane k of the first anchor starts at
// k * (rows * columns). The weight/mask pairing of each term mirrors the
// forward pass:
//   plane 0, 1 : x, y        -> SCE grad, tweight and obj_mask
//   plane 2, 3 : w, h        -> L1 grad,  tweight and obj_mask
//   plane 4    : confidence  -> SCE grad, conf_mask as weight and mask
//   plane 5... : class score -> SCE grad, obj_mask as weight and mask
template <typename T>
static void CalcYolov3LossGrad(T* input_grad_data, const Tensor& loss_grad,
                               const Tensor& input, const Tensor& tx,
                               const Tensor& ty, const Tensor& tw,
                               const Tensor& th, const Tensor& tweight,
                               const Tensor& tconf, const Tensor& tclass,
                               const Tensor& conf_mask,
                               const Tensor& obj_mask) {
  const T* upstream = loss_grad.data<T>();
  const T* pred = input.data<T>();
  const T* target_x = tx.data<T>();
  const T* target_y = ty.data<T>();
  const T* target_w = tw.data<T>();
  const T* target_h = th.data<T>();
  const T* box_weight = tweight.data<T>();
  const T* target_conf = tconf.data<T>();
  const T* target_class = tclass.data<T>();
  const T* conf_mask_ptr = conf_mask.data<T>();
  const T* obj_mask_ptr = obj_mask.data<T>();

  const int batch = tclass.dims()[0];
  const int anchors = tclass.dims()[1];
  const int rows = tclass.dims()[2];
  const int cols = tclass.dims()[3];
  const int classes = tclass.dims()[4];
  const int plane = rows * cols;

  // Box centre (x, y).
  CalcSCEGrad<T>(input_grad_data, upstream, pred, target_x, box_weight,
                 obj_mask_ptr, batch, anchors, plane, classes, 1);
  CalcSCEGrad<T>(input_grad_data + plane, upstream, pred + plane, target_y,
                 box_weight, obj_mask_ptr, batch, anchors, plane, classes, 1);

  // Box size (w, h).
  CalcL1LossGrad<T>(input_grad_data + 2 * plane, upstream, pred + 2 * plane,
                    target_w, box_weight, obj_mask_ptr, batch, anchors, plane,
                    classes);
  CalcL1LossGrad<T>(input_grad_data + 3 * plane, upstream, pred + 3 * plane,
                    target_h, box_weight, obj_mask_ptr, batch, anchors, plane,
                    classes);

  // Objectness confidence.
  CalcSCEGrad<T>(input_grad_data + 4 * plane, upstream, pred + 4 * plane,
                 target_conf, conf_mask_ptr, conf_mask_ptr, batch, anchors,
                 plane, classes, 1);

  // Per-class scores: `classes` planes per anchor.
  CalcSCEGrad<T>(input_grad_data + 5 * plane, upstream, pred + 5 * plane,
                 target_class, obj_mask_ptr, obj_mask_ptr, batch, anchors,
                 plane, classes, classes);
}
template <typename T>
class Yolov3LossKernel : public framework::OpKernel<T> {
public:
......@@ -357,33 +500,16 @@ class Yolov3LossKernel : public framework::OpKernel<T> {
int class_num = ctx.Attr<int>("class_num");
int input_size = ctx.Attr<int>("input_size");
float ignore_thresh = ctx.Attr<float>("ignore_thresh");
float loss_weight_xy = ctx.Attr<float>("loss_weight_xy");
float loss_weight_wh = ctx.Attr<float>("loss_weight_wh");
float loss_weight_conf_target = ctx.Attr<float>("loss_weight_conf_target");
float loss_weight_conf_notarget =
ctx.Attr<float>("loss_weight_conf_notarget");
float loss_weight_class = ctx.Attr<float>("loss_weight_class");
const int n = input->dims()[0];
const int h = input->dims()[2];
const int w = input->dims()[3];
const int an_num = anchors.size() / 2;
Tensor pred_x, pred_y, pred_w, pred_h;
Tensor pred_conf, pred_class;
pred_x.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
pred_y.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
pred_w.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
pred_h.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
pred_conf.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
pred_class.mutable_data<T>({n, an_num, h, w, class_num}, ctx.GetPlace());
SplitPredResult<T>(*input, &pred_conf, &pred_class, &pred_x, &pred_y,
&pred_w, &pred_h, an_num, class_num);
Tensor obj_mask, noobj_mask;
Tensor conf_mask, obj_mask;
Tensor tx, ty, tw, th, tweight, tconf, tclass;
conf_mask.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
obj_mask.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
noobj_mask.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
tx.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
ty.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
tw.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
......@@ -392,35 +518,13 @@ class Yolov3LossKernel : public framework::OpKernel<T> {
tconf.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
tclass.mutable_data<T>({n, an_num, h, w, class_num}, ctx.GetPlace());
PreProcessGTBox<T>(*gt_box, *gt_label, ignore_thresh, anchors, input_size,
h, &obj_mask, &noobj_mask, &tx, &ty, &tw, &th, &tweight,
h, &conf_mask, &obj_mask, &tx, &ty, &tw, &th, &tweight,
&tconf, &tclass);
Tensor obj_weight;
obj_weight.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
auto obj_weight_t = EigenTensor<T, 4>::From(obj_weight);
auto obj_mask_t = EigenTensor<T, 4>::From(obj_mask);
auto tweight_t = EigenTensor<T, 4>::From(tweight);
obj_weight_t = obj_mask_t * tweight_t;
Tensor obj_mask_expand;
obj_mask_expand.mutable_data<T>({n, an_num, h, w, class_num},
ctx.GetPlace());
auto obj_mask_expand_t = EigenTensor<T, 5>::From(obj_mask_expand);
obj_mask_expand_t = obj_mask_t.reshape(Array5(n, an_num, h, w, 1))
.broadcast(Array5(1, 1, 1, 1, class_num));
T* loss_data = loss->mutable_data<T>({n}, ctx.GetPlace());
memset(loss_data, 0, n * sizeof(T));
CalcSCEWithWeight<T>(pred_x, tx, obj_weight, loss_weight_xy, loss_data);
CalcSCEWithWeight<T>(pred_y, ty, obj_weight, loss_weight_xy, loss_data);
CalcL1LossWithWeight<T>(pred_w, tw, obj_weight, loss_weight_wh, loss_data);
CalcL1LossWithWeight<T>(pred_h, th, obj_weight, loss_weight_wh, loss_data);
CalcSCEWithWeight<T>(pred_conf, tconf, obj_mask, loss_weight_conf_target,
loss_data);
CalcSCEWithWeight<T>(pred_conf, tconf, noobj_mask,
loss_weight_conf_notarget, loss_data);
CalcSCEWithWeight<T>(pred_class, tclass, obj_mask_expand, loss_weight_class,
loss_data);
CalcYolov3Loss<T>(loss_data, *input, tx, ty, tw, th, tweight, tconf, tclass,
conf_mask, obj_mask);
}
};
......@@ -436,14 +540,7 @@ class Yolov3LossGradKernel : public framework::OpKernel<T> {
float ignore_thresh = ctx.Attr<float>("ignore_thresh");
auto* input_grad = ctx.Output<Tensor>(framework::GradVarName("X"));
auto* loss_grad = ctx.Input<Tensor>(framework::GradVarName("Loss"));
const T* loss_grad_data = loss_grad->data<T>();
int input_size = ctx.Attr<int>("input_size");
float loss_weight_xy = ctx.Attr<float>("loss_weight_xy");
float loss_weight_wh = ctx.Attr<float>("loss_weight_wh");
float loss_weight_conf_target = ctx.Attr<float>("loss_weight_conf_target");
float loss_weight_conf_notarget =
ctx.Attr<float>("loss_weight_conf_notarget");
float loss_weight_class = ctx.Attr<float>("loss_weight_class");
const int n = input->dims()[0];
const int c = input->dims()[1];
......@@ -451,21 +548,10 @@ class Yolov3LossGradKernel : public framework::OpKernel<T> {
const int w = input->dims()[3];
const int an_num = anchors.size() / 2;
Tensor pred_x, pred_y, pred_w, pred_h;
Tensor pred_conf, pred_class;
pred_x.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
pred_y.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
pred_w.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
pred_h.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
pred_conf.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
pred_class.mutable_data<T>({n, an_num, h, w, class_num}, ctx.GetPlace());
SplitPredResult<T>(*input, &pred_conf, &pred_class, &pred_x, &pred_y,
&pred_w, &pred_h, an_num, class_num);
Tensor obj_mask, noobj_mask;
Tensor conf_mask, obj_mask;
Tensor tx, ty, tw, th, tweight, tconf, tclass;
conf_mask.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
obj_mask.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
noobj_mask.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
tx.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
ty.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
tw.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
......@@ -474,51 +560,13 @@ class Yolov3LossGradKernel : public framework::OpKernel<T> {
tconf.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
tclass.mutable_data<T>({n, an_num, h, w, class_num}, ctx.GetPlace());
PreProcessGTBox<T>(*gt_box, *gt_label, ignore_thresh, anchors, input_size,
h, &obj_mask, &noobj_mask, &tx, &ty, &tw, &th, &tweight,
h, &conf_mask, &obj_mask, &tx, &ty, &tw, &th, &tweight,
&tconf, &tclass);
Tensor obj_weight;
obj_weight.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
auto obj_weight_t = EigenTensor<T, 4>::From(obj_weight);
auto obj_mask_t = EigenTensor<T, 4>::From(obj_mask);
auto tweight_t = EigenTensor<T, 4>::From(tweight);
obj_weight_t = obj_mask_t * tweight_t;
Tensor obj_mask_expand;
obj_mask_expand.mutable_data<T>({n, an_num, h, w, class_num},
ctx.GetPlace());
auto obj_mask_expand_t = EigenTensor<T, 5>::From(obj_mask_expand);
obj_mask_expand_t = obj_mask_t.reshape(Array5(n, an_num, h, w, 1))
.broadcast(Array5(1, 1, 1, 1, class_num));
Tensor grad_x, grad_y, grad_w, grad_h;
Tensor grad_conf_target, grad_conf_notarget, grad_class;
grad_x.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
grad_y.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
grad_w.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
grad_h.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
grad_conf_target.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
grad_conf_notarget.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
grad_class.mutable_data<T>({n, an_num, h, w, class_num}, ctx.GetPlace());
CalcSCEGradWithWeight<T>(loss_grad_data, &grad_x, pred_x, tx, obj_weight);
CalcSCEGradWithWeight<T>(loss_grad_data, &grad_y, pred_y, ty, obj_weight);
CalcL1LossGradWithWeight<T>(loss_grad_data, &grad_w, pred_w, tw,
obj_weight);
CalcL1LossGradWithWeight<T>(loss_grad_data, &grad_h, pred_h, th,
obj_weight);
CalcSCEGradWithWeight<T>(loss_grad_data, &grad_conf_target, pred_conf,
tconf, obj_mask);
CalcSCEGradWithWeight<T>(loss_grad_data, &grad_conf_notarget, pred_conf,
tconf, noobj_mask);
CalcSCEGradWithWeight<T>(loss_grad_data, &grad_class, pred_class, tclass,
obj_mask_expand);
T* input_grad_data =
input_grad->mutable_data<T>({n, c, h, w}, ctx.GetPlace());
AddAllGradToInputGrad<T>(input_grad, grad_x, grad_y, grad_w, grad_h,
grad_conf_target, grad_conf_notarget, grad_class,
class_num, loss_weight_xy, loss_weight_wh,
loss_weight_conf_target, loss_weight_conf_notarget,
loss_weight_class);
CalcYolov3LossGrad<T>(input_grad_data, *loss_grad, *input, tx, ty, tw, th,
tweight, tconf, tclass, conf_mask, obj_mask);
}
};
......
......@@ -416,11 +416,6 @@ def yolov3_loss(x,
class_num,
ignore_thresh,
input_size,
loss_weight_xy=None,
loss_weight_wh=None,
loss_weight_conf_target=None,
loss_weight_conf_notarget=None,
loss_weight_class=None,
name=None):
"""
${comment}
......@@ -438,11 +433,6 @@ def yolov3_loss(x,
class_num (int): ${class_num_comment}
ignore_thresh (float): ${ignore_thresh_comment}
input_size (int): ${input_size_comment}
loss_weight_xy (float|None): ${loss_weight_xy_comment}
loss_weight_wh (float|None): ${loss_weight_wh_comment}
loss_weight_conf_target (float|None): ${loss_weight_conf_target_comment}
loss_weight_conf_notarget (float|None): ${loss_weight_conf_notarget_comment}
loss_weight_class (float|None): ${loss_weight_class_comment}
name (string): the name of yolov3 loss
Returns:
......@@ -495,18 +485,18 @@ def yolov3_loss(x,
"input_size": input_size,
}
if loss_weight_xy is not None and isinstance(loss_weight_xy, float):
self.attrs['loss_weight_xy'] = loss_weight_xy
if loss_weight_wh is not None and isinstance(loss_weight_wh, float):
self.attrs['loss_weight_wh'] = loss_weight_wh
if loss_weight_conf_target is not None and isinstance(
loss_weight_conf_target, float):
self.attrs['loss_weight_conf_target'] = loss_weight_conf_target
if loss_weight_conf_notarget is not None and isinstance(
loss_weight_conf_notarget, float):
self.attrs['loss_weight_conf_notarget'] = loss_weight_conf_notarget
if loss_weight_class is not None and isinstance(loss_weight_class, float):
self.attrs['loss_weight_class'] = loss_weight_class
# if loss_weight_xy is not None and isinstance(loss_weight_xy, float):
# self.attrs['loss_weight_xy'] = loss_weight_xy
# if loss_weight_wh is not None and isinstance(loss_weight_wh, float):
# self.attrs['loss_weight_wh'] = loss_weight_wh
# if loss_weight_conf_target is not None and isinstance(
# loss_weight_conf_target, float):
# self.attrs['loss_weight_conf_target'] = loss_weight_conf_target
# if loss_weight_conf_notarget is not None and isinstance(
# loss_weight_conf_notarget, float):
# self.attrs['loss_weight_conf_notarget'] = loss_weight_conf_notarget
# if loss_weight_class is not None and isinstance(loss_weight_class, float):
# self.attrs['loss_weight_class'] = loss_weight_class
helper.append_op(
type='yolov3_loss',
......
......@@ -470,8 +470,6 @@ class OpTest(unittest.TestCase):
]
analytic_grads = self._get_gradient(inputs_to_check, place,
output_names, no_grad_set)
# print(numeric_grads[0][0, 4, :, :])
# print(analytic_grads[0][0, 4, :, :])
self._assert_is_close(numeric_grads, analytic_grads, inputs_to_check,
max_relative_error,
......
......@@ -80,8 +80,8 @@ def build_target(gtboxes, gtlabel, attrs, grid_size):
class_num = attrs["class_num"]
input_size = attrs["input_size"]
an_num = len(anchors) // 2
conf_mask = np.ones((n, an_num, grid_size, grid_size)).astype('float32')
obj_mask = np.zeros((n, an_num, grid_size, grid_size)).astype('float32')
noobj_mask = np.ones((n, an_num, grid_size, grid_size)).astype('float32')
tx = np.zeros((n, an_num, grid_size, grid_size)).astype('float32')
ty = np.zeros((n, an_num, grid_size, grid_size)).astype('float32')
tw = np.zeros((n, an_num, grid_size, grid_size)).astype('float32')
......@@ -114,10 +114,10 @@ def build_target(gtboxes, gtlabel, attrs, grid_size):
max_iou = iou
best_an_index = k
if iou > ignore_thresh:
noobj_mask[i, best_an_index, gj, gi] = 0
conf_mask[i, best_an_index, gj, gi] = 0
conf_mask[i, best_an_index, gj, gi] = 1
obj_mask[i, best_an_index, gj, gi] = 1
noobj_mask[i, best_an_index, gj, gi] = 0
tx[i, best_an_index, gj, gi] = gx - gi
ty[i, best_an_index, gj, gi] = gy - gj
tw[i, best_an_index, gj, gi] = np.log(gw / anchors[2 *
......@@ -129,7 +129,7 @@ def build_target(gtboxes, gtlabel, attrs, grid_size):
tconf[i, best_an_index, gj, gi] = 1
tcls[i, best_an_index, gj, gi, gt_label] = 1
return (tx, ty, tw, th, tweight, tconf, tcls, obj_mask, noobj_mask)
return (tx, ty, tw, th, tweight, tconf, tcls, conf_mask, obj_mask)
def YoloV3Loss(x, gtbox, gtlabel, attrs):
......@@ -144,11 +144,9 @@ def YoloV3Loss(x, gtbox, gtlabel, attrs):
pred_conf = x[:, :, :, :, 4]
pred_cls = x[:, :, :, :, 5:]
tx, ty, tw, th, tweight, tconf, tcls, obj_mask, noobj_mask = build_target(
tx, ty, tw, th, tweight, tconf, tcls, conf_mask, obj_mask = build_target(
gtbox, gtlabel, attrs, x.shape[2])
# print("obj_mask: ", obj_mask[0, 0, :, :])
# print("noobj_mask: ", noobj_mask[0, 0, :, :])
obj_weight = obj_mask * tweight
obj_mask_expand = np.tile(
np.expand_dims(obj_mask, 4), (1, 1, 1, 1, int(attrs['class_num'])))
......@@ -156,30 +154,19 @@ def YoloV3Loss(x, gtbox, gtlabel, attrs):
loss_y = sce(pred_y, ty, obj_weight)
loss_w = l1loss(pred_w, tw, obj_weight)
loss_h = l1loss(pred_h, th, obj_weight)
loss_conf_target = sce(pred_conf, tconf, obj_mask)
loss_conf_notarget = sce(pred_conf, tconf, noobj_mask)
loss_obj = sce(pred_conf, tconf, conf_mask)
loss_class = sce(pred_cls, tcls, obj_mask_expand)
# print("loss_xy: ", loss_x + loss_y)
# print("loss_wh: ", loss_w + loss_h)
# print("loss_conf_target: ", loss_conf_target)
# print("loss_conf_notarget: ", loss_conf_notarget)
# print("loss_class: ", loss_class)
# print("python loss_xy: ", loss_x + loss_y)
# print("python loss_wh: ", loss_w + loss_h)
# print("python loss_obj: ", loss_obj)
# print("python loss_class: ", loss_class)
return attrs['loss_weight_xy'] * (loss_x + loss_y) \
+ attrs['loss_weight_wh'] * (loss_w + loss_h) \
+ attrs['loss_weight_conf_target'] * loss_conf_target \
+ attrs['loss_weight_conf_notarget'] * loss_conf_notarget \
+ attrs['loss_weight_class'] * loss_class
return loss_x + loss_y + loss_w + loss_h + loss_obj + loss_class
class TestYolov3LossOp(OpTest):
def setUp(self):
self.loss_weight_xy = 1.0
self.loss_weight_wh = 1.0
self.loss_weight_conf_target = 1.0
self.loss_weight_conf_notarget = 1.0
self.loss_weight_class = 1.0
self.initTestCase()
self.op_type = 'yolov3_loss'
x = logit(np.random.uniform(0, 1, self.x_shape).astype('float32'))
......@@ -192,11 +179,6 @@ class TestYolov3LossOp(OpTest):
"class_num": self.class_num,
"ignore_thresh": self.ignore_thresh,
"input_size": self.input_size,
"loss_weight_xy": self.loss_weight_xy,
"loss_weight_wh": self.loss_weight_wh,
"loss_weight_conf_target": self.loss_weight_conf_target,
"loss_weight_conf_notarget": self.loss_weight_conf_notarget,
"loss_weight_class": self.loss_weight_class,
}
self.inputs = {'X': x, 'GTBox': gtbox, 'GTLabel': gtlabel}
......@@ -215,17 +197,12 @@ class TestYolov3LossOp(OpTest):
max_relative_error=0.31)
def initTestCase(self):
self.anchors = [12, 12]
self.anchors = [12, 12, 11, 13]
self.class_num = 5
self.ignore_thresh = 0.3
self.ignore_thresh = 0.5
self.input_size = 416
self.x_shape = (3, len(self.anchors) // 2 * (5 + self.class_num), 5, 5)
self.gtbox_shape = (3, 5, 4)
self.loss_weight_xy = 1.2
self.loss_weight_wh = 0.8
self.loss_weight_conf_target = 2.0
self.loss_weight_conf_notarget = 1.0
self.loss_weight_class = 1.5
if __name__ == "__main__":
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册