Commit 0c4acc83 authored by D dengkaipeng

improve yolo loss implementation. test=develop

Parent 2fbfef2e
@@ -34,11 +34,12 @@ class Yolov3LossOp : public framework::OperatorWithKernel {
     auto dim_gtbox = ctx->GetInputDim("GTBox");
     auto dim_gtlabel = ctx->GetInputDim("GTLabel");
     auto anchors = ctx->Attrs().Get<std::vector<int>>("anchors");
+    int anchor_num = anchors.size() / 2;
     auto class_num = ctx->Attrs().Get<int>("class_num");
     PADDLE_ENFORCE_EQ(dim_x.size(), 4, "Input(X) should be a 4-D tensor.");
     PADDLE_ENFORCE_EQ(dim_x[2], dim_x[3],
                       "Input(X) dim[3] and dim[4] should be equal.");
-    PADDLE_ENFORCE_EQ(dim_x[1], anchors.size() / 2 * (5 + class_num),
+    PADDLE_ENFORCE_EQ(dim_x[1], anchor_num * (5 + class_num),
                       "Input(X) dim[1] should be equal to (anchor_number * (5 "
                       "+ class_num)).");
     PADDLE_ENFORCE_EQ(dim_gtbox.size(), 3,
@@ -105,20 +106,6 @@ class Yolov3LossOpMaker : public framework::OpProtoAndCheckerMaker {
         .SetDefault(406);
     AddAttr<float>("ignore_thresh",
                    "The ignore threshold to ignore confidence loss.");
-    AddAttr<float>("loss_weight_xy", "The weight of x, y location loss.")
-        .SetDefault(1.0);
-    AddAttr<float>("loss_weight_wh", "The weight of w, h location loss.")
-        .SetDefault(1.0);
-    AddAttr<float>(
-        "loss_weight_conf_target",
-        "The weight of confidence score loss in locations with target object.")
-        .SetDefault(1.0);
-    AddAttr<float>("loss_weight_conf_notarget",
-                   "The weight of confidence score loss in locations without "
-                   "target object.")
-        .SetDefault(1.0);
-    AddAttr<float>("loss_weight_class", "The weight of classification loss.")
-        .SetDefault(1.0);
     AddComment(R"DOC(
     This operator generates yolov3 loss from the given predict result and
     ground truth boxes.
......
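As a concrete check of the shape rule enforced above, using the anchor set from the updated unit test later in this diff: anchors = [12, 12, 11, 13] gives anchor_num = 4 / 2 = 2, and with class_num = 5 each anchor occupies 5 + 5 = 10 channels (x, y, w, h, confidence, plus one score per class), so dim[1] of Input(X) must be 2 * 10 = 20.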
@@ -164,48 +164,50 @@ static inline void CalcSCEGradWithWeight(const T* loss_grad, Tensor* grad,
   }
 }
 
-template <typename T>
-static void SplitPredResult(const Tensor& input, Tensor* pred_conf,
-                            Tensor* pred_class, Tensor* pred_x, Tensor* pred_y,
-                            Tensor* pred_w, Tensor* pred_h,
-                            const int anchor_num, const int class_num) {
-  const int n = input.dims()[0];
-  const int h = input.dims()[2];
-  const int w = input.dims()[3];
-  const int box_attr_num = 5 + class_num;
-
-  auto input_t = EigenTensor<T, 4>::From(input);
-  auto pred_conf_t = EigenTensor<T, 4>::From(*pred_conf);
-  auto pred_class_t = EigenTensor<T, 5>::From(*pred_class);
-  auto pred_x_t = EigenTensor<T, 4>::From(*pred_x);
-  auto pred_y_t = EigenTensor<T, 4>::From(*pred_y);
-  auto pred_w_t = EigenTensor<T, 4>::From(*pred_w);
-  auto pred_h_t = EigenTensor<T, 4>::From(*pred_h);
-
-  for (int i = 0; i < n; i++) {
-    for (int an_idx = 0; an_idx < anchor_num; an_idx++) {
-      for (int j = 0; j < h; j++) {
-        for (int k = 0; k < w; k++) {
-          pred_x_t(i, an_idx, j, k) = input_t(i, box_attr_num * an_idx, j, k);
-          pred_y_t(i, an_idx, j, k) =
-              input_t(i, box_attr_num * an_idx + 1, j, k);
-          pred_w_t(i, an_idx, j, k) =
-              input_t(i, box_attr_num * an_idx + 2, j, k);
-          pred_h_t(i, an_idx, j, k) =
-              input_t(i, box_attr_num * an_idx + 3, j, k);
-
-          pred_conf_t(i, an_idx, j, k) =
-              input_t(i, box_attr_num * an_idx + 4, j, k);
-
-          for (int c = 0; c < class_num; c++) {
-            pred_class_t(i, an_idx, j, k, c) =
-                input_t(i, box_attr_num * an_idx + 5 + c, j, k);
-          }
-        }
-      }
-    }
-  }
-}
+// template <typename T>
+// static void SplitPredResult(const Tensor& input, Tensor* pred_conf,
+//                             Tensor* pred_class, Tensor* pred_x, Tensor*
+//                             pred_y,
+//                             Tensor* pred_w, Tensor* pred_h,
+//                             const int anchor_num, const int class_num) {
+//   const int n = input.dims()[0];
+//   const int h = input.dims()[2];
+//   const int w = input.dims()[3];
+//   const int box_attr_num = 5 + class_num;
+//
+//   auto input_t = EigenTensor<T, 4>::From(input);
+//   auto pred_conf_t = EigenTensor<T, 4>::From(*pred_conf);
+//   auto pred_class_t = EigenTensor<T, 5>::From(*pred_class);
+//   auto pred_x_t = EigenTensor<T, 4>::From(*pred_x);
+//   auto pred_y_t = EigenTensor<T, 4>::From(*pred_y);
+//   auto pred_w_t = EigenTensor<T, 4>::From(*pred_w);
+//   auto pred_h_t = EigenTensor<T, 4>::From(*pred_h);
+//
+//   for (int i = 0; i < n; i++) {
+//     for (int an_idx = 0; an_idx < anchor_num; an_idx++) {
+//       for (int j = 0; j < h; j++) {
+//         for (int k = 0; k < w; k++) {
+//           pred_x_t(i, an_idx, j, k) = input_t(i, box_attr_num * an_idx, j,
+//           k);
+//           pred_y_t(i, an_idx, j, k) =
+//               input_t(i, box_attr_num * an_idx + 1, j, k);
+//           pred_w_t(i, an_idx, j, k) =
+//               input_t(i, box_attr_num * an_idx + 2, j, k);
+//           pred_h_t(i, an_idx, j, k) =
+//               input_t(i, box_attr_num * an_idx + 3, j, k);
+//
+//           pred_conf_t(i, an_idx, j, k) =
+//               input_t(i, box_attr_num * an_idx + 4, j, k);
+//
+//           for (int c = 0; c < class_num; c++) {
+//             pred_class_t(i, an_idx, j, k, c) =
+//                 input_t(i, box_attr_num * an_idx + 5 + c, j, k);
+//           }
+//         }
+//       }
+//     }
+//   }
+// }
 
 template <typename T>
 static T CalcBoxIoU(std::vector<T> box1, std::vector<T> box2) {
@@ -235,7 +237,7 @@ template <typename T>
 static void PreProcessGTBox(const Tensor& gt_box, const Tensor& gt_label,
                             const float ignore_thresh, std::vector<int> anchors,
                             const int input_size, const int grid_size,
-                            Tensor* obj_mask, Tensor* noobj_mask, Tensor* tx,
+                            Tensor* conf_mask, Tensor* obj_mask, Tensor* tx,
                             Tensor* ty, Tensor* tw, Tensor* th, Tensor* tweight,
                             Tensor* tconf, Tensor* tclass) {
   const int n = gt_box.dims()[0];
@@ -243,8 +245,8 @@ static void PreProcessGTBox(const Tensor& gt_box, const Tensor& gt_label,
   const int anchor_num = anchors.size() / 2;
   auto gt_box_t = EigenTensor<T, 3>::From(gt_box);
   auto gt_label_t = EigenTensor<int, 2>::From(gt_label);
-  auto obj_mask_t = EigenTensor<T, 4>::From(*obj_mask).setConstant(0);
-  auto noobj_mask_t = EigenTensor<T, 4>::From(*noobj_mask).setConstant(1);
+  auto conf_mask_t = EigenTensor<T, 4>::From(*conf_mask).setConstant(1.0);
+  auto obj_mask_t = EigenTensor<T, 4>::From(*obj_mask).setConstant(0.0);
   auto tx_t = EigenTensor<T, 4>::From(*tx).setConstant(0.0);
   auto ty_t = EigenTensor<T, 4>::From(*ty).setConstant(0.0);
   auto tw_t = EigenTensor<T, 4>::From(*tw).setConstant(0.0);
@@ -280,11 +282,11 @@ static void PreProcessGTBox(const Tensor& gt_box, const Tensor& gt_label,
         best_an_index = an_idx;
       }
       if (iou > ignore_thresh) {
-        noobj_mask_t(i, an_idx, gj, gi) = static_cast<T>(0.0);
+        conf_mask_t(i, an_idx, gj, gi) = static_cast<T>(0.0);
       }
     }
+    conf_mask_t(i, best_an_index, gj, gi) = static_cast<T>(1.0);
     obj_mask_t(i, best_an_index, gj, gi) = static_cast<T>(1.0);
-    noobj_mask_t(i, best_an_index, gj, gi) = static_cast<T>(0.0);
     tx_t(i, best_an_index, gj, gi) = gx - gi;
     ty_t(i, best_an_index, gj, gi) = gy - gj;
     tw_t(i, best_an_index, gj, gi) = log(gw / anchors[2 * best_an_index]);
@@ -298,53 +300,194 @@ static void PreProcessGTBox(const Tensor& gt_box, const Tensor& gt_label,
 }
 
 template <typename T>
-static void AddAllGradToInputGrad(
-    Tensor* grad, const Tensor& grad_x, const Tensor& grad_y,
-    const Tensor& grad_w, const Tensor& grad_h, const Tensor& grad_conf_target,
-    const Tensor& grad_conf_notarget, const Tensor& grad_class,
-    const int class_num, const float loss_weight_xy, const float loss_weight_wh,
-    const float loss_weight_conf_target, const float loss_weight_conf_notarget,
-    const float loss_weight_class) {
-  const int n = grad_x.dims()[0];
-  const int an_num = grad_x.dims()[1];
-  const int h = grad_x.dims()[2];
-  const int w = grad_x.dims()[3];
-  const int attr_num = class_num + 5;
-  auto grad_t = EigenTensor<T, 4>::From(*grad).setConstant(0.0);
-  auto grad_x_t = EigenTensor<T, 4>::From(grad_x);
-  auto grad_y_t = EigenTensor<T, 4>::From(grad_y);
-  auto grad_w_t = EigenTensor<T, 4>::From(grad_w);
-  auto grad_h_t = EigenTensor<T, 4>::From(grad_h);
-  auto grad_conf_target_t = EigenTensor<T, 4>::From(grad_conf_target);
-  auto grad_conf_notarget_t = EigenTensor<T, 4>::From(grad_conf_notarget);
-  auto grad_class_t = EigenTensor<T, 5>::From(grad_class);
-
-  for (int i = 0; i < n; i++) {
-    for (int j = 0; j < an_num; j++) {
-      for (int k = 0; k < h; k++) {
-        for (int l = 0; l < w; l++) {
-          grad_t(i, j * attr_num, k, l) = grad_x_t(i, j, k, l) * loss_weight_xy;
-          grad_t(i, j * attr_num + 1, k, l) =
-              grad_y_t(i, j, k, l) * loss_weight_xy;
-          grad_t(i, j * attr_num + 2, k, l) =
-              grad_w_t(i, j, k, l) * loss_weight_wh;
-          grad_t(i, j * attr_num + 3, k, l) =
-              grad_h_t(i, j, k, l) * loss_weight_wh;
-          grad_t(i, j * attr_num + 4, k, l) =
-              grad_conf_target_t(i, j, k, l) * loss_weight_conf_target;
-          grad_t(i, j * attr_num + 4, k, l) +=
-              grad_conf_notarget_t(i, j, k, l) * loss_weight_conf_notarget;
-
-          for (int c = 0; c < class_num; c++) {
-            grad_t(i, j * attr_num + 5 + c, k, l) =
-                grad_class_t(i, j, k, l, c) * loss_weight_class;
-          }
-        }
-      }
-    }
-  }
-}
+static T SCE(T x, T label) {
+  return (x > 0 ? x : 0.0) - x * label + std::log(1.0 + std::exp(-std::abs(x)));
+}
+
+template <typename T>
+static T L1Loss(T x, T y) {
+  return std::abs(y - x);
+}
+
+template <typename T>
+static T SCEGrad(T x, T label) {
+  return 1.0 / (1.0 + std::exp(-x)) - label;
+}
+
+template <typename T>
+static T L1LossGrad(T x, T y) {
+  return x > y ? 1.0 : -1.0;
+}
+
+template <typename T>
+static void CalcSCE(T* loss_data, const T* input, const T* target,
+                    const T* weight, const T* mask, const int n,
+                    const int an_num, const int grid_num, const int class_num,
+                    const int num) {
+  for (int i = 0; i < n; i++) {
+    for (int j = 0; j < an_num; j++) {
+      for (int k = 0; k < grid_num; k++) {
+        int sub_idx = k * num;
+        for (int l = 0; l < num; l++) {
+          loss_data[i] += SCE<T>(input[l * grid_num + k], target[sub_idx + l]) *
+                          weight[k] * mask[k];
+        }
+      }
+      input += (class_num + 5) * grid_num;
+      target += grid_num * num;
+      weight += grid_num;
+      mask += grid_num;
+    }
+  }
+}
+
+template <typename T>
+static void CalcSCEGrad(T* input_grad, const T* loss_grad, const T* input,
+                        const T* target, const T* weight, const T* mask,
+                        const int n, const int an_num, const int grid_num,
+                        const int class_num, const int num) {
+  for (int i = 0; i < n; i++) {
+    for (int j = 0; j < an_num; j++) {
+      for (int k = 0; k < grid_num; k++) {
+        int sub_idx = k * num;
+        for (int l = 0; l < num; l++) {
+          input_grad[l * grid_num + k] =
+              SCEGrad<T>(input[l * grid_num + k], target[sub_idx + l]) *
+              weight[k] * mask[k] * loss_grad[i];
+        }
+      }
+      input_grad += (class_num + 5) * grid_num;
+      input += (class_num + 5) * grid_num;
+      target += grid_num * num;
+      weight += grid_num;
+      mask += grid_num;
+    }
+  }
+}
+
+template <typename T>
+static void CalcL1Loss(T* loss_data, const T* input, const T* target,
+                       const T* weight, const T* mask, const int n,
+                       const int an_num, const int grid_num,
+                       const int class_num) {
+  for (int i = 0; i < n; i++) {
+    for (int j = 0; j < an_num; j++) {
+      for (int k = 0; k < grid_num; k++) {
+        loss_data[i] += L1Loss<T>(input[k], target[k]) * weight[k] * mask[k];
+      }
+      input += (class_num + 5) * grid_num;
+      target += grid_num;
+      weight += grid_num;
+      mask += grid_num;
+    }
+  }
+}
+
+template <typename T>
+static void CalcL1LossGrad(T* input_grad, const T* loss_grad, const T* input,
+                           const T* target, const T* weight, const T* mask,
+                           const int n, const int an_num, const int grid_num,
+                           const int class_num) {
+  for (int i = 0; i < n; i++) {
+    for (int j = 0; j < an_num; j++) {
+      for (int k = 0; k < grid_num; k++) {
+        input_grad[k] = L1LossGrad<T>(input[k], target[k]) * weight[k] *
+                        mask[k] * loss_grad[i];
+      }
+      input_grad += (class_num + 5) * grid_num;
+      input += (class_num + 5) * grid_num;
+      target += grid_num;
+      weight += grid_num;
+      mask += grid_num;
+    }
+  }
+}
+
+template <typename T>
+static void CalcYolov3Loss(T* loss_data, const Tensor& input, const Tensor& tx,
+                           const Tensor& ty, const Tensor& tw, const Tensor& th,
+                           const Tensor& tweight, const Tensor& tconf,
+                           const Tensor& tclass, const Tensor& conf_mask,
+                           const Tensor& obj_mask) {
+  const T* input_data = input.data<T>();
+  const T* tx_data = tx.data<T>();
+  const T* ty_data = ty.data<T>();
+  const T* tw_data = tw.data<T>();
+  const T* th_data = th.data<T>();
+  const T* tweight_data = tweight.data<T>();
+  const T* tconf_data = tconf.data<T>();
+  const T* tclass_data = tclass.data<T>();
+  const T* conf_mask_data = conf_mask.data<T>();
+  const T* obj_mask_data = obj_mask.data<T>();
+  const int n = tclass.dims()[0];
+  const int an_num = tclass.dims()[1];
+  const int h = tclass.dims()[2];
+  const int w = tclass.dims()[3];
+  const int class_num = tclass.dims()[4];
+  const int grid_num = h * w;
+
+  CalcSCE<T>(loss_data, input_data, tx_data, tweight_data, obj_mask_data, n,
+             an_num, grid_num, class_num, 1);
+  CalcSCE<T>(loss_data, input_data + grid_num, ty_data, tweight_data,
+             obj_mask_data, n, an_num, grid_num, class_num, 1);
+  CalcL1Loss<T>(loss_data, input_data + 2 * grid_num, tw_data, tweight_data,
+                obj_mask_data, n, an_num, grid_num, class_num);
+  CalcL1Loss<T>(loss_data, input_data + 3 * grid_num, th_data, tweight_data,
+                obj_mask_data, n, an_num, grid_num, class_num);
+  CalcSCE<T>(loss_data, input_data + 4 * grid_num, tconf_data, conf_mask_data,
+             conf_mask_data, n, an_num, grid_num, class_num, 1);
+  CalcSCE<T>(loss_data, input_data + 5 * grid_num, tclass_data, obj_mask_data,
+             obj_mask_data, n, an_num, grid_num, class_num, class_num);
+}
+
+template <typename T>
+static void CalcYolov3LossGrad(T* input_grad_data, const Tensor& loss_grad,
+                               const Tensor& input, const Tensor& tx,
+                               const Tensor& ty, const Tensor& tw,
+                               const Tensor& th, const Tensor& tweight,
+                               const Tensor& tconf, const Tensor& tclass,
+                               const Tensor& conf_mask,
+                               const Tensor& obj_mask) {
+  const T* loss_grad_data = loss_grad.data<T>();
+  const T* input_data = input.data<T>();
+  const T* tx_data = tx.data<T>();
+  const T* ty_data = ty.data<T>();
+  const T* tw_data = tw.data<T>();
+  const T* th_data = th.data<T>();
+  const T* tweight_data = tweight.data<T>();
+  const T* tconf_data = tconf.data<T>();
+  const T* tclass_data = tclass.data<T>();
+  const T* conf_mask_data = conf_mask.data<T>();
+  const T* obj_mask_data = obj_mask.data<T>();
+  const int n = tclass.dims()[0];
+  const int an_num = tclass.dims()[1];
+  const int h = tclass.dims()[2];
+  const int w = tclass.dims()[3];
+  const int class_num = tclass.dims()[4];
+  const int grid_num = h * w;
+
+  CalcSCEGrad<T>(input_grad_data, loss_grad_data, input_data, tx_data,
+                 tweight_data, obj_mask_data, n, an_num, grid_num, class_num,
+                 1);
+  CalcSCEGrad<T>(input_grad_data + grid_num, loss_grad_data,
+                 input_data + grid_num, ty_data, tweight_data, obj_mask_data, n,
+                 an_num, grid_num, class_num, 1);
+  CalcL1LossGrad<T>(input_grad_data + 2 * grid_num, loss_grad_data,
+                    input_data + 2 * grid_num, tw_data, tweight_data,
+                    obj_mask_data, n, an_num, grid_num, class_num);
+  CalcL1LossGrad<T>(input_grad_data + 3 * grid_num, loss_grad_data,
+                    input_data + 3 * grid_num, th_data, tweight_data,
+                    obj_mask_data, n, an_num, grid_num, class_num);
+  CalcSCEGrad<T>(input_grad_data + 4 * grid_num, loss_grad_data,
+                 input_data + 4 * grid_num, tconf_data, conf_mask_data,
+                 conf_mask_data, n, an_num, grid_num, class_num, 1);
+  CalcSCEGrad<T>(input_grad_data + 5 * grid_num, loss_grad_data,
+                 input_data + 5 * grid_num, tclass_data, obj_mask_data,
+                 obj_mask_data, n, an_num, grid_num, class_num, class_num);
+}
 
 template <typename T>
 class Yolov3LossKernel : public framework::OpKernel<T> {
  public:
@@ -357,33 +500,16 @@ class Yolov3LossKernel : public framework::OpKernel<T> {
     int class_num = ctx.Attr<int>("class_num");
     int input_size = ctx.Attr<int>("input_size");
     float ignore_thresh = ctx.Attr<float>("ignore_thresh");
-    float loss_weight_xy = ctx.Attr<float>("loss_weight_xy");
-    float loss_weight_wh = ctx.Attr<float>("loss_weight_wh");
-    float loss_weight_conf_target = ctx.Attr<float>("loss_weight_conf_target");
-    float loss_weight_conf_notarget =
-        ctx.Attr<float>("loss_weight_conf_notarget");
-    float loss_weight_class = ctx.Attr<float>("loss_weight_class");
 
     const int n = input->dims()[0];
     const int h = input->dims()[2];
     const int w = input->dims()[3];
     const int an_num = anchors.size() / 2;
 
-    Tensor pred_x, pred_y, pred_w, pred_h;
-    Tensor pred_conf, pred_class;
-    pred_x.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
-    pred_y.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
-    pred_w.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
-    pred_h.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
-    pred_conf.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
-    pred_class.mutable_data<T>({n, an_num, h, w, class_num}, ctx.GetPlace());
-    SplitPredResult<T>(*input, &pred_conf, &pred_class, &pred_x, &pred_y,
-                       &pred_w, &pred_h, an_num, class_num);
-
-    Tensor obj_mask, noobj_mask;
+    Tensor conf_mask, obj_mask;
     Tensor tx, ty, tw, th, tweight, tconf, tclass;
+    conf_mask.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
     obj_mask.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
-    noobj_mask.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
     tx.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
     ty.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
     tw.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
@@ -392,35 +518,13 @@ class Yolov3LossKernel : public framework::OpKernel<T> {
     tconf.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
     tclass.mutable_data<T>({n, an_num, h, w, class_num}, ctx.GetPlace());
     PreProcessGTBox<T>(*gt_box, *gt_label, ignore_thresh, anchors, input_size,
-                       h, &obj_mask, &noobj_mask, &tx, &ty, &tw, &th, &tweight,
+                       h, &conf_mask, &obj_mask, &tx, &ty, &tw, &th, &tweight,
                        &tconf, &tclass);
 
-    Tensor obj_weight;
-    obj_weight.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
-    auto obj_weight_t = EigenTensor<T, 4>::From(obj_weight);
-    auto obj_mask_t = EigenTensor<T, 4>::From(obj_mask);
-    auto tweight_t = EigenTensor<T, 4>::From(tweight);
-    obj_weight_t = obj_mask_t * tweight_t;
-
-    Tensor obj_mask_expand;
-    obj_mask_expand.mutable_data<T>({n, an_num, h, w, class_num},
-                                    ctx.GetPlace());
-    auto obj_mask_expand_t = EigenTensor<T, 5>::From(obj_mask_expand);
-    obj_mask_expand_t = obj_mask_t.reshape(Array5(n, an_num, h, w, 1))
-                            .broadcast(Array5(1, 1, 1, 1, class_num));
-
     T* loss_data = loss->mutable_data<T>({n}, ctx.GetPlace());
     memset(loss_data, 0, n * sizeof(T));
-    CalcSCEWithWeight<T>(pred_x, tx, obj_weight, loss_weight_xy, loss_data);
-    CalcSCEWithWeight<T>(pred_y, ty, obj_weight, loss_weight_xy, loss_data);
-    CalcL1LossWithWeight<T>(pred_w, tw, obj_weight, loss_weight_wh, loss_data);
-    CalcL1LossWithWeight<T>(pred_h, th, obj_weight, loss_weight_wh, loss_data);
-    CalcSCEWithWeight<T>(pred_conf, tconf, obj_mask, loss_weight_conf_target,
-                         loss_data);
-    CalcSCEWithWeight<T>(pred_conf, tconf, noobj_mask,
-                         loss_weight_conf_notarget, loss_data);
-    CalcSCEWithWeight<T>(pred_class, tclass, obj_mask_expand, loss_weight_class,
-                         loss_data);
+    CalcYolov3Loss<T>(loss_data, *input, tx, ty, tw, th, tweight, tconf, tclass,
+                      conf_mask, obj_mask);
   }
 };
@@ -436,14 +540,7 @@ class Yolov3LossGradKernel : public framework::OpKernel<T> {
     float ignore_thresh = ctx.Attr<float>("ignore_thresh");
     auto* input_grad = ctx.Output<Tensor>(framework::GradVarName("X"));
     auto* loss_grad = ctx.Input<Tensor>(framework::GradVarName("Loss"));
-    const T* loss_grad_data = loss_grad->data<T>();
     int input_size = ctx.Attr<int>("input_size");
-    float loss_weight_xy = ctx.Attr<float>("loss_weight_xy");
-    float loss_weight_wh = ctx.Attr<float>("loss_weight_wh");
-    float loss_weight_conf_target = ctx.Attr<float>("loss_weight_conf_target");
-    float loss_weight_conf_notarget =
-        ctx.Attr<float>("loss_weight_conf_notarget");
-    float loss_weight_class = ctx.Attr<float>("loss_weight_class");
 
     const int n = input->dims()[0];
     const int c = input->dims()[1];
@@ -451,21 +548,10 @@ class Yolov3LossGradKernel : public framework::OpKernel<T> {
     const int w = input->dims()[3];
     const int an_num = anchors.size() / 2;
 
-    Tensor pred_x, pred_y, pred_w, pred_h;
-    Tensor pred_conf, pred_class;
-    pred_x.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
-    pred_y.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
-    pred_w.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
-    pred_h.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
-    pred_conf.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
-    pred_class.mutable_data<T>({n, an_num, h, w, class_num}, ctx.GetPlace());
-    SplitPredResult<T>(*input, &pred_conf, &pred_class, &pred_x, &pred_y,
-                       &pred_w, &pred_h, an_num, class_num);
-
-    Tensor obj_mask, noobj_mask;
+    Tensor conf_mask, obj_mask;
     Tensor tx, ty, tw, th, tweight, tconf, tclass;
+    conf_mask.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
     obj_mask.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
-    noobj_mask.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
     tx.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
     ty.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
     tw.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
@@ -474,51 +560,13 @@ class Yolov3LossGradKernel : public framework::OpKernel<T> {
     tconf.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
     tclass.mutable_data<T>({n, an_num, h, w, class_num}, ctx.GetPlace());
     PreProcessGTBox<T>(*gt_box, *gt_label, ignore_thresh, anchors, input_size,
-                       h, &obj_mask, &noobj_mask, &tx, &ty, &tw, &th, &tweight,
+                       h, &conf_mask, &obj_mask, &tx, &ty, &tw, &th, &tweight,
                        &tconf, &tclass);
 
-    Tensor obj_weight;
-    obj_weight.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
-    auto obj_weight_t = EigenTensor<T, 4>::From(obj_weight);
-    auto obj_mask_t = EigenTensor<T, 4>::From(obj_mask);
-    auto tweight_t = EigenTensor<T, 4>::From(tweight);
-    obj_weight_t = obj_mask_t * tweight_t;
-
-    Tensor obj_mask_expand;
-    obj_mask_expand.mutable_data<T>({n, an_num, h, w, class_num},
-                                    ctx.GetPlace());
-    auto obj_mask_expand_t = EigenTensor<T, 5>::From(obj_mask_expand);
-    obj_mask_expand_t = obj_mask_t.reshape(Array5(n, an_num, h, w, 1))
-                            .broadcast(Array5(1, 1, 1, 1, class_num));
-
-    Tensor grad_x, grad_y, grad_w, grad_h;
-    Tensor grad_conf_target, grad_conf_notarget, grad_class;
-    grad_x.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
-    grad_y.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
-    grad_w.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
-    grad_h.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
-    grad_conf_target.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
-    grad_conf_notarget.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
-    grad_class.mutable_data<T>({n, an_num, h, w, class_num}, ctx.GetPlace());
-    CalcSCEGradWithWeight<T>(loss_grad_data, &grad_x, pred_x, tx, obj_weight);
-    CalcSCEGradWithWeight<T>(loss_grad_data, &grad_y, pred_y, ty, obj_weight);
-    CalcL1LossGradWithWeight<T>(loss_grad_data, &grad_w, pred_w, tw,
-                                obj_weight);
-    CalcL1LossGradWithWeight<T>(loss_grad_data, &grad_h, pred_h, th,
-                                obj_weight);
-    CalcSCEGradWithWeight<T>(loss_grad_data, &grad_conf_target, pred_conf,
-                             tconf, obj_mask);
-    CalcSCEGradWithWeight<T>(loss_grad_data, &grad_conf_notarget, pred_conf,
-                             tconf, noobj_mask);
-    CalcSCEGradWithWeight<T>(loss_grad_data, &grad_class, pred_class, tclass,
-                             obj_mask_expand);
-
-    input_grad->mutable_data<T>({n, c, h, w}, ctx.GetPlace());
-    AddAllGradToInputGrad<T>(input_grad, grad_x, grad_y, grad_w, grad_h,
-                             grad_conf_target, grad_conf_notarget, grad_class,
-                             class_num, loss_weight_xy, loss_weight_wh,
-                             loss_weight_conf_target, loss_weight_conf_notarget,
-                             loss_weight_class);
+    T* input_grad_data =
+        input_grad->mutable_data<T>({n, c, h, w}, ctx.GetPlace());
+    CalcYolov3LossGrad<T>(input_grad_data, *loss_grad, *input, tx, ty, tw, th,
+                          tweight, tconf, tclass, conf_mask, obj_mask);
   }
 };
......
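The core of the rewrite above is that the old per-term helpers (CalcSCEWithWeight and friends, which needed SplitPredResult to copy the input apart first) are replaced by scalar SCE/L1 helpers applied in place over the strided input layout. Below is a minimal, Paddle-free sanity check of the two sigmoid-cross-entropy helpers exactly as they appear in the diff; the main() harness and the naive reference formula are illustrative additions, not part of the commit.

#include <cmath>
#include <cstdio>

template <typename T>
static T SCE(T x, T label) {
  // Numerically stable sigmoid cross-entropy:
  // max(x, 0) - x * label + log(1 + exp(-|x|)).
  return (x > 0 ? x : 0.0) - x * label + std::log(1.0 + std::exp(-std::abs(x)));
}

template <typename T>
static T SCEGrad(T x, T label) {
  // d/dx SCE(x, label) = sigmoid(x) - label.
  return 1.0 / (1.0 + std::exp(-x)) - label;
}

int main() {
  double x = 3.0, label = 1.0;
  // Naive form -label*log(s) - (1-label)*log(1-s) with s = sigmoid(x);
  // it matches SCE for moderate x but overflows exp(x) for large x.
  double s = 1.0 / (1.0 + std::exp(-x));
  double naive = -label * std::log(s) - (1.0 - label) * std::log(1.0 - s);
  std::printf("stable=%f naive=%f grad=%f\n", SCE(x, label), naive,
              SCEGrad(x, label));
  return 0;
}

Because CalcSCE and CalcL1Loss advance input by (class_num + 5) * grid_num per anchor, the kernel can pass input_data + k * grid_num to address the k-th attribute plane of every anchor (x, y, w, h, confidence, then class scores) directly, which is what makes the old SplitPredResult copy unnecessary.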
@@ -416,11 +416,6 @@ def yolov3_loss(x,
                 class_num,
                 ignore_thresh,
                 input_size,
-                loss_weight_xy=None,
-                loss_weight_wh=None,
-                loss_weight_conf_target=None,
-                loss_weight_conf_notarget=None,
-                loss_weight_class=None,
                 name=None):
     """
     ${comment}
@@ -438,11 +433,6 @@ def yolov3_loss(x,
         class_num (int): ${class_num_comment}
         ignore_thresh (float): ${ignore_thresh_comment}
         input_size (int): ${input_size_comment}
-        loss_weight_xy (float|None): ${loss_weight_xy_comment}
-        loss_weight_wh (float|None): ${loss_weight_wh_comment}
-        loss_weight_conf_target (float|None): ${loss_weight_conf_target_comment}
-        loss_weight_conf_notarget (float|None): ${loss_weight_conf_notarget_comment}
-        loss_weight_class (float|None): ${loss_weight_class_comment}
         name (string): the name of yolov3 loss
 
     Returns:
@@ -495,18 +485,18 @@ def yolov3_loss(x,
         "input_size": input_size,
     }
 
-    if loss_weight_xy is not None and isinstance(loss_weight_xy, float):
-        self.attrs['loss_weight_xy'] = loss_weight_xy
-    if loss_weight_wh is not None and isinstance(loss_weight_wh, float):
-        self.attrs['loss_weight_wh'] = loss_weight_wh
-    if loss_weight_conf_target is not None and isinstance(
-            loss_weight_conf_target, float):
-        self.attrs['loss_weight_conf_target'] = loss_weight_conf_target
-    if loss_weight_conf_notarget is not None and isinstance(
-            loss_weight_conf_notarget, float):
-        self.attrs['loss_weight_conf_notarget'] = loss_weight_conf_notarget
-    if loss_weight_class is not None and isinstance(loss_weight_class, float):
-        self.attrs['loss_weight_class'] = loss_weight_class
+    # if loss_weight_xy is not None and isinstance(loss_weight_xy, float):
+    #     self.attrs['loss_weight_xy'] = loss_weight_xy
+    # if loss_weight_wh is not None and isinstance(loss_weight_wh, float):
+    #     self.attrs['loss_weight_wh'] = loss_weight_wh
+    # if loss_weight_conf_target is not None and isinstance(
+    #         loss_weight_conf_target, float):
+    #     self.attrs['loss_weight_conf_target'] = loss_weight_conf_target
+    # if loss_weight_conf_notarget is not None and isinstance(
+    #         loss_weight_conf_notarget, float):
+    #     self.attrs['loss_weight_conf_notarget'] = loss_weight_conf_notarget
+    # if loss_weight_class is not None and isinstance(loss_weight_class, float):
+    #     self.attrs['loss_weight_class'] = loss_weight_class
     helper.append_op(
         type='yolov3_loss',
......
@@ -470,8 +470,6 @@ class OpTest(unittest.TestCase):
         ]
         analytic_grads = self._get_gradient(inputs_to_check, place,
                                             output_names, no_grad_set)
-        # print(numeric_grads[0][0, 4, :, :])
-        # print(analytic_grads[0][0, 4, :, :])
         self._assert_is_close(numeric_grads, analytic_grads, inputs_to_check,
                               max_relative_error,
......
@@ -80,8 +80,8 @@ def build_target(gtboxes, gtlabel, attrs, grid_size):
     class_num = attrs["class_num"]
     input_size = attrs["input_size"]
     an_num = len(anchors) // 2
+    conf_mask = np.ones((n, an_num, grid_size, grid_size)).astype('float32')
     obj_mask = np.zeros((n, an_num, grid_size, grid_size)).astype('float32')
-    noobj_mask = np.ones((n, an_num, grid_size, grid_size)).astype('float32')
     tx = np.zeros((n, an_num, grid_size, grid_size)).astype('float32')
     ty = np.zeros((n, an_num, grid_size, grid_size)).astype('float32')
     tw = np.zeros((n, an_num, grid_size, grid_size)).astype('float32')
@@ -114,10 +114,10 @@ def build_target(gtboxes, gtlabel, attrs, grid_size):
                 max_iou = iou
                 best_an_index = k
             if iou > ignore_thresh:
-                noobj_mask[i, best_an_index, gj, gi] = 0
+                conf_mask[i, best_an_index, gj, gi] = 0
+        conf_mask[i, best_an_index, gj, gi] = 1
         obj_mask[i, best_an_index, gj, gi] = 1
-        noobj_mask[i, best_an_index, gj, gi] = 0
         tx[i, best_an_index, gj, gi] = gx - gi
         ty[i, best_an_index, gj, gi] = gy - gj
         tw[i, best_an_index, gj, gi] = np.log(gw / anchors[2 *
@@ -129,7 +129,7 @@ def build_target(gtboxes, gtlabel, attrs, grid_size):
         tconf[i, best_an_index, gj, gi] = 1
         tcls[i, best_an_index, gj, gi, gt_label] = 1
 
-    return (tx, ty, tw, th, tweight, tconf, tcls, obj_mask, noobj_mask)
+    return (tx, ty, tw, th, tweight, tconf, tcls, conf_mask, obj_mask)
 
 
 def YoloV3Loss(x, gtbox, gtlabel, attrs):
@@ -144,11 +144,9 @@ def YoloV3Loss(x, gtbox, gtlabel, attrs):
     pred_conf = x[:, :, :, :, 4]
     pred_cls = x[:, :, :, :, 5:]
 
-    tx, ty, tw, th, tweight, tconf, tcls, obj_mask, noobj_mask = build_target(
+    tx, ty, tw, th, tweight, tconf, tcls, conf_mask, obj_mask = build_target(
         gtbox, gtlabel, attrs, x.shape[2])
 
-    # print("obj_mask: ", obj_mask[0, 0, :, :])
-    # print("noobj_mask: ", noobj_mask[0, 0, :, :])
     obj_weight = obj_mask * tweight
     obj_mask_expand = np.tile(
         np.expand_dims(obj_mask, 4), (1, 1, 1, 1, int(attrs['class_num'])))
@@ -156,30 +154,19 @@ def YoloV3Loss(x, gtbox, gtlabel, attrs):
     loss_y = sce(pred_y, ty, obj_weight)
     loss_w = l1loss(pred_w, tw, obj_weight)
     loss_h = l1loss(pred_h, th, obj_weight)
-    loss_conf_target = sce(pred_conf, tconf, obj_mask)
-    loss_conf_notarget = sce(pred_conf, tconf, noobj_mask)
+    loss_obj = sce(pred_conf, tconf, conf_mask)
     loss_class = sce(pred_cls, tcls, obj_mask_expand)
-    # print("loss_xy: ", loss_x + loss_y)
-    # print("loss_wh: ", loss_w + loss_h)
-    # print("loss_conf_target: ", loss_conf_target)
-    # print("loss_conf_notarget: ", loss_conf_notarget)
-    # print("loss_class: ", loss_class)
+    # print("python loss_xy: ", loss_x + loss_y)
+    # print("python loss_wh: ", loss_w + loss_h)
+    # print("python loss_obj: ", loss_obj)
+    # print("python loss_class: ", loss_class)
 
-    return attrs['loss_weight_xy'] * (loss_x + loss_y) \
-            + attrs['loss_weight_wh'] * (loss_w + loss_h) \
-            + attrs['loss_weight_conf_target'] * loss_conf_target \
-            + attrs['loss_weight_conf_notarget'] * loss_conf_notarget \
-            + attrs['loss_weight_class'] * loss_class
+    return loss_x + loss_y + loss_w + loss_h + loss_obj + loss_class
 
 
 class TestYolov3LossOp(OpTest):
     def setUp(self):
-        self.loss_weight_xy = 1.0
-        self.loss_weight_wh = 1.0
-        self.loss_weight_conf_target = 1.0
-        self.loss_weight_conf_notarget = 1.0
-        self.loss_weight_class = 1.0
         self.initTestCase()
         self.op_type = 'yolov3_loss'
         x = logit(np.random.uniform(0, 1, self.x_shape).astype('float32'))
@@ -192,11 +179,6 @@ class TestYolov3LossOp(OpTest):
             "class_num": self.class_num,
             "ignore_thresh": self.ignore_thresh,
             "input_size": self.input_size,
-            "loss_weight_xy": self.loss_weight_xy,
-            "loss_weight_wh": self.loss_weight_wh,
-            "loss_weight_conf_target": self.loss_weight_conf_target,
-            "loss_weight_conf_notarget": self.loss_weight_conf_notarget,
-            "loss_weight_class": self.loss_weight_class,
         }
         self.inputs = {'X': x, 'GTBox': gtbox, 'GTLabel': gtlabel}
@@ -215,17 +197,12 @@ class TestYolov3LossOp(OpTest):
             max_relative_error=0.31)
 
     def initTestCase(self):
-        self.anchors = [12, 12]
+        self.anchors = [12, 12, 11, 13]
         self.class_num = 5
-        self.ignore_thresh = 0.3
+        self.ignore_thresh = 0.5
         self.input_size = 416
         self.x_shape = (3, len(self.anchors) // 2 * (5 + self.class_num), 5, 5)
         self.gtbox_shape = (3, 5, 4)
-        self.loss_weight_xy = 1.2
-        self.loss_weight_wh = 0.8
-        self.loss_weight_conf_target = 2.0
-        self.loss_weight_conf_notarget = 1.0
-        self.loss_weight_class = 1.5
 
 
 if __name__ == "__main__":
......
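As a compact summary of what both the C++ kernel and the Python reference now compute for batch item i (my notation, not from the commit; SCE is the stable sigmoid cross-entropy defined above, and the sums run over anchors a, grid cells g, and classes c):

L_i = \sum_{a,g} \Big[ \big(\mathrm{SCE}(\hat{x}, t_x) + \mathrm{SCE}(\hat{y}, t_y) + |\hat{w} - t_w| + |\hat{h} - t_h|\big)\, m^{\mathrm{obj}}_{a,g}\, t^{\mathrm{weight}}_{a,g} + \mathrm{SCE}(\hat{c}, t_{\mathrm{conf}})\, m^{\mathrm{conf}}_{a,g} + \sum_{c} \mathrm{SCE}(\hat{p}_c, t_{\mathrm{cls},c})\, m^{\mathrm{obj}}_{a,g} \Big]

Here m^conf starts at 1 everywhere, is cleared for anchors whose IoU with a ground-truth box exceeds ignore_thresh, and is restored to 1 at the best-matching anchor, while m^obj marks only the best-matching anchor. This conf_mask/obj_mask pair replaces the old obj_mask/noobj_mask split, so the former target and no-target confidence terms collapse into the single SCE confidence term, and the per-term loss_weight_* attributes disappear from the op, the Python API, and the test.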