From 5d0b568ecb58d479619c5a2295d65b7f677d4648 Mon Sep 17 00:00:00 2001
From: dengkaipeng <dengkaipeng@baidu.com>
Date: Tue, 6 Nov 2018 18:42:19 +0800
Subject: [PATCH] Add YOLOv3 loss operator. test=develop

---
 paddle/fluid/operators/yolov3_loss_op.cc | 130 +++++++++
 paddle/fluid/operators/yolov3_loss_op.cu |  23 ++
 paddle/fluid/operators/yolov3_loss_op.h  | 340 +++++++++++++++++++++++
 3 files changed, 493 insertions(+)
 create mode 100644 paddle/fluid/operators/yolov3_loss_op.cc
 create mode 100644 paddle/fluid/operators/yolov3_loss_op.cu
 create mode 100644 paddle/fluid/operators/yolov3_loss_op.h
diff --git a/paddle/fluid/operators/yolov3_loss_op.cc b/paddle/fluid/operators/yolov3_loss_op.cc
new file mode 100644
index 0000000000..b4c6a185e2
--- /dev/null
+++ b/paddle/fluid/operators/yolov3_loss_op.cc
@@ -0,0 +1,130 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+   http://www.apache.org/licenses/LICENSE-2.0
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#include "paddle/fluid/operators/yolov3_loss_op.h"
+#include "paddle/fluid/framework/op_registry.h"
+
+namespace paddle {
+namespace operators {
+
+using framework::Tensor;
+
+class Yolov3LossOp : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+
+ protected:
+  void InferShape(framework::InferShapeContext* ctx) const override {
+    PADDLE_ENFORCE(ctx->HasInput("X"),
+                   "Input(X) of Yolov3LossOp should not be null.");
+    PADDLE_ENFORCE(ctx->HasInput("GTBox"),
+                   "Input(GTBox) of Yolov3LossOp should not be null.");
+    PADDLE_ENFORCE(ctx->HasOutput("Out"),
+                   "Output(Out) of Yolov3LossOp should not be null.");
+
+    // PADDLE_ENFORCE(ctx->HasAttr("img_height"),
+    //                "Attr(img_height) of Yolov3LossOp should not be null. ");
+    // PADDLE_ENFORCE(ctx->HasAttr("anchors"),
+    //                "Attr(anchor) of Yolov3LossOp should not be null.")
+    // PADDLE_ENFORCE(ctx->HasAttr("class_num"),
+    //                "Attr(class_num) of Yolov3LossOp should not be null.");
+    // PADDLE_ENFORCE(ctx->HasAttr(
+    //     "ignore_thresh",
+    //     "Attr(ignore_thresh) of Yolov3LossOp should not be null."));
+
+    auto dim_x = ctx->GetInputDim("X");
+    auto dim_gt = ctx->GetInputDim("GTBox");
+    auto img_height = ctx->Attrs().Get<int>("img_height");
+    auto anchors = ctx->Attrs().Get<std::vector<int>>("anchors");
+    auto box_num = ctx->Attrs().Get<int>("box_num");
+    auto class_num = ctx->Attrs().Get<int>("class_num");
+    PADDLE_ENFORCE_GT(img_height, 0,
+                      "Attr(img_height) value should be greater then 0");
+    PADDLE_ENFORCE_GT(anchors.size(), 0,
+                      "Attr(anchors) length should be greater then 0.");
+    PADDLE_ENFORCE_EQ(anchors.size() % 2, 0,
+                      "Attr(anchors) length should be even integer.");
+    PADDLE_ENFORCE_GT(box_num, 0,
+                      "Attr(box_num) should be an integer greater then 0.");
+    PADDLE_ENFORCE_GT(class_num, 0,
+                      "Attr(class_num) should be an integer greater then 0.");
+    PADDLE_ENFORCE_EQ(dim_x[1], anchors.size() / 2 * (5 + class_num),
+                      "Input(X) dim[1] should be equal to (anchor_number * (5 "
+                      "+ class_num)).");
+    PADDLE_ENFORCE_EQ(dim_gt.size(), 3, "Input(GTBox) should be a 3-D tensor");
+    PADDLE_ENFORCE_EQ(dim_gt[2], 5, "Input(GTBox) dim[2] should be 5");
+
+    std::vector<int64_t> dim_out({dim_x[0], 1});
+    ctx->SetOutputDim("Out", framework::make_ddim(dim_out));
+  }
+
+ protected:
+  framework::OpKernelType GetExpectedKernelType(
+      const framework::ExecutionContext& ctx) const override {
+    return framework::OpKernelType(
+        framework::ToDataType(ctx.Input<Tensor>("X")->type()), ctx.GetPlace());
+  }
+};
+
+class Yolov3LossOpMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+  void Make() override {
+    AddInput("X",
+             "The input tensor of bilinear interpolation, "
+             "This is a 4-D tensor with shape of [N, C, H, W]");
+    AddOutput("Out",
+              "The output yolo loss tensor, "
+              "This is a 2-D tensor with shape of [N, 1]");
+
+    AddAttr<int>("box_num", "The number of boxes generated in each grid.");
+    AddAttr<int>("class_num", "The number of classes to predict.");
+    AddComment(R"DOC(
+         This operator generate yolov3 loss by given predict result and ground
+         truth boxes.
+         )DOC");
+  }
+};
+
+class Yolov3LossOpGrad : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+
+ protected:
+  void InferShape(framework::InferShapeContext* ctx) const override {
+    PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null");
+    PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")),
+                   "Input(Out@GRAD) should not be null");
+    auto dim_x = ctx->GetInputDim("X");
+    if (ctx->HasOutput(framework::GradVarName("X"))) {
+      ctx->SetOutputDim(framework::GradVarName("X"), dim_x);
+    }
+  }
+
+  framework::OpKernelType GetExpectedKernelType(
+      const framework::ExecutionContext& ctx) const override {
+    return framework::OpKernelType(
+        framework::ToDataType(ctx.Input<Tensor>("X")->type()), ctx.GetPlace());
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+REGISTER_OPERATOR(yolov3_loss, ops::Yolov3LossOp, ops::Yolov3LossOpMaker,
+                  paddle::framework::DefaultGradOpDescMaker<true>);
+REGISTER_OPERATOR(yolov3_loss_grad, ops::Yolov3LossOpGrad);
+REGISTER_OP_CPU_KERNEL(
+    yolov3_loss,
+    ops::Yolov3LossKernel<paddle::platform::CPUDeviceContext, float>);
+REGISTER_OP_CPU_KERNEL(
+    yolov3_loss_grad,
+    ops::Yolov3LossGradKernel<paddle::platform::CPUDeviceContext, float>);
diff --git a/paddle/fluid/operators/yolov3_loss_op.cu b/paddle/fluid/operators/yolov3_loss_op.cu
new file mode 100644
index 0000000000..48f997456a
--- /dev/null
+++ b/paddle/fluid/operators/yolov3_loss_op.cu
@@ -0,0 +1,23 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+   http://www.apache.org/licenses/LICENSE-2.0
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#define EIGEN_USE_GPU
+
+#include "paddle/fluid/operators/yolov3_loss_op.h"
+#include "paddle/fluid/platform/cuda_primitives.h"
+
+namespace ops = paddle::operators;
+REGISTER_OP_CUDA_KERNEL(
+    yolov3_loss,
+    ops::Yolov3LossOpKernel<paddle::platform::CUDADeviceContext, float>);
+REGISTER_OP_CUDA_KERNEL(
+    yolov3_loss_grad,
+    ops::Yolov3LossGradOpKernel<paddle::platform::CUDADeviceContext, float>);
diff --git a/paddle/fluid/operators/yolov3_loss_op.h b/paddle/fluid/operators/yolov3_loss_op.h
new file mode 100644
index 0000000000..7950390567
--- /dev/null
+++ b/paddle/fluid/operators/yolov3_loss_op.h
@@ -0,0 +1,340 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+   http://www.apache.org/licenses/LICENSE-2.0
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#pragma once
+#include <algorithm>
+#include <vector>
+#include "paddle/fluid/framework/op_registry.h"
+
+namespace paddle {
+namespace operators {
+
+using Tensor = framework::Tensor;
+template <typename T, size_t D, int MajorType = Eigen::RowMajor,
+          typename IndexType = Eigen::DenseIndex>
+using EigenTensor = framework::EigenTensor<T, D, MajorType, IndexType>;
+template <typename T, int MajorType = Eigen::RowMajor,
+          typename IndexType = Eigen::DenseIndex>
+using EigenVector = framework::EigenVector<T, MajorType, IndexType>;
+
+using Array2 = Eigen::DSizes<int64_t, 2>;
+using Array4 = Eigen::DSizes<int64_t, 4>;
+
+template <typename T>
+static inline bool isZero(T x) {
+  return abs(x) < 1e-6;
+}
+
+template <typename T>
+static inline T sigmod(T x) {
+  return 1.0 / (exp(-1.0 * x) + 1.0);
+}
+
+template <typename T>
+static inline T CalcMSEWithMask(const Tensor& x, const Tensor& y,
+                                const Tensor& mask) {
+  auto x_t = EigenVector<T>::Flatten(x);
+  auto y_t = EigenVector<T>::Flatten(y);
+  auto mask_t = EigenVector<T>::Flatten(mask);
+  auto result = ((x_t - y_t) * mask_t).pow(2).sum().eval();
+  return result(0);
+}
+
+template <typename T>
+static inline T CalcBCEWithMask(const Tensor& x, const Tensor& y,
+                                const Tensor& mask) {
+  auto x_t = EigenVector<T>::Flatten(x);
+  auto y_t = EigenVector<T>::Flatten(y);
+  auto mask_t = EigenVector<T>::Flatten(mask);
+
+  auto result =
+      ((y_t * (x_t.log()) + (1.0 - y_t) * ((1.0 - x_t).log())) * mask_t)
+          .sum()
+          .eval();
+  return result;
+}
+
+template <typename T>
+static inline T CalcCEWithMask(const Tensor& x, const Tensor& y,
+                               const Tensor& mask) {
+  auto x_t = EigenVector<T>::Flatten(x);
+  auto y_t = EigenVector<T>::Flatten(y);
+  auto mask_t = EigenVector<T>::Flatten(mask);
+}
+
+template <typename T>
+static void CalcPredResult(const Tensor& input, Tensor* pred_boxes,
+                           Tensor* pred_confs, Tensor* pred_classes,
+                           Tensor* pred_x, Tensor* pred_y, Tensor* pred_w,
+                           Tensor* pred_h, std::vector<int> anchors,
+                           const int class_num, const int stride) {
+  const int n = input.dims()[0];
+  const int c = input.dims()[1];
+  const int h = input.dims()[2];
+  const int w = input.dims()[3];
+  const int anchor_num = anchors.size() / 2;
+  const int box_attr_num = 5 + class_num;
+
+  auto input_t = EigenTensor<T, 4>::From(input);
+  auto pred_boxes_t = EigenTensor<T, 5>::From(*pred_boxes);
+  auto pred_confs_t = EigenTensor<T, 4>::From(*pred_confs);
+  auto pred_classes_t = EigenTensor<T, 5>::From(*pred_classes);
+  auto pred_x_t = EigenTensor<T, 4>::From(*pred_x);
+  auto pred_y_t = EigenTensor<T, 4>::From(*pred_y);
+  auto pred_w_t = EigenTensor<T, 4>::From(*pred_w);
+  auto pred_h_t = EigenTensor<T, 4>::From(*pred_h);
+
+  for (int i = 0; i < n; i++) {
+    for (int an_idx = 0; an_idx < anchor_num; an_idx++) {
+      float an_w = anchors[an_idx * 2] / stride;
+      float an_h = anchors[an_idx * 2 + 1] / stride;
+
+      for (int j = 0; j < h; j++) {
+        for (int k = 0; k < w; k++) {
+          pred_x_t(i, an_idx, j, k) =
+              sigmod(input_t(i, box_attr_num * an_idx, j, k));
+          pred_y_t(i, an_idx, j, k) =
+              sigmod(input_t(i, box_attr_num * an_idx + 1, j, k));
+          pred_w_t(i, an_idx, j, k) =
+              sigmod(input_t(i, box_attr_num * an_idx + 2, j, k));
+          pred_h_t(i, an_idx, j, k) =
+              sigmod(input_t(i, box_attr_num * an_idx + 3, j, k));
+
+          pred_boxes_t(i, an_idx, j, k, 0) = pred_x_t(i, an_idx, j, k) + k;
+          pred_boxes_t(i, an_idx, j, k, 1) = pred_y_t(i, an_idx, j, k) + j;
+          pred_boxes_t(i, an_idx, j, k, 2) =
+              exp(pred_w_t(i, an_idx, j, k)) * an_w;
+          pred_boxes_t(i, an_idx, j, k, 3) =
+              exp(pred_h_t(i, an_idx, j, k)) * an_h;
+
+          pred_confs_t(i, an_idx, j, k) =
+              sigmod(input_t(i, box_attr_num * an_idx + 4, j, k));
+
+          for (int c = 0; c < class_num; c++) {
+            pred_classes_t(i, an_idx, j, k, c) =
+                sigmod(input_t(i, box_attr_num * an_idx + 5 + c, j, k));
+          }
+        }
+      }
+    }
+  }
+}
+
+template <typename T>
+static T CalcBoxIoU(std::vector<T> box1, std::vector<T> box2,
+                    bool center_mode) {
+  T b1_x1, b1_x2, b1_y1, b1_y2;
+  T b2_x1, b2_x2, b2_y1, b2_y2;
+  if (center_mode) {
+    b1_x1 = box1[0] - box1[2] / 2;
+    b1_x2 = box1[0] + box1[2] / 2;
+    b1_y1 = box1[1] - box1[3] / 2;
+    b1_y2 = box1[1] + box1[3] / 2;
+    b2_x1 = box2[0] - box2[2] / 2;
+    b2_x2 = box2[0] + box2[2] / 2;
+    b2_y1 = box2[1] - box2[3] / 2;
+    b2_y2 = box2[1] + box2[3] / 2;
+  } else {
+    b1_x1 = box1[0];
+    b1_x2 = box1[1];
+    b1_y1 = box1[2];
+    b1_y2 = box1[3];
+    b2_x1 = box2[0];
+    b2_x2 = box2[0];
+    b2_y1 = box2[1];
+    b2_y2 = box2[1];
+  }
+  T b1_area = (b1_x2 - b1_x1 + 1.0) * (b1_y2 - b1_y1 + 1.0);
+  T b2_area = (b2_x2 - b2_x1 + 1.0) * (b2_y2 - b2_y1 + 1.0);
+
+  T inter_rect_x1 = std::max(b1_x1, b2_x1);
+  T inter_rect_y1 = std::max(b1_y1, b2_y1);
+  T inter_rect_x2 = std::min(b1_x2, b2_x2);
+  T inter_rect_y2 = std::min(b1_y2, b2_y2);
+  T inter_area = std::max(inter_rect_x2 - inter_rect_x1 + 1.0, 0.0) *
+                 std::max(inter_rect_y2 - inter_rect_y1 + 1.0, 0.0);
+
+  return inter_area / (b1_area + b2_area - inter_area + 1e-16);
+}
+
+template <typename T>
+static inline int GetPredLabel(const Tensor& pred_classes, int n,
+                               int best_an_index, int gj, int gi) {
+  auto pred_classes_t = EigenTensor<T, 5>::From(pred_classes);
+  T score = 0.0;
+  int label = -1;
+  for (int i = 0; i < pred_classes.dims()[4]; i++) {
+    if (pred_classes_t(n, best_an_index, gj, gi, i) > score) {
+      score = pred_classes_t(n, best_an_index, gj, gi, i);
+      label = i;
+    }
+  }
+  return label;
+}
+
+template <typename T>
+static void CalcPredBoxWithGTBox(
+    const Tensor& pred_boxes, const Tensor& pred_confs,
+    const Tensor& pred_classes, const Tensor& gt_boxes,
+    std::vector<int> anchors, const float ignore_thresh, const int img_height,
+    int* gt_num, int* correct_num, Tensor* mask_true, Tensor* mask_false,
+    Tensor* tx, Tensor* ty, Tensor* tw, Tensor* th, Tensor* tconf,
+    Tensor* tclass) {
+  const int n = gt_boxes.dims()[0];
+  const int b = gt_boxes.dims()[1];
+  const int grid_size = pred_boxes.dims()[1];
+  const int anchor_num = anchors.size() / 2;
+  auto pred_boxes_t = EigenTensor<T, 5>::From(pred_boxes);
+  auto pred_confs_t = EigenTensor<T, 4>::From(pred_confs);
+  auto pred_classes_t = EigenTensor<T, 5>::From(pred_classes);
+  auto gt_boxes_t = EigenTensor<T, 3>::From(gt_boxes);
+  auto mask_true_t = EigenTensor<int, 4>::From(*mask_true).setConstant(0.0);
+  auto mask_false_t = EigenTensor<int, 4>::From(*mask_false).setConstant(1.0);
+  auto tx_t = EigenTensor<T, 4>::From(*tx).setConstant(0.0);
+  auto ty_t = EigenTensor<T, 4>::From(*ty).setConstant(0.0);
+  auto tw_t = EigenTensor<T, 4>::From(*tw).setConstant(0.0);
+  auto th_t = EigenTensor<T, 4>::From(*th).setConstant(0.0);
+  auto tconf_t = EigenTensor<T, 4>::From(*tconf).setConstant(0.0);
+  auto tclass_t = EigenTensor<T, 5>::From(*tclass).setConstant(0.0);
+
+  *gt_num = 0;
+  *correct_num = 0;
+  for (int i = 0; i < n; i++) {
+    for (int j = 0; j < b; j++) {
+      if (isZero(gt_boxes_t(i, j, 0)) && isZero(gt_boxes_t(i, j, 1)) &&
+          isZero(gt_boxes_t(i, j, 2)) && isZero(gt_boxes_t(i, j, 3))) {
+        continue;
+      }
+
+      *(gt_num)++;
+      int gt_label = gt_boxes_t(i, j, 0);
+      T gx = gt_boxes_t(i, j, 1);
+      T gy = gt_boxes_t(i, j, 2);
+      T gw = gt_boxes_t(i, j, 3);
+      T gh = gt_boxes_t(i, j, 4);
+      int gi = static_cast<int>(gx);
+      int gj = static_cast<int>(gy);
+
+      T max_iou = static_cast<T>(-1);
+      T iou;
+      int best_an_index = -1;
+      std::vector<T> gt_box({0, 0, gw, gh});
+      for (int an_idx = 0; an_idx < anchor_num; an_idx++) {
+        std::vector<T> anchor_shape({0, 0, static_cast<T>(anchors[2 * an_idx]),
+                                     static_cast<T>(anchors[2 * an_idx + 1])});
+        iou = CalcBoxIoU(gt_box, anchor_shape, false);
+        if (iou > max_iou) {
+          max_iou = iou;
+          best_an_index = an_idx;
+        }
+        if (iou > ignore_thresh) {
+          mask_false_t(b, an_idx, gj, gi) = 0;
+        }
+      }
+      mask_true_t(b, best_an_index, gj, gi) = 1;
+      mask_false_t(b, best_an_index, gj, gi) = 1;
+      tx_t(i, best_an_index, gj, gi) = gx - gi;
+      ty_t(i, best_an_index, gj, gi) = gy - gj;
+      tw_t(i, best_an_index, gj, gi) =
+          log(gw / anchors[2 * best_an_index] + 1e-16);
+      th_t(i, best_an_index, gj, gi) =
+          log(gh / anchors[2 * best_an_index + 1] + 1e-16);
+      tclass_t(b, best_an_index, gj, gi, gt_label) = 1;
+      tconf_t(b, best_an_index, gj, gi) = 1;
+
+      std::vector<T> pred_box({
+          pred_boxes_t(i, best_an_index, gj, gi, 0),
+          pred_boxes_t(i, best_an_index, gj, gi, 1),
+          pred_boxes_t(i, best_an_index, gj, gi, 2),
+          pred_boxes_t(i, best_an_index, gj, gi, 3),
+      });
+      gt_box[0] = gx;
+      gt_box[1] = gy;
+      iou = CalcBoxIoU(gt_box, pred_box, true);
+      int pred_label = GetPredLabel<T>(pred_classes, i, best_an_index, gj, gi);
+      T score = pred_confs_t(i, best_an_index, gj, gi);
+      if (iou > 0.5 && pred_label == gt_label && score > 0.5) {
+        (*correct_num)++;
+      }
+    }
+  }
+  mask_false_t = mask_true_t - mask_false_t;
+}
+
+template <typename DeviceContext, typename T>
+class Yolov3LossKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    auto* input = ctx.Input<Tensor>("X");
+    auto* gt_boxes = ctx.Input<Tensor>("GTBox");
+    auto* output = ctx.Output<Tensor>("Out");
+    int img_height = ctx.Attr<int>("img_height");
+    auto anchors = ctx.Attr<std::vector<int>>("anchors");
+    int class_num = ctx.Attr<int>("class_num");
+    float ignore_thresh = ctx.Attr<float>("ignore_thresh");
+
+    const int n = input->dims()[0];
+    const int c = input->dims()[1];
+    const int h = input->dims()[2];
+    const int w = input->dims()[3];
+    const int an_num = anchors.size() / 2;
+    const float stride = static_cast<float>(img_height) / h;
+
+    Tensor pred_x, pred_y, pred_w, pred_h;
+    Tensor pred_boxes, pred_confs, pred_classes;
+    pred_x.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
+    pred_y.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
+    pred_w.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
+    pred_h.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
+    pred_boxes.mutable_data<T>({n, an_num, h, w, 4}, ctx.GetPlace());
+    pred_confs.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
+    pred_classes.mutable_data<T>({n, an_num, h, w, class_num}, ctx.GetPlace());
+    CalcPredResult<T>(*input, &pred_boxes, &pred_confs, &pred_classes, &pred_x,
+                      &pred_y, &pred_w, &pred_h, anchors, class_num, stride);
+
+    Tensor mask_true, mask_false;
+    Tensor tx, ty, tw, th, tconf, tclass;
+    mask_true.mutable_data<int>({n, an_num, h, w}, ctx.GetPlace());
+    mask_false.mutable_data<int>({n, an_num, h, w}, ctx.GetPlace());
+    tx.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
+    ty.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
+    tw.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
+    th.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
+    tconf.mutable_data<T>({n, an_num, h, w}, ctx.GetPlace());
+    tclass.mutable_data<T>({n, an_num, h, w, class_num}, ctx.GetPlace());
+    int gt_num = 0;
+    int correct_num = 0;
+    CalcPredBoxWithGTBox<T>(pred_boxes, pred_confs, pred_classes, *gt_boxes,
+                            anchors, ignore_thresh, img_height, &gt_num,
+                            &correct_num, &mask_true, &mask_false, &tx, &ty,
+                            &tw, &th, &tconf, &tclass);
+
+    T loss_x = CalcMSEWithMask<T>(pred_x, tx, mask_true);
+    T loss_y = CalcMSEWithMask<T>(pred_y, ty, mask_true);
+    T loss_w = CalcMSEWithMask<T>(pred_w, tw, mask_true);
+    T loss_h = CalcMSEWithMask<T>(pred_h, th, mask_true);
+    T loss_conf_true = CalcBCEWithMask<T>(pred_confs, tconf, mask_true);
+    T loss_conf_false = CalcBCEWithMask<T>(pred_confs, tconf, mask_false);
+    // T loss_class = CalcCEWithMask<T>()
+  }
+};
+
+template <typename DeviceContext, typename T>
+class Yolov3LossGradKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    auto* d_input_t = ctx.Output<Tensor>(framework::GradVarName("X"));
+    auto* d_output_t = ctx.Input<Tensor>(framework::GradVarName("Out"));
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
-- 
GitLab