[op][arm][kernel]fix: yolo_box updated with paddle-fluid, attr "scale_x_y" and...

[op][arm][kernel]fix: yolo_box updated with paddle-fluid, attr "scale_x_y" and "clip_bbox" supported

[op][arm][kernel]fix: yolo_box updated with paddle-fluid, attr "scale_x_y" and...
[op][arm][kernel]fix: yolo_box updated with paddle-fluid, attr "scale_x_y" and "clip_bbox" supported
85b61a05 · zhangwen31 · 80452148 · 85b61a05 · 85b61a05 · 85b61a05
8 changed files
--- a/lite/backends/arm/math/yolo_box.cc
+++ b/lite/backends/arm/math/yolo_box.cc
@@ -34,9 +34,12 @@ inline void get_yolo_box(float* box,
                         int index,
                         int stride,
                         int img_height,
-                         int img_width) {
-  box[0] = (i + sigmoid(x[index])) * img_width / grid_size;
-  box[1] = (j + sigmoid(x[index + stride])) * img_height / grid_size;
+                         int img_width,
+                         float scale,
+                         float bias) {
+  box[0] = (i + sigmoid(x[index]) * scale + bias) * img_width / grid_size;
+  box[1] =
+      (j + sigmoid(x[index + stride]) * scale + bias) * img_height / grid_size;
  box[2] = std::exp(x[index + 2 * stride]) * anchors[2 * an_idx] * img_width /
           input_size;
  box[3] = std::exp(x[index + 3 * stride]) * anchors[2 * an_idx + 1] *
@@ -57,21 +60,25 @@ inline void calc_detection_box(float* boxes,
                               float* box,
                               const int box_idx,
                               const int img_height,
-                               const int img_width) {
+                               const int img_width,
+                               bool clip_bbox) {
  boxes[box_idx] = box[0] - box[2] / 2;
  boxes[box_idx + 1] = box[1] - box[3] / 2;
  boxes[box_idx + 2] = box[0] + box[2] / 2;
  boxes[box_idx + 3] = box[1] + box[3] / 2;

-  boxes[box_idx] = boxes[box_idx] > 0 ? boxes[box_idx] : static_cast<float>(0);
-  boxes[box_idx + 1] =
-      boxes[box_idx + 1] > 0 ? boxes[box_idx + 1] : static_cast<float>(0);
-  boxes[box_idx + 2] = boxes[box_idx + 2] < img_width - 1
-                           ? boxes[box_idx + 2]
-                           : static_cast<float>(img_width - 1);
-  boxes[box_idx + 3] = boxes[box_idx + 3] < img_height - 1
-                           ? boxes[box_idx + 3]
-                           : static_cast<float>(img_height - 1);
+  if (clip_bbox) {
+    boxes[box_idx] =
+        boxes[box_idx] > 0 ? boxes[box_idx] : static_cast<float>(0);
+    boxes[box_idx + 1] =
+        boxes[box_idx + 1] > 0 ? boxes[box_idx + 1] : static_cast<float>(0);
+    boxes[box_idx + 2] = boxes[box_idx + 2] < img_width - 1
+                             ? boxes[box_idx + 2]
+                             : static_cast<float>(img_width - 1);
+    boxes[box_idx + 3] = boxes[box_idx + 3] < img_height - 1
+                             ? boxes[box_idx + 3]
+                             : static_cast<float>(img_height - 1);
+  }
 }

 inline void calc_label_score(float* scores,
@@ -94,7 +101,10 @@ void yolobox(lite::Tensor* X,
             std::vector<int> anchors,
             int class_num,
             float conf_thresh,
-             int downsample_ratio) {
+             int downsample_ratio,
+             bool clip_bbox,
+             float scale,
+             float bias) {
  const int n = X->dims()[0];
  const int h = X->dims()[2];
  const int w = X->dims()[3];
@@ -111,8 +121,10 @@ void yolobox(lite::Tensor* X,
  int* ImgSize_data = ImgSize->mutable_data<int>();

  float* Boxes_data = Boxes->mutable_data<float>();
+  memset(Boxes_data, 0, Boxes->numel() * sizeof(float));

  float* Scores_data = Scores->mutable_data<float>();
+  memset(Scores_data, 0, Scores->numel() * sizeof(float));

  float box[4];
  for (int i = 0; i < n; i++) {
@@ -142,9 +154,12 @@ void yolobox(lite::Tensor* X,
                       box_idx,
                       stride,
                       img_height,
-                       img_width);
+                       img_width,
+                       scale,
+                       bias);
          box_idx = (i * b_num + j * stride + k * w + l) * 4;
-          calc_detection_box(Boxes_data, box, box_idx, img_height, img_width);
+          calc_detection_box(
+              Boxes_data, box, box_idx, img_height, img_width, clip_bbox);

          int label_idx =
              get_entry_index(i, j, k * w + l, an_num, an_stride, stride, 5);

--- a/lite/backends/arm/math/yolo_box.h
+++ b/lite/backends/arm/math/yolo_box.h
@@ -29,7 +29,10 @@ void yolobox(lite::Tensor* X,
             std::vector<int> anchors,
             int class_num,
             float conf_thresh,
-             int downsample_ratio);
+             int downsample_ratio,
+             bool clip_bbox,
+             float scale,
+             float bias);

 }  // namespace math
 }  // namespace arm

--- a/lite/kernels/arm/yolo_box_compute.cc
+++ b/lite/kernels/arm/yolo_box_compute.cc
@@ -32,6 +32,9 @@ void YoloBoxCompute::Run() {
  int class_num = param.class_num;
  float conf_thresh = param.conf_thresh;
  int downsample_ratio = param.downsample_ratio;
+  bool clip_bbox = param.clip_bbox;
+  float scale_x_y = param.scale_x_y;
+  float bias = -0.5 * (scale_x_y - 1.);
  Boxes->clear();
  Scores->clear();
  lite::arm::math::yolobox(X,
@@ -41,7 +44,10 @@ void YoloBoxCompute::Run() {
                           anchors,
                           class_num,
                           conf_thresh,
-                           downsample_ratio);
+                           downsample_ratio,
+                           clip_bbox,
+                           scale_x_y,
+                           bias);
 }

 }  // namespace arm

--- a/lite/kernels/cuda/yolo_box_compute.cu
+++ b/lite/kernels/cuda/yolo_box_compute.cu
@@ -152,7 +152,7 @@ __global__ void KeYoloBoxFw(const T* input,
        scores, input, label_idx, score_idx, class_num, conf, grid_num);
  }
 }
-
+// FIXME: yolo_box has been updated; check the ARM kernel for more info
 void YoloBoxCompute::Run() {
  auto& param = this->Param<param_t>();
  auto& ctx = this->ctx_->template As<CUDAContext>();

--- a/lite/kernels/xpu/bridges/yolo_box_op.cc
+++ b/lite/kernels/xpu/bridges/yolo_box_op.cc
@@ -21,6 +21,7 @@ namespace lite {
 namespace subgraph {
 namespace xpu {

+// FIXME: yolo_box has been updated; check the ARM kernel for more info
 int YoloBoxConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  CHECK(ctx != nullptr);
  CHECK(op != nullptr);

--- a/lite/operators/op_params.h
+++ b/lite/operators/op_params.h
@@ -264,6 +264,8 @@ struct YoloBoxParam : ParamBase {
  int class_num{0};
  float conf_thresh{0.f};
  int downsample_ratio{0};
+  bool clip_bbox{true};
+  float scale_x_y{1.0f};
 };

 // For Scale Op

--- a/lite/operators/yolo_box_op.cc
+++ b/lite/operators/yolo_box_op.cc
@@ -72,6 +72,8 @@ bool YoloBoxOp::AttachImpl(const cpp::OpDesc& op_desc, lite::Scope* scope) {
  param_.class_num = op_desc.GetAttr<int>("class_num");
  param_.conf_thresh = op_desc.GetAttr<float>("conf_thresh");
  param_.downsample_ratio = op_desc.GetAttr<int>("downsample_ratio");
+  param_.clip_bbox = op_desc.GetAttr<bool>("clip_bbox");
+  param_.scale_x_y = op_desc.GetAttr<float>("scale_x_y");
  return true;
 }


--- a/lite/tests/kernels/yolo_box_compute_test.cc
+++ b/lite/tests/kernels/yolo_box_compute_test.cc
@@ -100,6 +100,8 @@ class YoloBoxComputeTester : public arena::TestCase {
  int class_num_ = 0;
  float conf_thresh_ = 0.f;
  int downsample_ratio_ = 0;
+  bool clip_bbox_ = true;
+  float scale_x_y_ = 1.0;

  DDim _dims0_{{1, 255, 13, 13}};
  DDim _dims1_{{1, 2}};
@@ -212,6 +214,8 @@ class YoloBoxComputeTester : public arena::TestCase {
    op_desc->SetAttr("class_num", class_num_);
    op_desc->SetAttr("conf_thresh", conf_thresh_);
    op_desc->SetAttr("downsample_ratio", downsample_ratio_);
+    op_desc->SetAttr("clip_bbox", clip_bbox_);
+    op_desc->SetAttr("scale_x_y", scale_x_y_);
  }

  void PrepareData() override {