[arm] change prior box implement (#4013)

* update prior profile, test=develop * fix review. test=develop * test=develop

[arm] change prior box implement (#4013)
* update prior profile, test=develop * fix review. test=develop * test=develop
98e69581 · HappyAngel · GitHub · 4f3cd537 · 98e69581 · 98e69581
3 changed files
--- a/lite/core/mir/fusion/conv_conv_fuser.cc
+++ b/lite/core/mir/fusion/conv_conv_fuser.cc
@@ -117,11 +117,11 @@ void ConvConvFuser::InsertNewNode(SSAGraph* graph, const key2nodes_t& matched) {
                             << " must be 1";
  }
  for (int i = 0; i < paddings1.size(); i++) {
-    CHECK_EQ(paddings1[i], 1) << "paddings[" << i << "]: " << paddings1[i]
-                              << " must be 1";
+    CHECK_EQ(paddings1[i], 0) << "paddings1[" << i << "]: " << paddings1[i]
+                              << " must be 0";
  }
  for (int i = 0; i < dilations1.size(); i++) {
-    CHECK_EQ(dilations1[i], 1) << "dilations[" << i << "]: " << dilations1[i]
+    CHECK_EQ(dilations1[i], 1) << "dilations1[" << i << "]: " << dilations1[i]
                               << " must be 1";
  }
  // compute new weight and new bias

--- a/lite/kernels/arm/prior_box_compute.cc
+++ b/lite/kernels/arm/prior_box_compute.cc
@@ -13,9 +13,11 @@
 // limitations under the License.

 #include "lite/kernels/arm/prior_box_compute.h"
+#include <algorithm>
 #include <string>
 #include <vector>
 #include "lite/backends/arm/math/funcs.h"
+#include "lite/core/target_wrapper.h"

 namespace paddle {
 namespace lite {
@@ -46,9 +48,301 @@ inline void ExpandAspectRatios(const std::vector<float>& input_aspect_ratior,
  }
 }

-void PriorBoxCompute::Run() {
-  auto& param = Param<operators::PriorBoxParam>();
+inline void fast_free(void* ptr) {
   // Releases a buffer by calling free() on the pointer stored in the slot
   // immediately before `ptr` (static_cast<void**>(ptr)[-1]); a null `ptr` is
   // ignored.
   // NOTE(review): presumably pairs with an allocator that stashes the raw
   // malloc() pointer just in front of the address it hands out - no such
   // allocator is visible here, and nothing in the visible part of this change
   // calls fast_free. Passing a pointer from any other source would read
   // before the start of the buffer; confirm before use.
+  if (ptr) {
+    free(static_cast<void**>(ptr)[-1]);
+  }
+}
+void density_prior_box(const lite::Tensor* input,
+                       const lite::Tensor* image,
+                       lite::Tensor* boxes,
+                       lite::Tensor* variances,
+                       const std::vector<float>& min_size_,
+                       const std::vector<float>& fixed_size_,
+                       const std::vector<float>& fixed_ratio_,
+                       const std::vector<int>& density_size_,
+                       const std::vector<float>& max_size_,
+                       const std::vector<float>& aspect_ratio_,
+                       const std::vector<float>& variance_,
+                       int img_w_,
+                       int img_h_,
+                       float step_w_,
+                       float step_h_,
+                       float offset_,
+                       int prior_num_,
+                       bool is_flip_,
+                       bool is_clip_,
+                       const std::vector<std::string>& order_,
+                       bool min_max_aspect_ratios_order) {
   // Fills `boxes` with prior-box corners (xmin, ymin, xmax, ymax), each
   // divided by the image width/height, for every cell of the `input` feature
   // map, and fills `variances` with variance_[0..3] repeated for every prior.
   // Both outputs are resized to
   // {input->dims()[2], input->dims()[3], prior_num_, 4}.
   //
   // Two modes:
   //   * fixed_size_ non-empty: density boxes built from fixed_size_,
   //     fixed_ratio_ and density_size_. While being written, xmin/ymin are
   //     floored at 0 and xmax/ymax are capped at 1.
   //   * otherwise: boxes built from min_size_, max_size_ and aspect_ratio_.
   //     These are written unclamped and are only clamped by the final
   //     is_clip_ pass.
   //
   // `is_flip_` and `order_` are accepted but never read in this function.
   // NOTE(review): `idx` is never checked against the output size, so the
   // caller must pass a prior_num_ that matches the number of boxes the
   // size/ratio vectors actually generate per cell.
+  // compute output shape
+  int win1 = input->dims()[3];
+  int hin1 = input->dims()[2];
+  DDim shape_out({hin1, win1, prior_num_, 4});
+  boxes->Resize(shape_out);
+  variances->Resize(shape_out);
+
+  float* _cpu_data = boxes->mutable_data<float>();
+  float* _variance_data = variances->mutable_data<float>();
+
+  const int width = win1;
+  const int height = hin1;
+  int img_width = img_w_;
+  int img_height = img_h_;
   // If either img_w_ or img_h_ is 0, both are replaced by the `image` tensor's
   // dims [3] and [2].
+  if (img_width == 0 || img_height == 0) {
+    img_width = image->dims()[3];
+    img_height = image->dims()[2];
+  }
+  float step_w = step_w_;
+  float step_h = step_h_;
   // If either step is 0, both are derived as image size / feature-map size.
+  if (step_w == 0 || step_h == 0) {
+    step_w = static_cast<float>(img_width) / width;
+    step_h = static_cast<float>(img_height) / height;
+  }
+  float offset = offset_;
   // Average of the two steps, truncated to int; only the fixed_ratio_ branch
   // below reads it.
+  int step_average = static_cast<int>((step_w + step_h) * 0.5);  // add
   // Total number of floats in `boxes`; used by the is_clip_ pass.
+  int channel_size = height * width * prior_num_ * 4;
   // Running write position into _cpu_data.
+  int idx = 0;
+  for (int h = 0; h < height; ++h) {
+    for (int w = 0; w < width; ++w) {
       // Cell center in image coordinates.
+      float center_x = (w + offset) * step_w;
+      float center_y = (h + offset) * step_h;
+      float box_width;
+      float box_height;
+      if (fixed_size_.size() > 0) {
+        // add
+        for (int s = 0; s < fixed_size_.size(); ++s) {
           // NOTE(review): fixed_size_ holds floats; this assignment truncates
           // to int. `com_idx` below is never read.
+          int fixed_size = fixed_size_[s];
+          int com_idx = 0;
+          box_width = fixed_size;
+          box_height = fixed_size;
+
+          if (fixed_ratio_.size() > 0) {
             // For each fixed ratio, emit a density x density grid of boxes
             // around the cell center, spaced by
             // shift = step_average / density (integer division).
             // NOTE(review): density_size_[s] is read without checking that
             // density_size_ has an entry for s.
+            for (int r = 0; r < fixed_ratio_.size(); ++r) {
+              float ar = fixed_ratio_[r];
+              int density = density_size_[s];
+              int shift = step_average / density;
+              float box_width_ratio = fixed_size_[s] * sqrt(ar);
+              float box_height_ratio = fixed_size_[s] / sqrt(ar);
+
+              for (int p = 0; p < density; ++p) {
+                for (int c = 0; c < density; ++c) {
+                  float center_x_temp =
+                      center_x - step_average / 2.0f + shift / 2.f + c * shift;
+                  float center_y_temp =
+                      center_y - step_average / 2.0f + shift / 2.f + p * shift;
+                  // xmin
+                  _cpu_data[idx++] =
+                      (center_x_temp - box_width_ratio / 2.f) / img_width >= 0
+                          ? (center_x_temp - box_width_ratio / 2.f) / img_width
+                          : 0;
+                  // ymin
+                  _cpu_data[idx++] =
+                      (center_y_temp - box_height_ratio / 2.f) / img_height >= 0
+                          ? (center_y_temp - box_height_ratio / 2.f) /
+                                img_height
+                          : 0;
+                  // xmax
+                  _cpu_data[idx++] =
+                      (center_x_temp + box_width_ratio / 2.f) / img_width <= 1
+                          ? (center_x_temp + box_width_ratio / 2.f) / img_width
+                          : 1;
+                  // ymax
+                  _cpu_data[idx++] =
+                      (center_y_temp + box_height_ratio / 2.f) / img_height <= 1
+                          ? (center_y_temp + box_height_ratio / 2.f) /
+                                img_height
+                          : 1;
+                }
+              }
+            }
+          } else {
+            // this code for density anchor box
             // Square boxes of side fixed_size on a density x density grid;
             // shift is fixed_size_[s] / density truncated to int.
+            if (density_size_.size() > 0) {
+              CHECK_EQ(fixed_size_.size(), density_size_.size())
+                  << "fixed_size_ should be same with density_size_";
+              int density = density_size_[s];
+              int shift = fixed_size_[s] / density;
+
+              for (int r = 0; r < density; ++r) {
+                for (int c = 0; c < density; ++c) {
+                  float center_x_temp =
+                      center_x - fixed_size / 2.f + shift / 2.f + c * shift;
+                  float center_y_temp =
+                      center_y - fixed_size / 2.f + shift / 2.f + r * shift;
+                  // xmin
+                  _cpu_data[idx++] =
+                      (center_x_temp - box_width / 2.f) / img_width >= 0
+                          ? (center_x_temp - box_width / 2.f) / img_width
+                          : 0;
+                  // ymin
+                  _cpu_data[idx++] =
+                      (center_y_temp - box_height / 2.f) / img_height >= 0
+                          ? (center_y_temp - box_height / 2.f) / img_height
+                          : 0;
+                  // xmax
+                  _cpu_data[idx++] =
+                      (center_x_temp + box_width / 2.f) / img_width <= 1
+                          ? (center_x_temp + box_width / 2.f) / img_width
+                          : 1;
+                  // ymax
+                  _cpu_data[idx++] =
+                      (center_y_temp + box_height / 2.f) / img_height <= 1
+                          ? (center_y_temp + box_height / 2.f) / img_height
+                          : 1;
+                }
+              }
+            }
+
+            // rest of priors: will never come here!!!
             // NOTE(review): the loop body does run for every aspect_ratio_
             // entry that is not within 1e-6 of 1, and it reads
             // density_size_[s] outside the density_size_.size() > 0 guard
             // above. The "never come here" claim therefore depends on what
             // the caller passes in aspect_ratio_ - confirm.
+            for (int r = 0; r < aspect_ratio_.size(); ++r) {
+              float ar = aspect_ratio_[r];
+
+              if (fabs(ar - 1.) < 1e-6) {
+                continue;
+              }
+
+              int density = density_size_[s];
+              int shift = fixed_size_[s] / density;
+              float box_width_ratio = fixed_size_[s] * sqrt(ar);
+              float box_height_ratio = fixed_size_[s] / sqrt(ar);
+
+              for (int p = 0; p < density; ++p) {
+                for (int c = 0; c < density; ++c) {
+                  float center_x_temp =
+                      center_x - fixed_size / 2.f + shift / 2.f + c * shift;
+                  float center_y_temp =
+                      center_y - fixed_size / 2.f + shift / 2.f + p * shift;
+                  // xmin
+                  _cpu_data[idx++] =
+                      (center_x_temp - box_width_ratio / 2.f) / img_width >= 0
+                          ? (center_x_temp - box_width_ratio / 2.f) / img_width
+                          : 0;
+                  // ymin
+                  _cpu_data[idx++] =
+                      (center_y_temp - box_height_ratio / 2.f) / img_height >= 0
+                          ? (center_y_temp - box_height_ratio / 2.f) /
+                                img_height
+                          : 0;
+                  // xmax
+                  _cpu_data[idx++] =
+                      (center_x_temp + box_width_ratio / 2.f) / img_width <= 1
+                          ? (center_x_temp + box_width_ratio / 2.f) / img_width
+                          : 1;
+                  // ymax
+                  _cpu_data[idx++] =
+                      (center_y_temp + box_height_ratio / 2.f) / img_height <= 1
+                          ? (center_y_temp + box_height_ratio / 2.f) /
+                                img_height
+                          : 1;
+                }
+              }
+            }
+          }
+        }
+      } else {
         // Scratch buffers for one min_size entry: min_buf and max_buf hold
         // one box each (4 floats), com_buf holds up to aspect_ratio_.size()
         // boxes. They are allocated and freed once per feature-map cell,
         // since this branch sits inside the h/w loops.
+        float* min_buf = reinterpret_cast<float*>(
+            TargetWrapper<TARGET(kHost)>::Malloc(sizeof(float) * 4));
+        float* max_buf = reinterpret_cast<float*>(
+            TargetWrapper<TARGET(kHost)>::Malloc(sizeof(float) * 4));
+        float* com_buf =
+            reinterpret_cast<float*>(TargetWrapper<TARGET(kHost)>::Malloc(
+                sizeof(float) * aspect_ratio_.size() * 4));
+        for (int s = 0; s < min_size_.size(); ++s) {
           // Number of floats currently held in each scratch buffer.
+          int min_idx = 0;
+          int max_idx = 0;
+          int com_idx = 0;
           // NOTE(review): min_size_ holds floats; this assignment truncates
           // to int.
+          int min_size = min_size_[s];
+          // first prior: aspect_ratio = 1, size = min_size
+          box_width = box_height = min_size;
+          //! xmin
+          min_buf[min_idx++] = (center_x - box_width / 2.f) / img_width;
+          //! ymin
+          min_buf[min_idx++] = (center_y - box_height / 2.f) / img_height;
+          //! xmax
+          min_buf[min_idx++] = (center_x + box_width / 2.f) / img_width;
+          //! ymax
+          min_buf[min_idx++] = (center_y + box_height / 2.f) / img_height;

           // NOTE(review): max_size_ is indexed with the min_size_ index `s`;
           // this assumes a non-empty max_size_ has at least as many entries
           // as min_size_ - confirm against the caller.
+          if (max_size_.size() > 0) {
+            int max_size = max_size_[s];
+            //! second prior: aspect_ratio = 1, size = sqrt(min_size * max_size)
+            box_width = box_height = sqrtf(min_size * max_size);
+            //! xmin
+            max_buf[max_idx++] = (center_x - box_width / 2.f) / img_width;
+            //! ymin
+            max_buf[max_idx++] = (center_y - box_height / 2.f) / img_height;
+            //! xmax
+            max_buf[max_idx++] = (center_x + box_width / 2.f) / img_width;
+            //! ymax
+            max_buf[max_idx++] = (center_y + box_height / 2.f) / img_height;
+          }
+
+          //! rest of priors
           // One box per aspect ratio that is not within 1e-6 of 1.
+          for (int r = 0; r < aspect_ratio_.size(); ++r) {
+            float ar = aspect_ratio_[r];
+            if (fabs(ar - 1.) < 1e-6) {
+              continue;
+            }
+            box_width = min_size * sqrt(ar);
+            box_height = min_size / sqrt(ar);
+            //! xmin
+            com_buf[com_idx++] = (center_x - box_width / 2.f) / img_width;
+            //! ymin
+            com_buf[com_idx++] = (center_y - box_height / 2.f) / img_height;
+            //! xmax
+            com_buf[com_idx++] = (center_x + box_width / 2.f) / img_width;
+            //! ymax
+            com_buf[com_idx++] = (center_y + box_height / 2.f) / img_height;
+          }
           // Output order per min_size entry: min, max, aspect-ratio boxes
           // when min_max_aspect_ratios_order is set; otherwise min,
           // aspect-ratio boxes, max.
+          if (min_max_aspect_ratios_order) {
+            memcpy(_cpu_data + idx, min_buf, sizeof(float) * min_idx);
+            idx += min_idx;
+            memcpy(_cpu_data + idx, max_buf, sizeof(float) * max_idx);
+            idx += max_idx;
+            memcpy(_cpu_data + idx, com_buf, sizeof(float) * com_idx);
+            idx += com_idx;
+          } else {
+            memcpy(_cpu_data + idx, min_buf, sizeof(float) * min_idx);
+            idx += min_idx;
+            memcpy(_cpu_data + idx, com_buf, sizeof(float) * com_idx);
+            idx += com_idx;
+            memcpy(_cpu_data + idx, max_buf, sizeof(float) * max_idx);
+            idx += max_idx;
+          }
+        }
+        TargetWrapper<TARGET(kHost)>::Free(min_buf);
+        TargetWrapper<TARGET(kHost)>::Free(max_buf);
+        TargetWrapper<TARGET(kHost)>::Free(com_buf);
+      }
+    }
+  }
+  //! clip the prior's coordinate such that it is within [0, 1]
+  if (is_clip_) {
+    for (int d = 0; d < channel_size; ++d) {
+      _cpu_data[d] = std::min(std::max(_cpu_data[d], 0.f), 1.f);
+    }
+  }
+  //! set the variance.
   // Writes variance_[0..3] once per prior of every cell.
   // NOTE(review): assumes variance_ has at least 4 entries - confirm.
+  int count = 0;
+  for (int h = 0; h < height; ++h) {
+    for (int w = 0; w < width; ++w) {
+      for (int i = 0; i < prior_num_; ++i) {
+        for (int j = 0; j < 4; ++j) {
+          _variance_data[count] = variance_[j];
+          ++count;
+        }
+      }
+    }
+  }
+}
+
+void PriorBoxCompute::ReInitWhenNeeded() {
+  auto& param = this->template Param<param_t>();
+  auto input_dims = param.input->dims();
+  auto image_dims = param.image->dims();
+  if (last_input_shape_ == input_dims && last_image_shape_ == image_dims) {
+    return;
+  }
  bool is_flip = param.flip;
  bool is_clip = param.clip;
  std::vector<float> min_size = param.min_sizes;
@@ -66,25 +360,35 @@ void PriorBoxCompute::Run() {
  prior_num += max_size.size();
  std::vector<std::string> order = param.order;
  bool min_max_aspect_ratios_order = param.min_max_aspect_ratios_order;
+  density_prior_box(param.input,
+                    param.image,
+                    &boxes_tmp_,
+                    &variances_tmp_,
+                    min_size,
+                    std::vector<float>(),
+                    std::vector<float>(),
+                    std::vector<int>(),
+                    max_size,
+                    aspect_ratios_vec,
+                    variance,
+                    img_w,
+                    img_h,
+                    step_w,
+                    step_h,
+                    offset,
+                    prior_num,
+                    is_flip,
+                    is_clip,
+                    order,
+                    min_max_aspect_ratios_order);
+  last_input_shape_ = input_dims;
+  last_image_shape_ = image_dims;
+}

-  lite::arm::math::prior_box(param.input,
-                             param.image,
-                             &param.boxes,
-                             &param.variances,
-                             min_size,
-                             max_size,
-                             aspect_ratios_vec,
-                             variance,
-                             img_w,
-                             img_h,
-                             step_w,
-                             step_h,
-                             offset,
-                             prior_num,
-                             is_flip,
-                             is_clip,
-                             order,
-                             min_max_aspect_ratios_order);
+void PriorBoxCompute::Run() {
   // Performs no box computation itself: it copies the cached boxes_tmp_ and
   // variances_tmp_ tensors into the op outputs param.boxes and
   // param.variances. In this change the cached tensors are filled by
   // ReInitWhenNeeded(), which passes them to density_prior_box().
   // NOTE(review): relies on ReInitWhenNeeded() having run before Run() so the
   // cache is populated - confirm against the kernel launch sequence.
+  auto& param = this->template Param<param_t>();
+  param.boxes->CopyDataFrom(boxes_tmp_);
+  param.variances->CopyDataFrom(variances_tmp_);
 }

 }  // namespace arm

--- a/lite/kernels/arm/prior_box_compute.h
+++ b/lite/kernels/arm/prior_box_compute.h
@@ -26,8 +26,14 @@ class PriorBoxCompute : public KernelLite<TARGET(kARM), PRECISION(kFloat)> {
  using param_t = operators::PriorBoxParam;

  void Run() override;
-
+  void ReInitWhenNeeded() override;
  virtual ~PriorBoxCompute() = default;
+
+ private:
+  Tensor boxes_tmp_;
+  Tensor variances_tmp_;
+  DDim last_input_shape_;
+  DDim last_image_shape_;
 };

 }  // namespace arm