[arm]Bilinear resize compute error fix (#4351)

* Fix bilinear_resize result not matching fluid. test=develop
* Fix cv build error. test=develop
* Fix format. test=develop

[arm]Bilinear resize compute error fix (#4351)
* Fix bilinear_resize result not matching fluid. test=develop
* Fix cv build error. test=develop
* Fix format. test=develop
9a9d1cf2 · HappyAngel · GitHub · db2ab554 · 9a9d1cf2 · 9a9d1cf2
9 changed files
--- a/lite/backends/arm/math/interpolate.cc
+++ b/lite/backends/arm/math/interpolate.cc
@@ -70,7 +70,8 @@ void bilinear_interp(const float* src,
                     int h_out,
                     float scale_x,
                     float scale_y,
-                     bool with_align) {
+                     bool align_corners,
+                     bool align_mode) {
  int* buf = new int[w_out + h_out + w_out * 2 + h_out * 2];
  int* xofs = buf;
@@ -78,14 +79,13 @@ void bilinear_interp(const float* src,
  float* alpha = reinterpret_cast<float*>(buf + w_out + h_out);
  float* beta = reinterpret_cast<float*>(buf + w_out + h_out + w_out * 2);
+  bool with_align = (align_mode == 0 && !align_corners);
  float fx = 0.0f;
  float fy = 0.0f;
  int sx = 0;
  int sy = 0;
-  if (with_align) {
+  if (!with_align) {
-    scale_x = static_cast<float>(w_in - 1) / (w_out - 1);
-    scale_y = static_cast<float>(h_in - 1) / (h_out - 1);
    // calculate x axis coordinate
    for (int dx = 0; dx < w_out; dx++) {
      fx = dx * scale_x;
@@ -105,8 +105,6 @@ void bilinear_interp(const float* src,
      beta[dy * 2 + 1] = fy;
    }
  } else {
-    scale_x = static_cast<float>(w_in) / w_out;
-    scale_y = static_cast<float>(h_in) / h_out;
    // calculate x axis coordinate
    for (int dx = 0; dx < w_out; dx++) {
      fx = scale_x * (dx + 0.5f) - 0.5f;
@@ -468,15 +466,9 @@ void nearest_interp(const float* src,
                    float* dst,
                    int w_out,
                    int h_out,
-                    float scale_x,
+                    float scale_w_new,
-                    float scale_y,
+                    float scale_h_new,
                    bool with_align) {
-  float scale_w_new = (with_align)
-                          ? (static_cast<float>(w_in - 1) / (w_out - 1))
-                          : (static_cast<float>(w_in) / (w_out));
-  float scale_h_new = (with_align)
-                          ? (static_cast<float>(h_in - 1) / (h_out - 1))
-                          : (static_cast<float>(h_in) / (h_out));
  if (with_align) {
    for (int h = 0; h < h_out; ++h) {
      float* dst_p = dst + h * w_out;
@@ -506,7 +498,8 @@ void interpolate(lite::Tensor* X,
                 int out_height,
                 int out_width,
                 float scale,
-                 bool with_align,
+                 bool align_corners,
+                 bool align_mode,
                 std::string interpolate_type) {
  int in_h = X->dims()[2];
  int in_w = X->dims()[3];
@@ -531,12 +524,12 @@ void interpolate(lite::Tensor* X,
      out_width = out_size_data[1];
    }
  }
-  float height_scale = scale;
+  // float height_scale = scale;
-  float width_scale = scale;
+  // float width_scale = scale;
-  if (out_width > 0 && out_height > 0) {
+  // if (out_width > 0 && out_height > 0) {
-    height_scale = static_cast<float>(out_height / X->dims()[2]);
+  //   height_scale = static_cast<float>(out_height / X->dims()[2]);
-    width_scale = static_cast<float>(out_width / X->dims()[3]);
+  //   width_scale = static_cast<float>(out_width / X->dims()[3]);
-  }
+  // }
  int num_cout = X->dims()[0];
  int c_cout = X->dims()[1];
  Out->Resize({num_cout, c_cout, out_height, out_width});
@@ -551,6 +544,10 @@ void interpolate(lite::Tensor* X,
  int spatial_in = in_h * in_w;
  int spatial_out = out_h * out_w;
+  float scale_x = (align_corners) ? (static_cast<float>(in_w - 1) / (out_w - 1))
+                                  : (static_cast<float>(in_w) / (out_w));
+  float scale_y = (align_corners) ? (static_cast<float>(in_h - 1) / (out_h - 1))
+                                  : (static_cast<float>(in_h) / (out_h));
  if ("Bilinear" == interpolate_type) {
 #pragma omp parallel for
    for (int i = 0; i < count; ++i) {
@@ -560,9 +557,10 @@ void interpolate(lite::Tensor* X,
                      dout + spatial_out * i,
                      out_w,
                      out_h,
-                      1.f / width_scale,
+                      scale_x,
-                      1.f / height_scale,
+                      scale_y,
-                      with_align);
+                      align_corners,
+                      align_mode);
    }
  } else if ("Nearest" == interpolate_type) {
 #pragma omp parallel for
@@ -573,9 +571,9 @@ void interpolate(lite::Tensor* X,
                     dout + spatial_out * i,
                     out_w,
                     out_h,
-                     1.f / width_scale,
+                     scale_x,
-                     1.f / height_scale,
+                     scale_y,
-                     with_align);
+                     align_corners);
    }
  }
 }

--- a/lite/backends/arm/math/interpolate.h
+++ b/lite/backends/arm/math/interpolate.h
@@ -30,7 +30,8 @@ void bilinear_interp(const float* src,
                     int h_out,
                     float scale_x,
                     float scale_y,
-                     bool with_align);
+                     bool align_corners,
+                     bool align_mode);
 void nearest_interp(const float* src,
                    int w_in,
@@ -40,7 +41,7 @@ void nearest_interp(const float* src,
                    int h_out,
                    float scale_x,
                    float scale_y,
-                    bool with_align);
+                    bool align_corners);
 void interpolate(lite::Tensor* X,
                 lite::Tensor* OutSize,
@@ -50,7 +51,8 @@ void interpolate(lite::Tensor* X,
                 int out_height,
                 int out_width,
                 float scale,
-                 bool with_align,
+                 bool align_corners,
+                 bool align_mode,
                 std::string interpolate_type);
 } /* namespace math */

--- a/lite/demo/cxx/test_cv/test_img_prepross.cc
+++ b/lite/demo/cxx/test_cv/test_img_prepross.cc
@@ -128,7 +128,7 @@ bool test_convert(bool cv_run,
  for (int i = 0; i < test_iter; i++) {
    clock_t begin = clock();
    // resize default linear
-    image_preprocess.imageConvert(src, resize_lite);
+    image_preprocess.image_convert(src, resize_lite);
    clock_t end = clock();
    to_lite += (end - begin);
  }
@@ -226,7 +226,7 @@ bool test_flip(bool cv_run,
  for (int i = 0; i < test_iter; i++) {
    clock_t begin = clock();
    // resize default linear
-    image_preprocess.imageFlip(src, resize_lite);
+    image_preprocess.image_flip(src, resize_lite);
    clock_t end = clock();
    to_lite += (end - begin);
  }
@@ -330,7 +330,7 @@ bool test_rotate(bool cv_run,
  for (int i = 0; i < test_iter; i++) {
    clock_t begin = clock();
    // resize default linear
-    image_preprocess.imageRotate(src, resize_lite);
+    image_preprocess.image_rotate(src, resize_lite);
    clock_t end = clock();
    to_lite += (end - begin);
  }
@@ -426,7 +426,7 @@ bool test_resize(bool cv_run,
  for (int i = 0; i < test_iter; i++) {
    clock_t begin = clock();
    // resize default linear
-    image_preprocess.imageResize(src, resize_lite);
+    image_preprocess.image_resize(src, resize_lite);
    clock_t end = clock();
    to_lite += (end - begin);
  }
@@ -526,7 +526,7 @@ bool test_crop(bool cv_run,
  std::cout << "lite compute:" << std::endl;
  for (int i = 0; i < test_iter; i++) {
    clock_t begin = clock();
-    image_preprocess.imageCrop(
+    image_preprocess.image_crop(
        src, resize_lite, dstFormat, srcw, srch, left_x, left_y, dstw, dsth);
    clock_t end = clock();
    to_lite += (end - begin);

--- a/lite/demo/cxx/test_cv/test_model_cv.cc
+++ b/lite/demo/cxx/test_cv/test_model_cv.cc
@@ -88,13 +88,13 @@ void pre_process(const cv::Mat& img, int width, int height, Tensor dstTensor) {
  uint8_t* rgb_ptr = new uint8_t[img.cols * img.rows * 3];
  uint8_t* resize_ptr = new uint8_t[width * height * 3];
  // do convert bgr--rgb
-  img_process.imageConvert(img_ptr, rgb_ptr);
+  img_process.image_convert(img_ptr, rgb_ptr);
  // do resize
-  img_process.imageResize(rgb_ptr, resize_ptr);
+  img_process.image_resize(rgb_ptr, resize_ptr);
  // data--tensor and normalize
  float means[3] = {103.94f, 116.78f, 123.68f};
  float scales[3] = {0.017f, 0.017f, 0.017f};
-  img_process.image2Tensor(
+  img_process.image_to_tensor(
      resize_ptr, &dstTensor, LayoutType::kNCHW, means, scales);
  float* data = dstTensor.mutable_data<float>();
 #else

--- a/lite/kernels/arm/interpolate_compute.cc
+++ b/lite/kernels/arm/interpolate_compute.cc
@@ -35,6 +35,7 @@ void BilinearInterpCompute::Run() {
  int out_w = param.out_w;
  int out_h = param.out_h;
  bool align_corners = param.align_corners;
+  bool align_mode = param.align_mode;
  std::string interp_method = "Bilinear";
  lite::arm::math::interpolate(X,
                               OutSize,
@@ -45,6 +46,7 @@ void BilinearInterpCompute::Run() {
                               out_w,
                               scale,
                               align_corners,
+                               align_mode,
                               interp_method);
 }
@@ -59,6 +61,7 @@ void NearestInterpCompute::Run() {
  int out_w = param.out_w;
  int out_h = param.out_h;
  bool align_corners = param.align_corners;
+  bool align_mode = param.align_mode;
  std::string interp_method = "Nearest";
  lite::arm::math::interpolate(X,
                               OutSize,
@@ -69,6 +72,7 @@ void NearestInterpCompute::Run() {
                               out_w,
                               scale,
                               align_corners,
+                               align_mode,
                               interp_method);
 }

--- a/lite/tests/cv/image_convert_test.cc
+++ b/lite/tests/cv/image_convert_test.cc
@@ -293,53 +293,53 @@ void test_img(const std::vector<int>& cluster_id,
        // LOG(INFO) << "image convert saber compute";
        t_convert.Start();
-        // 方法一: image_preprocess.imageCovert(src, lite_dst);
+        // 方法一: image_preprocess.image_convert(src, lite_dst);
-        image_preprocess.imageConvert(
+        image_preprocess.image_convert(
            src, lite_dst, (ImageFormat)srcFormat, (ImageFormat)dstFormat);
        t_convert.Stop();
        // LOG(INFO) << "image resize saber compute";
        t_resize.Start();
-        // 方法一:image_preprocess.imageResize(lite_dst, resize_tmp);
+        // 方法一:image_preprocess.image_resize(lite_dst, resize_tmp);
-        image_preprocess.imageResize(lite_dst,
+        image_preprocess.image_resize(lite_dst,
-                                     resize_tmp,
+                                      resize_tmp,
-                                     (ImageFormat)dstFormat,
+                                      (ImageFormat)dstFormat,
-                                     srcw,
+                                      srcw,
-                                     srch,
+                                      srch,
-                                     dstw,
+                                      dstw,
-                                     dsth);
+                                      dsth);
        t_resize.Stop();
        // LOG(INFO) << "image rotate saber compute";
        t_rotate.Start();
-        // 方法一: image_preprocess.imageRotate(resize_tmp, tv_out_ratote);
+        // 方法一: image_preprocess.image_rotate(resize_tmp, tv_out_ratote);
-        image_preprocess.imageRotate(resize_tmp,
+        image_preprocess.image_rotate(resize_tmp,
-                                     tv_out_ratote,
+                                      tv_out_ratote,
-                                     (ImageFormat)dstFormat,
+                                      (ImageFormat)dstFormat,
-                                     dstw,
+                                      dstw,
-                                     dsth,
+                                      dsth,
-                                     rotate);
+                                      rotate);
        t_rotate.Stop();
        // LOG(INFO) << "image flip saber compute";
        t_flip.Start();
-        // 方法一: image_preprocess.imageFlip(resize_tmp, tv_out_flip);
+        // 方法一: image_preprocess.image_flip(resize_tmp, tv_out_flip);
-        image_preprocess.imageFlip(
+        image_preprocess.image_flip(
            resize_tmp, tv_out_flip, (ImageFormat)dstFormat, dstw, dsth, flip);
        t_flip.Stop();
        // LOG(INFO) << "image to tensor compute";
        t_tensor.Start();
-        // 方法一: image_preprocess.image2Tensor(
+        // 方法一: image_preprocess.image_to_tensor(
        //  resize_tmp, &dst_tensor, layout, means, scales);
-        image_preprocess.image2Tensor(resize_tmp,
+        image_preprocess.image_to_tensor(resize_tmp,
-                                      &dst_tensor,
+                                         &dst_tensor,
-                                      (ImageFormat)dstFormat,
+                                         (ImageFormat)dstFormat,
-                                      dstw,
+                                         dstw,
-                                      dsth,
+                                         dsth,
-                                      layout,
+                                         layout,
-                                      means,
+                                         means,
-                                      scales);
+                                         scales);
        t_tensor.Stop();
        t1.Stop();
      }
@@ -680,7 +680,7 @@ void test_rotate(const std::vector<int>& cluster_id,
      for (int i = 0; i < test_iter; ++i) {
        t_rotate.Start();
-        image_preprocess.imageRotate(src, lite_dst);
+        image_preprocess.image_rotate(src, lite_dst);
        t_rotate.Stop();
      }
      LOG(INFO) << "image rotate avg time : " << t_rotate.LapTimes().Avg()
@@ -847,7 +847,7 @@ void test_flip(const std::vector<int>& cluster_id,
      for (int i = 0; i < test_iter; ++i) {
        t_rotate.Start();
-        image_preprocess.imageFlip(src, lite_dst);
+        image_preprocess.image_flip(src, lite_dst);
        t_rotate.Stop();
      }
      LOG(INFO) << "image flip avg time : " << t_rotate.LapTimes().Avg()
@@ -1016,7 +1016,7 @@ void test_resize(const std::vector<int>& cluster_id,
      for (int i = 0; i < test_iter; ++i) {
        t_rotate.Start();
-        image_preprocess.imageResize(src, lite_dst);
+        image_preprocess.image_resize(src, lite_dst);
        t_rotate.Stop();
      }
      LOG(INFO) << "image Resize avg time : " << t_rotate.LapTimes().Avg()
@@ -1191,7 +1191,7 @@ void test_convert(const std::vector<int>& cluster_id,
      for (int i = 0; i < test_iter; ++i) {
        t_rotate.Start();
-        image_preprocess.imageConvert(src, lite_dst);
+        image_preprocess.image_convert(src, lite_dst);
        t_rotate.Stop();
      }
      LOG(INFO) << "image Convert avg time : " << t_rotate.LapTimes().Avg()

--- a/lite/tests/cv/image_profiler_test.cc
+++ b/lite/tests/cv/image_profiler_test.cc
@@ -163,7 +163,7 @@ void test_convert(const std::vector<int>& cluster_id,
      for (int i = 0; i < test_iter; ++i) {
        t_lite.Start();
-        image_preprocess.imageConvert(src, lite_dst);
+        image_preprocess.image_convert(src, lite_dst);
        t_lite.Stop();
      }
      LOG(INFO) << "image Convert avg time : " << t_lite.LapTimes().Avg()
@@ -284,7 +284,7 @@ void test_resize(const std::vector<int>& cluster_id,
      for (int i = 0; i < test_iter; ++i) {
        t_rotate.Start();
-        image_preprocess.imageResize(src, lite_dst);
+        image_preprocess.image_resize(src, lite_dst);
        t_rotate.Stop();
      }
      LOG(INFO) << "image Resize avg time : " << t_rotate.LapTimes().Avg()
@@ -405,7 +405,7 @@ void test_flip(const std::vector<int>& cluster_id,
      for (int i = 0; i < test_iter; ++i) {
        t_lite.Start();
-        image_preprocess.imageFlip(src, lite_dst);
+        image_preprocess.image_flip(src, lite_dst);
        t_lite.Stop();
      }
      LOG(INFO) << "image flip avg time : " << t_lite.LapTimes().Avg()
@@ -523,7 +523,7 @@ void test_rotate(const std::vector<int>& cluster_id,
      for (int i = 0; i < test_iter; ++i) {
        t_lite.Start();
-        image_preprocess.imageRotate(src, lite_dst);
+        image_preprocess.image_rotate(src, lite_dst);
        t_lite.Stop();
      }
      LOG(INFO) << "image rotate avg time : " << t_lite.LapTimes().Avg()
@@ -667,14 +667,14 @@ void test_to_tensor(const std::vector<int>& cluster_id,
      for (int i = 0; i < test_iter; ++i) {
        t_lite.Start();
-        image_preprocess.image2Tensor(src,
+        image_preprocess.image_to_tensor(src,
-                                      &dst_tensor,
+                                         &dst_tensor,
-                                      (ImageFormat)dstFormat,
+                                         (ImageFormat)dstFormat,
-                                      dstw,
+                                         dstw,
-                                      dsth,
+                                         dsth,
-                                      layout,
+                                         layout,
-                                      means,
+                                         means,
-                                      scales);
+                                         scales);
        t_lite.Stop();
      }
      LOG(INFO) << "image tensor avg time : " << t_lite.LapTimes().Avg()

--- a/lite/tests/kernels/interp_compute_test.cc
+++ b/lite/tests/kernels/interp_compute_test.cc
@@ -416,11 +416,6 @@ void TestInterpAlignMode(Place place, float abs_error = 2e-5) {
  for (auto x_dims : std::vector<std::vector<int64_t>>{{3, 4, 8, 9}}) {
    for (bool align_corners : {true, false}) {
      for (int align_mode : {0, 1}) {
-        // may exist bug in arm kernel
-        if (place == TARGET(kARM) && align_mode == 1 && !align_corners) {
-          continue;
-        }
-        // align_mode = 0 && align_corners = false NOT supported in Huawei
        // Ascend NPU DDK
        if (place == TARGET(kHuaweiAscendNPU) && align_mode == 0 &&
            !align_corners) {

--- a/lite/tests/math/sgemm_compute_test.cc
+++ b/lite/tests/math/sgemm_compute_test.cc
@@ -47,6 +47,7 @@ DEFINE_bool(basic_test, true, "do all tests");
 #else
 DEFINE_bool(basic_test, false, "do all tests");
 #endif
 DEFINE_bool(check_result, true, "check the result");
 DEFINE_int32(M, 512, "gemm: M");