From 9a9d1cf2916587beb7a7f9494872b971b0590283 Mon Sep 17 00:00:00 2001
From: HappyAngel <chenjiaobuaa@126.com>
Date: Fri, 18 Sep 2020 11:03:35 +0800
Subject: [PATCH] [arm]Bilinear resize compute error fix (#4351)

* fix bilinear_resize result not matching fluid. test=develop

* fix cv build error. test=develop


* fix format. test=develop
---
 lite/backends/arm/math/interpolate.cc      | 52 +++++++++---------
 lite/backends/arm/math/interpolate.h       |  8 ++-
 lite/demo/cxx/test_cv/test_img_prepross.cc | 10 ++--
 lite/demo/cxx/test_cv/test_model_cv.cc     |  6 +-
 lite/kernels/arm/interpolate_compute.cc    |  4 ++
 lite/tests/cv/image_convert_test.cc        | 64 +++++++++++-----------
 lite/tests/cv/image_profiler_test.cc       | 24 ++++----
 lite/tests/kernels/interp_compute_test.cc  |  5 --
 lite/tests/math/sgemm_compute_test.cc      |  1 +
 9 files changed, 87 insertions(+), 87 deletions(-)
diff --git a/lite/backends/arm/math/interpolate.cc b/lite/backends/arm/math/interpolate.cc
index 1c53142fc5..4345c2e813 100644
--- a/lite/backends/arm/math/interpolate.cc
+++ b/lite/backends/arm/math/interpolate.cc
@@ -70,7 +70,8 @@ void bilinear_interp(const float* src,
                      int h_out,
                      float scale_x,
                      float scale_y,
-                     bool with_align) {
+                     bool align_corners,
+                     bool align_mode) {
   int* buf = new int[w_out + h_out + w_out * 2 + h_out * 2];
 
   int* xofs = buf;
@@ -78,14 +79,13 @@ void bilinear_interp(const float* src,
 
   float* alpha = reinterpret_cast<float*>(buf + w_out + h_out);
   float* beta = reinterpret_cast<float*>(buf + w_out + h_out + w_out * 2);
+  bool with_align = (align_mode == 0 && !align_corners);
 
   float fx = 0.0f;
   float fy = 0.0f;
   int sx = 0;
   int sy = 0;
-  if (with_align) {
-    scale_x = static_cast<float>(w_in - 1) / (w_out - 1);
-    scale_y = static_cast<float>(h_in - 1) / (h_out - 1);
+  if (!with_align) {
     // calculate x axis coordinate
     for (int dx = 0; dx < w_out; dx++) {
       fx = dx * scale_x;
@@ -105,8 +105,6 @@ void bilinear_interp(const float* src,
       beta[dy * 2 + 1] = fy;
     }
   } else {
-    scale_x = static_cast<float>(w_in) / w_out;
-    scale_y = static_cast<float>(h_in) / h_out;
     // calculate x axis coordinate
     for (int dx = 0; dx < w_out; dx++) {
       fx = scale_x * (dx + 0.5f) - 0.5f;
@@ -468,15 +466,9 @@ void nearest_interp(const float* src,
                     float* dst,
                     int w_out,
                     int h_out,
-                    float scale_x,
-                    float scale_y,
+                    float scale_w_new,
+                    float scale_h_new,
                     bool with_align) {
-  float scale_w_new = (with_align)
-                          ? (static_cast<float>(w_in - 1) / (w_out - 1))
-                          : (static_cast<float>(w_in) / (w_out));
-  float scale_h_new = (with_align)
-                          ? (static_cast<float>(h_in - 1) / (h_out - 1))
-                          : (static_cast<float>(h_in) / (h_out));
   if (with_align) {
     for (int h = 0; h < h_out; ++h) {
       float* dst_p = dst + h * w_out;
@@ -506,7 +498,8 @@ void interpolate(lite::Tensor* X,
                  int out_height,
                  int out_width,
                  float scale,
-                 bool with_align,
+                 bool align_corners,
+                 bool align_mode,
                  std::string interpolate_type) {
   int in_h = X->dims()[2];
   int in_w = X->dims()[3];
@@ -531,12 +524,12 @@ void interpolate(lite::Tensor* X,
       out_width = out_size_data[1];
     }
   }
-  float height_scale = scale;
-  float width_scale = scale;
-  if (out_width > 0 && out_height > 0) {
-    height_scale = static_cast<float>(out_height / X->dims()[2]);
-    width_scale = static_cast<float>(out_width / X->dims()[3]);
-  }
+  // float height_scale = scale;
+  // float width_scale = scale;
+  // if (out_width > 0 && out_height > 0) {
+  //   height_scale = static_cast<float>(out_height / X->dims()[2]);
+  //   width_scale = static_cast<float>(out_width / X->dims()[3]);
+  // }
   int num_cout = X->dims()[0];
   int c_cout = X->dims()[1];
   Out->Resize({num_cout, c_cout, out_height, out_width});
@@ -551,6 +544,10 @@ void interpolate(lite::Tensor* X,
   int spatial_in = in_h * in_w;
   int spatial_out = out_h * out_w;
 
+  float scale_x = (align_corners) ? (static_cast<float>(in_w - 1) / (out_w - 1))
+                                  : (static_cast<float>(in_w) / (out_w));
+  float scale_y = (align_corners) ? (static_cast<float>(in_h - 1) / (out_h - 1))
+                                  : (static_cast<float>(in_h) / (out_h));
   if ("Bilinear" == interpolate_type) {
 #pragma omp parallel for
     for (int i = 0; i < count; ++i) {
@@ -560,9 +557,10 @@ void interpolate(lite::Tensor* X,
                       dout + spatial_out * i,
                       out_w,
                       out_h,
-                      1.f / width_scale,
-                      1.f / height_scale,
-                      with_align);
+                      scale_x,
+                      scale_y,
+                      align_corners,
+                      align_mode);
     }
   } else if ("Nearest" == interpolate_type) {
 #pragma omp parallel for
@@ -573,9 +571,9 @@ void interpolate(lite::Tensor* X,
                      dout + spatial_out * i,
                      out_w,
                      out_h,
-                     1.f / width_scale,
-                     1.f / height_scale,
-                     with_align);
+                     scale_x,
+                     scale_y,
+                     align_corners);
     }
   }
 }
diff --git a/lite/backends/arm/math/interpolate.h b/lite/backends/arm/math/interpolate.h
index e9c41c5bc8..82c4c068b6 100644
--- a/lite/backends/arm/math/interpolate.h
+++ b/lite/backends/arm/math/interpolate.h
@@ -30,7 +30,8 @@ void bilinear_interp(const float* src,
                      int h_out,
                      float scale_x,
                      float scale_y,
-                     bool with_align);
+                     bool align_corners,
+                     bool align_mode);
 
 void nearest_interp(const float* src,
                     int w_in,
@@ -40,7 +41,7 @@ void nearest_interp(const float* src,
                     int h_out,
                     float scale_x,
                     float scale_y,
-                    bool with_align);
+                    bool align_corners);
 
 void interpolate(lite::Tensor* X,
                  lite::Tensor* OutSize,
@@ -50,7 +51,8 @@ void interpolate(lite::Tensor* X,
                  int out_height,
                  int out_width,
                  float scale,
-                 bool with_align,
+                 bool align_corners,
+                 bool align_mode,
                  std::string interpolate_type);
 
 } /* namespace math */
diff --git a/lite/demo/cxx/test_cv/test_img_prepross.cc b/lite/demo/cxx/test_cv/test_img_prepross.cc
index 1fe632d387..0e00a02260 100644
--- a/lite/demo/cxx/test_cv/test_img_prepross.cc
+++ b/lite/demo/cxx/test_cv/test_img_prepross.cc
@@ -128,7 +128,7 @@ bool test_convert(bool cv_run,
   for (int i = 0; i < test_iter; i++) {
     clock_t begin = clock();
     // resize default linear
-    image_preprocess.imageConvert(src, resize_lite);
+    image_preprocess.image_convert(src, resize_lite);
     clock_t end = clock();
     to_lite += (end - begin);
   }
@@ -226,7 +226,7 @@ bool test_flip(bool cv_run,
   for (int i = 0; i < test_iter; i++) {
     clock_t begin = clock();
     // resize default linear
-    image_preprocess.imageFlip(src, resize_lite);
+    image_preprocess.image_flip(src, resize_lite);
     clock_t end = clock();
     to_lite += (end - begin);
   }
@@ -330,7 +330,7 @@ bool test_rotate(bool cv_run,
   for (int i = 0; i < test_iter; i++) {
     clock_t begin = clock();
     // resize default linear
-    image_preprocess.imageRotate(src, resize_lite);
+    image_preprocess.image_rotate(src, resize_lite);
     clock_t end = clock();
     to_lite += (end - begin);
   }
@@ -426,7 +426,7 @@ bool test_resize(bool cv_run,
   for (int i = 0; i < test_iter; i++) {
     clock_t begin = clock();
     // resize default linear
-    image_preprocess.imageResize(src, resize_lite);
+    image_preprocess.image_resize(src, resize_lite);
     clock_t end = clock();
     to_lite += (end - begin);
   }
@@ -526,7 +526,7 @@ bool test_crop(bool cv_run,
   std::cout << "lite compute:" << std::endl;
   for (int i = 0; i < test_iter; i++) {
     clock_t begin = clock();
-    image_preprocess.imageCrop(
+    image_preprocess.image_crop(
         src, resize_lite, dstFormat, srcw, srch, left_x, left_y, dstw, dsth);
     clock_t end = clock();
     to_lite += (end - begin);
diff --git a/lite/demo/cxx/test_cv/test_model_cv.cc b/lite/demo/cxx/test_cv/test_model_cv.cc
index caa085eecb..6da35ea26f 100644
--- a/lite/demo/cxx/test_cv/test_model_cv.cc
+++ b/lite/demo/cxx/test_cv/test_model_cv.cc
@@ -88,13 +88,13 @@ void pre_process(const cv::Mat& img, int width, int height, Tensor dstTensor) {
   uint8_t* rgb_ptr = new uint8_t[img.cols * img.rows * 3];
   uint8_t* resize_ptr = new uint8_t[width * height * 3];
   // do convert bgr--rgb
-  img_process.imageConvert(img_ptr, rgb_ptr);
+  img_process.image_convert(img_ptr, rgb_ptr);
   // do resize
-  img_process.imageResize(rgb_ptr, resize_ptr);
+  img_process.image_resize(rgb_ptr, resize_ptr);
   // data--tensor and normalize
   float means[3] = {103.94f, 116.78f, 123.68f};
   float scales[3] = {0.017f, 0.017f, 0.017f};
-  img_process.image2Tensor(
+  img_process.image_to_tensor(
       resize_ptr, &dstTensor, LayoutType::kNCHW, means, scales);
   float* data = dstTensor.mutable_data<float>();
 #else
diff --git a/lite/kernels/arm/interpolate_compute.cc b/lite/kernels/arm/interpolate_compute.cc
index 760b2fcf06..8593758d5a 100644
--- a/lite/kernels/arm/interpolate_compute.cc
+++ b/lite/kernels/arm/interpolate_compute.cc
@@ -35,6 +35,7 @@ void BilinearInterpCompute::Run() {
   int out_w = param.out_w;
   int out_h = param.out_h;
   bool align_corners = param.align_corners;
+  bool align_mode = param.align_mode;
   std::string interp_method = "Bilinear";
   lite::arm::math::interpolate(X,
                                OutSize,
@@ -45,6 +46,7 @@ void BilinearInterpCompute::Run() {
                                out_w,
                                scale,
                                align_corners,
+                               align_mode,
                                interp_method);
 }
 
@@ -59,6 +61,7 @@ void NearestInterpCompute::Run() {
   int out_w = param.out_w;
   int out_h = param.out_h;
   bool align_corners = param.align_corners;
+  bool align_mode = param.align_mode;
   std::string interp_method = "Nearest";
   lite::arm::math::interpolate(X,
                                OutSize,
@@ -69,6 +72,7 @@ void NearestInterpCompute::Run() {
                                out_w,
                                scale,
                                align_corners,
+                               align_mode,
                                interp_method);
 }
 
diff --git a/lite/tests/cv/image_convert_test.cc b/lite/tests/cv/image_convert_test.cc
index b1302f3396..ee2bda1226 100644
--- a/lite/tests/cv/image_convert_test.cc
+++ b/lite/tests/cv/image_convert_test.cc
@@ -293,53 +293,53 @@ void test_img(const std::vector<int>& cluster_id,
 
         // LOG(INFO) << "image convert saber compute";
         t_convert.Start();
-        // 方法一: image_preprocess.imageCovert(src, lite_dst);
-        image_preprocess.imageConvert(
+        // 方法一: image_preprocess.image_convert(src, lite_dst);
+        image_preprocess.image_convert(
             src, lite_dst, (ImageFormat)srcFormat, (ImageFormat)dstFormat);
         t_convert.Stop();
 
         // LOG(INFO) << "image resize saber compute";
         t_resize.Start();
-        // 方法一:image_preprocess.imageResize(lite_dst, resize_tmp);
-        image_preprocess.imageResize(lite_dst,
-                                     resize_tmp,
-                                     (ImageFormat)dstFormat,
-                                     srcw,
-                                     srch,
-                                     dstw,
-                                     dsth);
+        // 方法一:image_preprocess.image_resize(lite_dst, resize_tmp);
+        image_preprocess.image_resize(lite_dst,
+                                      resize_tmp,
+                                      (ImageFormat)dstFormat,
+                                      srcw,
+                                      srch,
+                                      dstw,
+                                      dsth);
         t_resize.Stop();
 
         // LOG(INFO) << "image rotate saber compute";
         t_rotate.Start();
-        // 方法一: image_preprocess.imageRotate(resize_tmp, tv_out_ratote);
-        image_preprocess.imageRotate(resize_tmp,
-                                     tv_out_ratote,
-                                     (ImageFormat)dstFormat,
-                                     dstw,
-                                     dsth,
-                                     rotate);
+        // 方法一: image_preprocess.image_rotate(resize_tmp, tv_out_ratote);
+        image_preprocess.image_rotate(resize_tmp,
+                                      tv_out_ratote,
+                                      (ImageFormat)dstFormat,
+                                      dstw,
+                                      dsth,
+                                      rotate);
         t_rotate.Stop();
 
         // LOG(INFO) << "image flip saber compute";
         t_flip.Start();
-        // 方法一: image_preprocess.imageFlip(resize_tmp, tv_out_flip);
-        image_preprocess.imageFlip(
+        // 方法一: image_preprocess.image_flip(resize_tmp, tv_out_flip);
+        image_preprocess.image_flip(
             resize_tmp, tv_out_flip, (ImageFormat)dstFormat, dstw, dsth, flip);
         t_flip.Stop();
 
         // LOG(INFO) << "image to tensor compute";
         t_tensor.Start();
-        // 方法一: image_preprocess.image2Tensor(
+        // 方法一: image_preprocess.image_to_tensor(
         //  resize_tmp, &dst_tensor, layout, means, scales);
-        image_preprocess.image2Tensor(resize_tmp,
-                                      &dst_tensor,
-                                      (ImageFormat)dstFormat,
-                                      dstw,
-                                      dsth,
-                                      layout,
-                                      means,
-                                      scales);
+        image_preprocess.image_to_tensor(resize_tmp,
+                                         &dst_tensor,
+                                         (ImageFormat)dstFormat,
+                                         dstw,
+                                         dsth,
+                                         layout,
+                                         means,
+                                         scales);
         t_tensor.Stop();
         t1.Stop();
       }
@@ -680,7 +680,7 @@ void test_rotate(const std::vector<int>& cluster_id,
 
       for (int i = 0; i < test_iter; ++i) {
         t_rotate.Start();
-        image_preprocess.imageRotate(src, lite_dst);
+        image_preprocess.image_rotate(src, lite_dst);
         t_rotate.Stop();
       }
       LOG(INFO) << "image rotate avg time : " << t_rotate.LapTimes().Avg()
@@ -847,7 +847,7 @@ void test_flip(const std::vector<int>& cluster_id,
 
       for (int i = 0; i < test_iter; ++i) {
         t_rotate.Start();
-        image_preprocess.imageFlip(src, lite_dst);
+        image_preprocess.image_flip(src, lite_dst);
         t_rotate.Stop();
       }
       LOG(INFO) << "image flip avg time : " << t_rotate.LapTimes().Avg()
@@ -1016,7 +1016,7 @@ void test_resize(const std::vector<int>& cluster_id,
 
       for (int i = 0; i < test_iter; ++i) {
         t_rotate.Start();
-        image_preprocess.imageResize(src, lite_dst);
+        image_preprocess.image_resize(src, lite_dst);
         t_rotate.Stop();
       }
       LOG(INFO) << "image Resize avg time : " << t_rotate.LapTimes().Avg()
@@ -1191,7 +1191,7 @@ void test_convert(const std::vector<int>& cluster_id,
 
       for (int i = 0; i < test_iter; ++i) {
         t_rotate.Start();
-        image_preprocess.imageConvert(src, lite_dst);
+        image_preprocess.image_convert(src, lite_dst);
         t_rotate.Stop();
       }
       LOG(INFO) << "image Convert avg time : " << t_rotate.LapTimes().Avg()
diff --git a/lite/tests/cv/image_profiler_test.cc b/lite/tests/cv/image_profiler_test.cc
index c440940bc2..074f2e6ce8 100644
--- a/lite/tests/cv/image_profiler_test.cc
+++ b/lite/tests/cv/image_profiler_test.cc
@@ -163,7 +163,7 @@ void test_convert(const std::vector<int>& cluster_id,
 
       for (int i = 0; i < test_iter; ++i) {
         t_lite.Start();
-        image_preprocess.imageConvert(src, lite_dst);
+        image_preprocess.image_convert(src, lite_dst);
         t_lite.Stop();
       }
       LOG(INFO) << "image Convert avg time : " << t_lite.LapTimes().Avg()
@@ -284,7 +284,7 @@ void test_resize(const std::vector<int>& cluster_id,
 
       for (int i = 0; i < test_iter; ++i) {
         t_rotate.Start();
-        image_preprocess.imageResize(src, lite_dst);
+        image_preprocess.image_resize(src, lite_dst);
         t_rotate.Stop();
       }
       LOG(INFO) << "image Resize avg time : " << t_rotate.LapTimes().Avg()
@@ -405,7 +405,7 @@ void test_flip(const std::vector<int>& cluster_id,
 
       for (int i = 0; i < test_iter; ++i) {
         t_lite.Start();
-        image_preprocess.imageFlip(src, lite_dst);
+        image_preprocess.image_flip(src, lite_dst);
         t_lite.Stop();
       }
       LOG(INFO) << "image flip avg time : " << t_lite.LapTimes().Avg()
@@ -523,7 +523,7 @@ void test_rotate(const std::vector<int>& cluster_id,
 
       for (int i = 0; i < test_iter; ++i) {
         t_lite.Start();
-        image_preprocess.imageRotate(src, lite_dst);
+        image_preprocess.image_rotate(src, lite_dst);
         t_lite.Stop();
       }
       LOG(INFO) << "image rotate avg time : " << t_lite.LapTimes().Avg()
@@ -667,14 +667,14 @@ void test_to_tensor(const std::vector<int>& cluster_id,
 
       for (int i = 0; i < test_iter; ++i) {
         t_lite.Start();
-        image_preprocess.image2Tensor(src,
-                                      &dst_tensor,
-                                      (ImageFormat)dstFormat,
-                                      dstw,
-                                      dsth,
-                                      layout,
-                                      means,
-                                      scales);
+        image_preprocess.image_to_tensor(src,
+                                         &dst_tensor,
+                                         (ImageFormat)dstFormat,
+                                         dstw,
+                                         dsth,
+                                         layout,
+                                         means,
+                                         scales);
         t_lite.Stop();
       }
       LOG(INFO) << "image tensor avg time : " << t_lite.LapTimes().Avg()
diff --git a/lite/tests/kernels/interp_compute_test.cc b/lite/tests/kernels/interp_compute_test.cc
index 16bc735f81..8d10040bca 100644
--- a/lite/tests/kernels/interp_compute_test.cc
+++ b/lite/tests/kernels/interp_compute_test.cc
@@ -416,11 +416,7 @@ void TestInterpAlignMode(Place place, float abs_error = 2e-5) {
   for (auto x_dims : std::vector<std::vector<int64_t>>{{3, 4, 8, 9}}) {
     for (bool align_corners : {true, false}) {
       for (int align_mode : {0, 1}) {
-        // may exist bug in arm kernel
-        if (place == TARGET(kARM) && align_mode == 1 && !align_corners) {
-          continue;
-        }
         // align_mode = 0 && align_corners = false NOT supported in Huawei
         // Ascend NPU DDK
         if (place == TARGET(kHuaweiAscendNPU) && align_mode == 0 &&
             !align_corners) {
diff --git a/lite/tests/math/sgemm_compute_test.cc b/lite/tests/math/sgemm_compute_test.cc
index 11f39ccf57..c16c7332f6 100644
--- a/lite/tests/math/sgemm_compute_test.cc
+++ b/lite/tests/math/sgemm_compute_test.cc
@@ -47,6 +47,7 @@ DEFINE_bool(basic_test, true, "do all tests");
 #else
 DEFINE_bool(basic_test, false, "do all tests");
 #endif
+
 DEFINE_bool(check_result, true, "check the result");
 
 DEFINE_int32(M, 512, "gemm: M");
-- 
GitLab