From 9a9d1cf2916587beb7a7f9494872b971b0590283 Mon Sep 17 00:00:00 2001 From: HappyAngel Date: Fri, 18 Sep 2020 11:03:35 +0800 Subject: [PATCH] [arm]Bilinear resize compute error fix (#4351) * fix bilinear_resize result not equal with fluid. test=develop * fix cv build error. test=develop * fix format. test=develop --- lite/backends/arm/math/interpolate.cc | 52 +++++++++--------- lite/backends/arm/math/interpolate.h | 8 ++- lite/demo/cxx/test_cv/test_img_prepross.cc | 10 ++-- lite/demo/cxx/test_cv/test_model_cv.cc | 6 +- lite/kernels/arm/interpolate_compute.cc | 4 ++ lite/tests/cv/image_convert_test.cc | 64 +++++++++++----------- lite/tests/cv/image_profiler_test.cc | 24 ++++---- lite/tests/kernels/interp_compute_test.cc | 5 -- lite/tests/math/sgemm_compute_test.cc | 1 + 9 files changed, 87 insertions(+), 87 deletions(-) diff --git a/lite/backends/arm/math/interpolate.cc b/lite/backends/arm/math/interpolate.cc index 1c53142fc5..4345c2e813 100644 --- a/lite/backends/arm/math/interpolate.cc +++ b/lite/backends/arm/math/interpolate.cc @@ -70,7 +70,8 @@ void bilinear_interp(const float* src, int h_out, float scale_x, float scale_y, - bool with_align) { + bool align_corners, + bool align_mode) { int* buf = new int[w_out + h_out + w_out * 2 + h_out * 2]; int* xofs = buf; @@ -78,14 +79,13 @@ void bilinear_interp(const float* src, float* alpha = reinterpret_cast(buf + w_out + h_out); float* beta = reinterpret_cast(buf + w_out + h_out + w_out * 2); + bool with_align = (align_mode == 0 && !align_corners); float fx = 0.0f; float fy = 0.0f; int sx = 0; int sy = 0; - if (with_align) { - scale_x = static_cast(w_in - 1) / (w_out - 1); - scale_y = static_cast(h_in - 1) / (h_out - 1); + if (!with_align) { // calculate x axis coordinate for (int dx = 0; dx < w_out; dx++) { fx = dx * scale_x; @@ -105,8 +105,6 @@ void bilinear_interp(const float* src, beta[dy * 2 + 1] = fy; } } else { - scale_x = static_cast(w_in) / w_out; - scale_y = static_cast(h_in) / h_out; // calculate x axis coordinate for (int dx = 0; dx < w_out; dx++) { fx = scale_x * (dx + 0.5f) - 0.5f; @@ -468,15 +466,9 @@ void nearest_interp(const float* src, float* dst, int w_out, int h_out, - float scale_x, - float scale_y, + float scale_w_new, + float scale_h_new, bool with_align) { - float scale_w_new = (with_align) - ? (static_cast(w_in - 1) / (w_out - 1)) - : (static_cast(w_in) / (w_out)); - float scale_h_new = (with_align) - ? (static_cast(h_in - 1) / (h_out - 1)) - : (static_cast(h_in) / (h_out)); if (with_align) { for (int h = 0; h < h_out; ++h) { float* dst_p = dst + h * w_out; @@ -506,7 +498,8 @@ void interpolate(lite::Tensor* X, int out_height, int out_width, float scale, - bool with_align, + bool align_corners, + bool align_mode, std::string interpolate_type) { int in_h = X->dims()[2]; int in_w = X->dims()[3]; @@ -531,12 +524,12 @@ void interpolate(lite::Tensor* X, out_width = out_size_data[1]; } } - float height_scale = scale; - float width_scale = scale; - if (out_width > 0 && out_height > 0) { - height_scale = static_cast(out_height / X->dims()[2]); - width_scale = static_cast(out_width / X->dims()[3]); - } + // float height_scale = scale; + // float width_scale = scale; + // if (out_width > 0 && out_height > 0) { + // height_scale = static_cast(out_height / X->dims()[2]); + // width_scale = static_cast(out_width / X->dims()[3]); + // } int num_cout = X->dims()[0]; int c_cout = X->dims()[1]; Out->Resize({num_cout, c_cout, out_height, out_width}); @@ -551,6 +544,10 @@ void interpolate(lite::Tensor* X, int spatial_in = in_h * in_w; int spatial_out = out_h * out_w; + float scale_x = (align_corners) ? (static_cast(in_w - 1) / (out_w - 1)) + : (static_cast(in_w) / (out_w)); + float scale_y = (align_corners) ? (static_cast(in_h - 1) / (out_h - 1)) + : (static_cast(in_h) / (out_h)); if ("Bilinear" == interpolate_type) { #pragma omp parallel for for (int i = 0; i < count; ++i) { @@ -560,9 +557,10 @@ void interpolate(lite::Tensor* X, dout + spatial_out * i, out_w, out_h, - 1.f / width_scale, - 1.f / height_scale, - with_align); + scale_x, + scale_y, + align_corners, + align_mode); } } else if ("Nearest" == interpolate_type) { #pragma omp parallel for @@ -573,9 +571,9 @@ void interpolate(lite::Tensor* X, dout + spatial_out * i, out_w, out_h, - 1.f / width_scale, - 1.f / height_scale, - with_align); + scale_x, + scale_y, + align_corners); } } } diff --git a/lite/backends/arm/math/interpolate.h b/lite/backends/arm/math/interpolate.h index e9c41c5bc8..82c4c068b6 100644 --- a/lite/backends/arm/math/interpolate.h +++ b/lite/backends/arm/math/interpolate.h @@ -30,7 +30,8 @@ void bilinear_interp(const float* src, int h_out, float scale_x, float scale_y, - bool with_align); + bool align_corners, + bool align_mode); void nearest_interp(const float* src, int w_in, @@ -40,7 +41,7 @@ void nearest_interp(const float* src, int h_out, float scale_x, float scale_y, - bool with_align); + bool align_corners); void interpolate(lite::Tensor* X, lite::Tensor* OutSize, @@ -50,7 +51,8 @@ void interpolate(lite::Tensor* X, int out_height, int out_width, float scale, - bool with_align, + bool align_corners, + bool align_mode, std::string interpolate_type); } /* namespace math */ diff --git a/lite/demo/cxx/test_cv/test_img_prepross.cc b/lite/demo/cxx/test_cv/test_img_prepross.cc index 1fe632d387..0e00a02260 100644 --- a/lite/demo/cxx/test_cv/test_img_prepross.cc +++ b/lite/demo/cxx/test_cv/test_img_prepross.cc @@ -128,7 +128,7 @@ bool test_convert(bool cv_run, for (int i = 0; i < test_iter; i++) { clock_t begin = clock(); // resize default linear - image_preprocess.imageConvert(src, resize_lite); + image_preprocess.image_convert(src, resize_lite); clock_t end = clock(); to_lite += (end - begin); } @@ -226,7 +226,7 @@ bool test_flip(bool cv_run, for (int i = 0; i < test_iter; i++) { clock_t begin = clock(); // resize default linear - image_preprocess.imageFlip(src, resize_lite); + image_preprocess.image_flip(src, resize_lite); clock_t end = clock(); to_lite += (end - begin); } @@ -330,7 +330,7 @@ bool test_rotate(bool cv_run, for (int i = 0; i < test_iter; i++) { clock_t begin = clock(); // resize default linear - image_preprocess.imageRotate(src, resize_lite); + image_preprocess.image_rotate(src, resize_lite); clock_t end = clock(); to_lite += (end - begin); } @@ -426,7 +426,7 @@ bool test_resize(bool cv_run, for (int i = 0; i < test_iter; i++) { clock_t begin = clock(); // resize default linear - image_preprocess.imageResize(src, resize_lite); + image_preprocess.image_resize(src, resize_lite); clock_t end = clock(); to_lite += (end - begin); } @@ -526,7 +526,7 @@ bool test_crop(bool cv_run, std::cout << "lite compute:" << std::endl; for (int i = 0; i < test_iter; i++) { clock_t begin = clock(); - image_preprocess.imageCrop( + image_preprocess.image_crop( src, resize_lite, dstFormat, srcw, srch, left_x, left_y, dstw, dsth); clock_t end = clock(); to_lite += (end - begin); diff --git a/lite/demo/cxx/test_cv/test_model_cv.cc b/lite/demo/cxx/test_cv/test_model_cv.cc index caa085eecb..6da35ea26f 100644 --- a/lite/demo/cxx/test_cv/test_model_cv.cc +++ b/lite/demo/cxx/test_cv/test_model_cv.cc @@ -88,13 +88,13 @@ void pre_process(const cv::Mat& img, int width, int height, Tensor dstTensor) { uint8_t* rgb_ptr = new uint8_t[img.cols * img.rows * 3]; uint8_t* resize_ptr = new uint8_t[width * height * 3]; // do convert bgr--rgb - img_process.imageConvert(img_ptr, rgb_ptr); + img_process.image_convert(img_ptr, rgb_ptr); // do resize - img_process.imageResize(rgb_ptr, resize_ptr); + img_process.image_resize(rgb_ptr, resize_ptr); // data--tensor and normalize float means[3] = {103.94f, 116.78f, 123.68f}; float scales[3] = {0.017f, 0.017f, 0.017f}; - img_process.image2Tensor( + img_process.image_to_tensor( resize_ptr, &dstTensor, LayoutType::kNCHW, means, scales); float* data = dstTensor.mutable_data(); #else diff --git a/lite/kernels/arm/interpolate_compute.cc b/lite/kernels/arm/interpolate_compute.cc index 760b2fcf06..8593758d5a 100644 --- a/lite/kernels/arm/interpolate_compute.cc +++ b/lite/kernels/arm/interpolate_compute.cc @@ -35,6 +35,7 @@ void BilinearInterpCompute::Run() { int out_w = param.out_w; int out_h = param.out_h; bool align_corners = param.align_corners; + bool align_mode = param.align_mode; std::string interp_method = "Bilinear"; lite::arm::math::interpolate(X, OutSize, @@ -45,6 +46,7 @@ void BilinearInterpCompute::Run() { out_w, scale, align_corners, + align_mode, interp_method); } @@ -59,6 +61,7 @@ void NearestInterpCompute::Run() { int out_w = param.out_w; int out_h = param.out_h; bool align_corners = param.align_corners; + bool align_mode = param.align_mode; std::string interp_method = "Nearest"; lite::arm::math::interpolate(X, OutSize, @@ -69,6 +72,7 @@ void NearestInterpCompute::Run() { out_w, scale, align_corners, + align_mode, interp_method); } diff --git a/lite/tests/cv/image_convert_test.cc b/lite/tests/cv/image_convert_test.cc index b1302f3396..ee2bda1226 100644 --- a/lite/tests/cv/image_convert_test.cc +++ b/lite/tests/cv/image_convert_test.cc @@ -293,53 +293,53 @@ void test_img(const std::vector& cluster_id, // LOG(INFO) << "image convert saber compute"; t_convert.Start(); - // 方法一: image_preprocess.imageCovert(src, lite_dst); - image_preprocess.imageConvert( + // 方法一: image_preprocess.image_convert(src, lite_dst); + image_preprocess.image_convert( src, lite_dst, (ImageFormat)srcFormat, (ImageFormat)dstFormat); t_convert.Stop(); // LOG(INFO) << "image resize saber compute"; t_resize.Start(); - // 方法一:image_preprocess.imageResize(lite_dst, resize_tmp); - image_preprocess.imageResize(lite_dst, - resize_tmp, - (ImageFormat)dstFormat, - srcw, - srch, - dstw, - dsth); + // 方法一:image_preprocess.image_resize(lite_dst, resize_tmp); + image_preprocess.image_resize(lite_dst, + resize_tmp, + (ImageFormat)dstFormat, + srcw, + srch, + dstw, + dsth); t_resize.Stop(); // LOG(INFO) << "image rotate saber compute"; t_rotate.Start(); - // 方法一: image_preprocess.imageRotate(resize_tmp, tv_out_ratote); - image_preprocess.imageRotate(resize_tmp, - tv_out_ratote, - (ImageFormat)dstFormat, - dstw, - dsth, - rotate); + // 方法一: image_preprocess.image_rotate(resize_tmp, tv_out_ratote); + image_preprocess.image_rotate(resize_tmp, + tv_out_ratote, + (ImageFormat)dstFormat, + dstw, + dsth, + rotate); t_rotate.Stop(); // LOG(INFO) << "image flip saber compute"; t_flip.Start(); - // 方法一: image_preprocess.imageFlip(resize_tmp, tv_out_flip); - image_preprocess.imageFlip( + // 方法一: image_preprocess.image_flip(resize_tmp, tv_out_flip); + image_preprocess.image_flip( resize_tmp, tv_out_flip, (ImageFormat)dstFormat, dstw, dsth, flip); t_flip.Stop(); // LOG(INFO) << "image to tensor compute"; t_tensor.Start(); - // 方法一: image_preprocess.image2Tensor( + // 方法一: image_preprocess.image_to_tensor( // resize_tmp, &dst_tensor, layout, means, scales); - image_preprocess.image2Tensor(resize_tmp, - &dst_tensor, - (ImageFormat)dstFormat, - dstw, - dsth, - layout, - means, - scales); + image_preprocess.image_to_tensor(resize_tmp, + &dst_tensor, + (ImageFormat)dstFormat, + dstw, + dsth, + layout, + means, + scales); t_tensor.Stop(); t1.Stop(); } @@ -680,7 +680,7 @@ void test_rotate(const std::vector& cluster_id, for (int i = 0; i < test_iter; ++i) { t_rotate.Start(); - image_preprocess.imageRotate(src, lite_dst); + image_preprocess.image_rotate(src, lite_dst); t_rotate.Stop(); } LOG(INFO) << "image rotate avg time : " << t_rotate.LapTimes().Avg() @@ -847,7 +847,7 @@ void test_flip(const std::vector& cluster_id, for (int i = 0; i < test_iter; ++i) { t_rotate.Start(); - image_preprocess.imageFlip(src, lite_dst); + image_preprocess.image_flip(src, lite_dst); t_rotate.Stop(); } LOG(INFO) << "image flip avg time : " << t_rotate.LapTimes().Avg() @@ -1016,7 +1016,7 @@ void test_resize(const std::vector& cluster_id, for (int i = 0; i < test_iter; ++i) { t_rotate.Start(); - image_preprocess.imageResize(src, lite_dst); + image_preprocess.image_resize(src, lite_dst); t_rotate.Stop(); } LOG(INFO) << "image Resize avg time : " << t_rotate.LapTimes().Avg() @@ -1191,7 +1191,7 @@ void test_convert(const std::vector& cluster_id, for (int i = 0; i < test_iter; ++i) { t_rotate.Start(); - image_preprocess.imageConvert(src, lite_dst); + image_preprocess.image_convert(src, lite_dst); t_rotate.Stop(); } LOG(INFO) << "image Convert avg time : " << t_rotate.LapTimes().Avg() diff --git a/lite/tests/cv/image_profiler_test.cc b/lite/tests/cv/image_profiler_test.cc index c440940bc2..074f2e6ce8 100644 --- a/lite/tests/cv/image_profiler_test.cc +++ b/lite/tests/cv/image_profiler_test.cc @@ -163,7 +163,7 @@ void test_convert(const std::vector& cluster_id, for (int i = 0; i < test_iter; ++i) { t_lite.Start(); - image_preprocess.imageConvert(src, lite_dst); + image_preprocess.image_convert(src, lite_dst); t_lite.Stop(); } LOG(INFO) << "image Convert avg time : " << t_lite.LapTimes().Avg() @@ -284,7 +284,7 @@ void test_resize(const std::vector& cluster_id, for (int i = 0; i < test_iter; ++i) { t_rotate.Start(); - image_preprocess.imageResize(src, lite_dst); + image_preprocess.image_resize(src, lite_dst); t_rotate.Stop(); } LOG(INFO) << "image Resize avg time : " << t_rotate.LapTimes().Avg() @@ -405,7 +405,7 @@ void test_flip(const std::vector& cluster_id, for (int i = 0; i < test_iter; ++i) { t_lite.Start(); - image_preprocess.imageFlip(src, lite_dst); + image_preprocess.image_flip(src, lite_dst); t_lite.Stop(); } LOG(INFO) << "image flip avg time : " << t_lite.LapTimes().Avg() @@ -523,7 +523,7 @@ void test_rotate(const std::vector& cluster_id, for (int i = 0; i < test_iter; ++i) { t_lite.Start(); - image_preprocess.imageRotate(src, lite_dst); + image_preprocess.image_rotate(src, lite_dst); t_lite.Stop(); } LOG(INFO) << "image rotate avg time : " << t_lite.LapTimes().Avg() @@ -667,14 +667,14 @@ void test_to_tensor(const std::vector& cluster_id, for (int i = 0; i < test_iter; ++i) { t_lite.Start(); - image_preprocess.image2Tensor(src, - &dst_tensor, - (ImageFormat)dstFormat, - dstw, - dsth, - layout, - means, - scales); + image_preprocess.image_to_tensor(src, + &dst_tensor, + (ImageFormat)dstFormat, + dstw, + dsth, + layout, + means, + scales); t_lite.Stop(); } LOG(INFO) << "image tensor avg time : " << t_lite.LapTimes().Avg() diff --git a/lite/tests/kernels/interp_compute_test.cc b/lite/tests/kernels/interp_compute_test.cc index 16bc735f81..8d10040bca 100644 --- a/lite/tests/kernels/interp_compute_test.cc +++ b/lite/tests/kernels/interp_compute_test.cc @@ -416,11 +416,6 @@ void TestInterpAlignMode(Place place, float abs_error = 2e-5) { for (auto x_dims : std::vector>{{3, 4, 8, 9}}) { for (bool align_corners : {true, false}) { for (int align_mode : {0, 1}) { - // may exist bug in arm kernel - if (place == TARGET(kARM) && align_mode == 1 && !align_corners) { - continue; - } - // align_mode = 0 && align_corners = false NOT supported in Huawei // Ascend NPU DDK if (place == TARGET(kHuaweiAscendNPU) && align_mode == 0 && !align_corners) { diff --git a/lite/tests/math/sgemm_compute_test.cc b/lite/tests/math/sgemm_compute_test.cc index 11f39ccf57..c16c7332f6 100644 --- a/lite/tests/math/sgemm_compute_test.cc +++ b/lite/tests/math/sgemm_compute_test.cc @@ -47,6 +47,7 @@ DEFINE_bool(basic_test, true, "do all tests"); #else DEFINE_bool(basic_test, false, "do all tests"); #endif + DEFINE_bool(check_result, true, "check the result"); DEFINE_int32(M, 512, "gemm: M"); -- GitLab