未验证 提交 9a9d1cf2 编写于 作者: H HappyAngel 提交者: GitHub

[arm] Fix bilinear resize computation error (#4351)

* fix bilinear_resize result not matching the fluid result. test=develop

* fix cv build error. test=develop


* fix format. test=develop
上级 db2ab554
......@@ -70,7 +70,8 @@ void bilinear_interp(const float* src,
int h_out,
float scale_x,
float scale_y,
bool with_align) {
bool align_corners,
bool align_mode) {
int* buf = new int[w_out + h_out + w_out * 2 + h_out * 2];
int* xofs = buf;
......@@ -78,14 +79,13 @@ void bilinear_interp(const float* src,
float* alpha = reinterpret_cast<float*>(buf + w_out + h_out);
float* beta = reinterpret_cast<float*>(buf + w_out + h_out + w_out * 2);
bool with_align = (align_mode == 0 && !align_corners);
float fx = 0.0f;
float fy = 0.0f;
int sx = 0;
int sy = 0;
if (with_align) {
scale_x = static_cast<float>(w_in - 1) / (w_out - 1);
scale_y = static_cast<float>(h_in - 1) / (h_out - 1);
if (!with_align) {
// calculate x axis coordinate
for (int dx = 0; dx < w_out; dx++) {
fx = dx * scale_x;
......@@ -105,8 +105,6 @@ void bilinear_interp(const float* src,
beta[dy * 2 + 1] = fy;
}
} else {
scale_x = static_cast<float>(w_in) / w_out;
scale_y = static_cast<float>(h_in) / h_out;
// calculate x axis coordinate
for (int dx = 0; dx < w_out; dx++) {
fx = scale_x * (dx + 0.5f) - 0.5f;
......@@ -468,15 +466,9 @@ void nearest_interp(const float* src,
float* dst,
int w_out,
int h_out,
float scale_x,
float scale_y,
float scale_w_new,
float scale_h_new,
bool with_align) {
float scale_w_new = (with_align)
? (static_cast<float>(w_in - 1) / (w_out - 1))
: (static_cast<float>(w_in) / (w_out));
float scale_h_new = (with_align)
? (static_cast<float>(h_in - 1) / (h_out - 1))
: (static_cast<float>(h_in) / (h_out));
if (with_align) {
for (int h = 0; h < h_out; ++h) {
float* dst_p = dst + h * w_out;
......@@ -506,7 +498,8 @@ void interpolate(lite::Tensor* X,
int out_height,
int out_width,
float scale,
bool with_align,
bool align_corners,
bool align_mode,
std::string interpolate_type) {
int in_h = X->dims()[2];
int in_w = X->dims()[3];
......@@ -531,12 +524,12 @@ void interpolate(lite::Tensor* X,
out_width = out_size_data[1];
}
}
float height_scale = scale;
float width_scale = scale;
if (out_width > 0 && out_height > 0) {
height_scale = static_cast<float>(out_height / X->dims()[2]);
width_scale = static_cast<float>(out_width / X->dims()[3]);
}
// float height_scale = scale;
// float width_scale = scale;
// if (out_width > 0 && out_height > 0) {
// height_scale = static_cast<float>(out_height / X->dims()[2]);
// width_scale = static_cast<float>(out_width / X->dims()[3]);
// }
int num_cout = X->dims()[0];
int c_cout = X->dims()[1];
Out->Resize({num_cout, c_cout, out_height, out_width});
......@@ -551,6 +544,10 @@ void interpolate(lite::Tensor* X,
int spatial_in = in_h * in_w;
int spatial_out = out_h * out_w;
float scale_x = (align_corners) ? (static_cast<float>(in_w - 1) / (out_w - 1))
: (static_cast<float>(in_w) / (out_w));
float scale_y = (align_corners) ? (static_cast<float>(in_h - 1) / (out_h - 1))
: (static_cast<float>(in_h) / (out_h));
if ("Bilinear" == interpolate_type) {
#pragma omp parallel for
for (int i = 0; i < count; ++i) {
......@@ -560,9 +557,10 @@ void interpolate(lite::Tensor* X,
dout + spatial_out * i,
out_w,
out_h,
1.f / width_scale,
1.f / height_scale,
with_align);
scale_x,
scale_y,
align_corners,
align_mode);
}
} else if ("Nearest" == interpolate_type) {
#pragma omp parallel for
......@@ -573,9 +571,9 @@ void interpolate(lite::Tensor* X,
dout + spatial_out * i,
out_w,
out_h,
1.f / width_scale,
1.f / height_scale,
with_align);
scale_x,
scale_y,
align_corners);
}
}
}
......
......@@ -30,7 +30,8 @@ void bilinear_interp(const float* src,
int h_out,
float scale_x,
float scale_y,
bool with_align);
bool align_corners,
bool align_mode);
void nearest_interp(const float* src,
int w_in,
......@@ -40,7 +41,7 @@ void nearest_interp(const float* src,
int h_out,
float scale_x,
float scale_y,
bool with_align);
bool align_corners);
void interpolate(lite::Tensor* X,
lite::Tensor* OutSize,
......@@ -50,7 +51,8 @@ void interpolate(lite::Tensor* X,
int out_height,
int out_width,
float scale,
bool with_align,
bool align_corners,
bool align_mode,
std::string interpolate_type);
} /* namespace math */
......
......@@ -128,7 +128,7 @@ bool test_convert(bool cv_run,
for (int i = 0; i < test_iter; i++) {
clock_t begin = clock();
// resize default linear
image_preprocess.imageConvert(src, resize_lite);
image_preprocess.image_convert(src, resize_lite);
clock_t end = clock();
to_lite += (end - begin);
}
......@@ -226,7 +226,7 @@ bool test_flip(bool cv_run,
for (int i = 0; i < test_iter; i++) {
clock_t begin = clock();
// resize default linear
image_preprocess.imageFlip(src, resize_lite);
image_preprocess.image_flip(src, resize_lite);
clock_t end = clock();
to_lite += (end - begin);
}
......@@ -330,7 +330,7 @@ bool test_rotate(bool cv_run,
for (int i = 0; i < test_iter; i++) {
clock_t begin = clock();
// resize default linear
image_preprocess.imageRotate(src, resize_lite);
image_preprocess.image_rotate(src, resize_lite);
clock_t end = clock();
to_lite += (end - begin);
}
......@@ -426,7 +426,7 @@ bool test_resize(bool cv_run,
for (int i = 0; i < test_iter; i++) {
clock_t begin = clock();
// resize default linear
image_preprocess.imageResize(src, resize_lite);
image_preprocess.image_resize(src, resize_lite);
clock_t end = clock();
to_lite += (end - begin);
}
......@@ -526,7 +526,7 @@ bool test_crop(bool cv_run,
std::cout << "lite compute:" << std::endl;
for (int i = 0; i < test_iter; i++) {
clock_t begin = clock();
image_preprocess.imageCrop(
image_preprocess.image_crop(
src, resize_lite, dstFormat, srcw, srch, left_x, left_y, dstw, dsth);
clock_t end = clock();
to_lite += (end - begin);
......
......@@ -88,13 +88,13 @@ void pre_process(const cv::Mat& img, int width, int height, Tensor dstTensor) {
uint8_t* rgb_ptr = new uint8_t[img.cols * img.rows * 3];
uint8_t* resize_ptr = new uint8_t[width * height * 3];
// do convert bgr--rgb
img_process.imageConvert(img_ptr, rgb_ptr);
img_process.image_convert(img_ptr, rgb_ptr);
// do resize
img_process.imageResize(rgb_ptr, resize_ptr);
img_process.image_resize(rgb_ptr, resize_ptr);
// data--tensor and normalize
float means[3] = {103.94f, 116.78f, 123.68f};
float scales[3] = {0.017f, 0.017f, 0.017f};
img_process.image2Tensor(
img_process.image_to_tensor(
resize_ptr, &dstTensor, LayoutType::kNCHW, means, scales);
float* data = dstTensor.mutable_data<float>();
#else
......
......@@ -35,6 +35,7 @@ void BilinearInterpCompute::Run() {
int out_w = param.out_w;
int out_h = param.out_h;
bool align_corners = param.align_corners;
bool align_mode = param.align_mode;
std::string interp_method = "Bilinear";
lite::arm::math::interpolate(X,
OutSize,
......@@ -45,6 +46,7 @@ void BilinearInterpCompute::Run() {
out_w,
scale,
align_corners,
align_mode,
interp_method);
}
......@@ -59,6 +61,7 @@ void NearestInterpCompute::Run() {
int out_w = param.out_w;
int out_h = param.out_h;
bool align_corners = param.align_corners;
bool align_mode = param.align_mode;
std::string interp_method = "Nearest";
lite::arm::math::interpolate(X,
OutSize,
......@@ -69,6 +72,7 @@ void NearestInterpCompute::Run() {
out_w,
scale,
align_corners,
align_mode,
interp_method);
}
......
......@@ -293,53 +293,53 @@ void test_img(const std::vector<int>& cluster_id,
// LOG(INFO) << "image convert saber compute";
t_convert.Start();
// 方法一: image_preprocess.imageCovert(src, lite_dst);
image_preprocess.imageConvert(
// 方法一: image_preprocess.image_convert(src, lite_dst);
image_preprocess.image_convert(
src, lite_dst, (ImageFormat)srcFormat, (ImageFormat)dstFormat);
t_convert.Stop();
// LOG(INFO) << "image resize saber compute";
t_resize.Start();
// 方法一:image_preprocess.imageResize(lite_dst, resize_tmp);
image_preprocess.imageResize(lite_dst,
resize_tmp,
(ImageFormat)dstFormat,
srcw,
srch,
dstw,
dsth);
// 方法一:image_preprocess.image_resize(lite_dst, resize_tmp);
image_preprocess.image_resize(lite_dst,
resize_tmp,
(ImageFormat)dstFormat,
srcw,
srch,
dstw,
dsth);
t_resize.Stop();
// LOG(INFO) << "image rotate saber compute";
t_rotate.Start();
// 方法一: image_preprocess.imageRotate(resize_tmp, tv_out_ratote);
image_preprocess.imageRotate(resize_tmp,
tv_out_ratote,
(ImageFormat)dstFormat,
dstw,
dsth,
rotate);
// 方法一: image_preprocess.image_rotate(resize_tmp, tv_out_ratote);
image_preprocess.image_rotate(resize_tmp,
tv_out_ratote,
(ImageFormat)dstFormat,
dstw,
dsth,
rotate);
t_rotate.Stop();
// LOG(INFO) << "image flip saber compute";
t_flip.Start();
// 方法一: image_preprocess.imageFlip(resize_tmp, tv_out_flip);
image_preprocess.imageFlip(
// 方法一: image_preprocess.image_flip(resize_tmp, tv_out_flip);
image_preprocess.image_flip(
resize_tmp, tv_out_flip, (ImageFormat)dstFormat, dstw, dsth, flip);
t_flip.Stop();
// LOG(INFO) << "image to tensor compute";
t_tensor.Start();
// 方法一: image_preprocess.image2Tensor(
// 方法一: image_preprocess.image_to_tensor(
// resize_tmp, &dst_tensor, layout, means, scales);
image_preprocess.image2Tensor(resize_tmp,
&dst_tensor,
(ImageFormat)dstFormat,
dstw,
dsth,
layout,
means,
scales);
image_preprocess.image_to_tensor(resize_tmp,
&dst_tensor,
(ImageFormat)dstFormat,
dstw,
dsth,
layout,
means,
scales);
t_tensor.Stop();
t1.Stop();
}
......@@ -680,7 +680,7 @@ void test_rotate(const std::vector<int>& cluster_id,
for (int i = 0; i < test_iter; ++i) {
t_rotate.Start();
image_preprocess.imageRotate(src, lite_dst);
image_preprocess.image_rotate(src, lite_dst);
t_rotate.Stop();
}
LOG(INFO) << "image rotate avg time : " << t_rotate.LapTimes().Avg()
......@@ -847,7 +847,7 @@ void test_flip(const std::vector<int>& cluster_id,
for (int i = 0; i < test_iter; ++i) {
t_rotate.Start();
image_preprocess.imageFlip(src, lite_dst);
image_preprocess.image_flip(src, lite_dst);
t_rotate.Stop();
}
LOG(INFO) << "image flip avg time : " << t_rotate.LapTimes().Avg()
......@@ -1016,7 +1016,7 @@ void test_resize(const std::vector<int>& cluster_id,
for (int i = 0; i < test_iter; ++i) {
t_rotate.Start();
image_preprocess.imageResize(src, lite_dst);
image_preprocess.image_resize(src, lite_dst);
t_rotate.Stop();
}
LOG(INFO) << "image Resize avg time : " << t_rotate.LapTimes().Avg()
......@@ -1191,7 +1191,7 @@ void test_convert(const std::vector<int>& cluster_id,
for (int i = 0; i < test_iter; ++i) {
t_rotate.Start();
image_preprocess.imageConvert(src, lite_dst);
image_preprocess.image_convert(src, lite_dst);
t_rotate.Stop();
}
LOG(INFO) << "image Convert avg time : " << t_rotate.LapTimes().Avg()
......
......@@ -163,7 +163,7 @@ void test_convert(const std::vector<int>& cluster_id,
for (int i = 0; i < test_iter; ++i) {
t_lite.Start();
image_preprocess.imageConvert(src, lite_dst);
image_preprocess.image_convert(src, lite_dst);
t_lite.Stop();
}
LOG(INFO) << "image Convert avg time : " << t_lite.LapTimes().Avg()
......@@ -284,7 +284,7 @@ void test_resize(const std::vector<int>& cluster_id,
for (int i = 0; i < test_iter; ++i) {
t_rotate.Start();
image_preprocess.imageResize(src, lite_dst);
image_preprocess.image_resize(src, lite_dst);
t_rotate.Stop();
}
LOG(INFO) << "image Resize avg time : " << t_rotate.LapTimes().Avg()
......@@ -405,7 +405,7 @@ void test_flip(const std::vector<int>& cluster_id,
for (int i = 0; i < test_iter; ++i) {
t_lite.Start();
image_preprocess.imageFlip(src, lite_dst);
image_preprocess.image_flip(src, lite_dst);
t_lite.Stop();
}
LOG(INFO) << "image flip avg time : " << t_lite.LapTimes().Avg()
......@@ -523,7 +523,7 @@ void test_rotate(const std::vector<int>& cluster_id,
for (int i = 0; i < test_iter; ++i) {
t_lite.Start();
image_preprocess.imageRotate(src, lite_dst);
image_preprocess.image_rotate(src, lite_dst);
t_lite.Stop();
}
LOG(INFO) << "image rotate avg time : " << t_lite.LapTimes().Avg()
......@@ -667,14 +667,14 @@ void test_to_tensor(const std::vector<int>& cluster_id,
for (int i = 0; i < test_iter; ++i) {
t_lite.Start();
image_preprocess.image2Tensor(src,
&dst_tensor,
(ImageFormat)dstFormat,
dstw,
dsth,
layout,
means,
scales);
image_preprocess.image_to_tensor(src,
&dst_tensor,
(ImageFormat)dstFormat,
dstw,
dsth,
layout,
means,
scales);
t_lite.Stop();
}
LOG(INFO) << "image tensor avg time : " << t_lite.LapTimes().Avg()
......
......@@ -416,11 +416,6 @@ void TestInterpAlignMode(Place place, float abs_error = 2e-5) {
for (auto x_dims : std::vector<std::vector<int64_t>>{{3, 4, 8, 9}}) {
for (bool align_corners : {true, false}) {
for (int align_mode : {0, 1}) {
// may exist bug in arm kernel
if (place == TARGET(kARM) && align_mode == 1 && !align_corners) {
continue;
}
// align_mode = 0 && align_corners = false NOT supported in Huawei
// Ascend NPU DDK
if (place == TARGET(kHuaweiAscendNPU) && align_mode == 0 &&
!align_corners) {
......
......@@ -47,6 +47,7 @@ DEFINE_bool(basic_test, true, "do all tests");
#else
DEFINE_bool(basic_test, false, "do all tests");
#endif
DEFINE_bool(check_result, true, "check the result");
DEFINE_int32(M, 512, "gemm: M");
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册