diff --git a/lite/tests/cv/cv_basic.h b/lite/tests/cv/cv_basic.h index 92f68543bb15bdc15a8ed029f67ed33ca215361b..45ae3fb616527f404984bba7b2366a2f66c09a96 100644 --- a/lite/tests/cv/cv_basic.h +++ b/lite/tests/cv/cv_basic.h @@ -489,7 +489,7 @@ void image_resize_basic(const uint8_t* in_data, int size = srcw * srch; if (srcw == dstw && srch == dsth) { if (srcFormat == ImageFormat::NV12 || srcFormat == ImageFormat::NV21) { - size = srcw * (ceil(1.5 * srch)); + size = srcw * (static_cast(1.5 * srch)); } else if (srcFormat == ImageFormat::BGR || srcFormat == ImageFormat::RGB) { size = 3 * srcw * srch; } else if (srcFormat == ImageFormat::BGRA || @@ -499,23 +499,23 @@ void image_resize_basic(const uint8_t* in_data, memcpy(out_data, in_data, sizeof(uint8_t) * size); return; } - double scale_x = static_cast(srcw / dstw); - double scale_y = static_cast(srch / dsth); + double scale_x = static_cast(srcw) / dstw; + double scale_y = static_cast(srch) / dsth; int* buf = new int[dstw + dsth]; int* xofs = buf; int* yofs = buf + dstw; float* ialpha = new float[dstw * 2]; - float* ibeta = new float[dsth * 2]; + float* ibeta = new float[dsth * 3]; int w_in = srcw; int w_out = dstw; int num = 1; int orih = dsth; + compute_xy( srcw, srch, dstw, dsth, scale_x, scale_y, xofs, yofs, ialpha, ibeta); - if (srcFormat == ImageFormat::GRAY) { num = 1; } else if (srcFormat == ImageFormat::NV12 || srcFormat == ImageFormat::NV21) { @@ -538,9 +538,9 @@ void image_resize_basic(const uint8_t* in_data, int* yofs1 = nullptr; if (orih < dsth) { int tmp = dsth - orih; - float* ialpha1 = new float[dstw]; - int* xofs1 = new int[srcw]; - int* yofs1 = new int[tmp]; + ialpha1 = new float[srcw]; + xofs1 = new int[dstw / 2]; + yofs1 = new int[tmp]; compute_xy(srcw / 2, srch / 2, dstw / 2, @@ -550,18 +550,14 @@ void image_resize_basic(const uint8_t* in_data, xofs1, yofs1, ialpha1, - ibeta + dsth); + ibeta + orih * 2); } #pragma omp parallel for for (int dy = 0; dy < dsth; dy++) { uint8_t* out_ptr = out_data + dy * w_out; int y_in_start = yofs[dy]; - int y_in_end = y_in_start + 1; - int y_flag = 0; // only one line - if (y_in_start < 0) { - y_flag = 1; - y_in_end = 0; - } + int y_flag = 0; + float b0 = ibeta[dy * 2]; float b1 = ibeta[dy * 2 + 1]; if (dy >= orih) { @@ -569,6 +565,12 @@ void image_resize_basic(const uint8_t* in_data, ialpha = ialpha1; xofs = xofs1; yofs = yofs1; + y_in_start = yofs[dy - orih]; + } + int y_in_end = y_in_start + 1; + if (y_in_start < 0) { + y_flag = 1; + y_in_end = 0; } for (int dx = 0; dx < w_out; dx += num) { int tmp = dx / num; @@ -579,7 +581,6 @@ void image_resize_basic(const uint8_t* in_data, x_flag = 1; x_in_end = 0; } - // printf("x_in: %d, y_in: %d \n", x_in_start, y_in_start); float a0 = ialpha[tmp * 2]; float a1 = ialpha[tmp * 2 + 1]; int tl_index = y_in_start * w_in + x_in_start; // 0 @@ -605,9 +606,6 @@ void image_resize_basic(const uint8_t* in_data, bl_index++; br_index++; float outval = (tl * a0 + tr * a1) * b0 + (bl * a0 + br * a1) * b1; - // printf("tl: %d, tr: %d, bl: %d, br: %d \n", tl, tr, bl, br); - // printf("br_index: %d, a0: %f, b1: %f, out: %f \n", br_index, a0, b1, - // outval); out_ptr[ind++] = ceil(outval); } } diff --git a/lite/tests/cv/image_convert_test.cc b/lite/tests/cv/image_convert_test.cc index a6835fc8f883e84c8235fd141dfbdab537812d16..b1302f3396fa17471d4252e27897ec44c0110342 100644 --- a/lite/tests/cv/image_convert_test.cc +++ b/lite/tests/cv/image_convert_test.cc @@ -559,48 +559,722 @@ void test_img(const std::vector& cluster_id, } } +void test_rotate(const std::vector& cluster_id, + const std::vector& thread_num, + int srcw, + int srch, + int dstw, + int dsth, + ImageFormat srcFormat, + ImageFormat dstFormat, + float rotate, + FlipParam flip, + LayoutType layout, + int test_iter = 10) { +#ifdef LITE_WITH_ARM + paddle::lite::DeviceInfo::Init(); +#endif + for (auto& cls : cluster_id) { + for (auto& th : thread_num) { + std::unique_ptr ctx1( + new paddle::lite::KernelContext); + auto& ctx = ctx1->As(); + ctx.SetRunMode(static_cast(cls), th); + LOG(INFO) << "cluster: " << cls << ", threads: " << th; + + LOG(INFO) << " input tensor size, num= " << 1 << ", channel= " << 1 + << ", height= " << srch << ", width= " << srcw + << ", srcFormat= " << (ImageFormat)srcFormat; + if (srcFormat == ImageFormat::NV21) { + LOG(INFO) << "srcFormat: NV21"; + } + if (srcFormat == ImageFormat::NV12) { + LOG(INFO) << "srcFormat: NV12"; + } + if (srcFormat == ImageFormat::GRAY) { + LOG(INFO) << "srcFormat: GRAY"; + } + if (srcFormat == ImageFormat::BGRA) { + LOG(INFO) << "srcFormat: BGRA"; + } + if (srcFormat == ImageFormat::BGR) { + LOG(INFO) << "srcFormat: BGR"; + } + if (srcFormat == ImageFormat::RGBA) { + LOG(INFO) << "srcFormat: RGBA"; + } + if (srcFormat == ImageFormat::RGB) { + LOG(INFO) << "srcFormat: RGB"; + } + LOG(INFO) << " output tensor size, num=" << 1 << ", channel=" << 1 + << ", height=" << dsth << ", width=" << dstw + << ", dstFormat= " << (ImageFormat)dstFormat; + + if (dstFormat == ImageFormat::NV21) { + LOG(INFO) << "dstFormat: NV21"; + } + if (dstFormat == ImageFormat::NV12) { + LOG(INFO) << "dstFormat: NV12"; + } + if (dstFormat == ImageFormat::GRAY) { + LOG(INFO) << "dstFormat: GRAY"; + } + if (dstFormat == ImageFormat::BGRA) { + LOG(INFO) << "dstFormat: BGRA"; + } + if (dstFormat == ImageFormat::BGR) { + LOG(INFO) << "dstFormat: BGR"; + } + if (dstFormat == ImageFormat::RGBA) { + LOG(INFO) << "dstFormat: RGBA"; + } + if (dstFormat == ImageFormat::RGB) { + LOG(INFO) << "dstFormat: RGB"; + } + + LOG(INFO) << "Rotate = " << rotate << ", Flip = " << flip + << ", Layout = " << static_cast(layout); + + int size = 3 * srch * srcw; + if (srcFormat == ImageFormat::NV12 || srcFormat == ImageFormat::NV21) { + size = ceil(1.5 * srch) * srcw; + } else if (srcFormat == ImageFormat::BGRA || + srcFormat == ImageFormat::RGBA) { + size = 4 * srch * srcw; + } else if (srcFormat == ImageFormat::GRAY) { + size = srch * srcw; + } + uint8_t* src = new uint8_t[size]; + fill_tensor_host_rand(src, size); + + int out_size = srch * srcw; + if (dstFormat == ImageFormat::NV12 || dstFormat == ImageFormat::NV21) { + out_size = ceil(1.5 * srch) * srcw; + } else if (dstFormat == ImageFormat::BGR || + dstFormat == ImageFormat::RGB) { + out_size = 3 * srch * srcw; + } else if (dstFormat == ImageFormat::BGRA || + dstFormat == ImageFormat::RGBA) { + out_size = 4 * srch * srcw; + } else if (dstFormat == ImageFormat::GRAY) { + out_size = srch * srcw; + } + uint8_t* basic_dst = new uint8_t[out_size]; + uint8_t* lite_dst = new uint8_t[out_size]; + if (FLAGS_check_result) { + image_rotate_basic( + src, basic_dst, (ImageFormat)dstFormat, srcw, srch, rotate); + } + Timer t_rotate; + + LOG(INFO) << "saber cv compute"; + TransParam tparam; + tparam.ih = srch; + tparam.iw = srcw; + tparam.oh = srch; + tparam.ow = srcw; + tparam.flip_param = flip; + tparam.rotate_param = rotate; + + ImagePreprocess image_preprocess(srcFormat, dstFormat, tparam); + + for (int i = 0; i < test_iter; ++i) { + t_rotate.Start(); + image_preprocess.imageRotate(src, lite_dst); + t_rotate.Stop(); + } + LOG(INFO) << "image rotate avg time : " << t_rotate.LapTimes().Avg() + << ", min time: " << t_rotate.LapTimes().Min() + << ", max time: " << t_rotate.LapTimes().Max(); + + double max_ratio = 0; + double max_diff = 0; + const double eps = 1e-6f; + if (FLAGS_check_result) { + LOG(INFO) << "diff, image rotate size: " << out_size; + uint8_t* diff_v = new uint8_t[out_size]; + for (int i = 0; i < out_size; i++) { + uint8_t a = lite_dst[i]; + uint8_t b = basic_dst[i]; + uint8_t diff1 = a - b; + uint8_t diff = diff1 > 0 ? diff1 : -diff1; + diff_v[i] = diff; + if (max_diff < diff) { + max_diff = diff; + max_ratio = 2.0 * max_diff / (a + b + eps); + } + } + if (std::abs(max_ratio) >= 1e-5f) { + int width = size / srch; + printf("din: %d \n", width); + print_int8(src, size, width); + width = srch * 3; + printf("saber result: %d\n", width); + print_int8(lite_dst, out_size, width); + printf("basic result: \n"); + print_int8(basic_dst, out_size, width); + printf("diff result: \n"); + print_int8(diff_v, out_size, width); + } + delete[] diff_v; + LOG(INFO) << "compare result, max diff: " << max_diff + << ", max ratio: " << max_ratio; + bool rst = std::abs(max_ratio) < 1e-5f; + CHECK_EQ(rst, true) << "compute result error"; + } + LOG(INFO) << "image rotate end"; + } + } +} +void test_flip(const std::vector& cluster_id, + const std::vector& thread_num, + int srcw, + int srch, + int dstw, + int dsth, + ImageFormat srcFormat, + ImageFormat dstFormat, + float rotate, + FlipParam flip, + LayoutType layout, + int test_iter = 10) { +#ifdef LITE_WITH_ARM + paddle::lite::DeviceInfo::Init(); +#endif + for (auto& cls : cluster_id) { + for (auto& th : thread_num) { + std::unique_ptr ctx1( + new paddle::lite::KernelContext); + auto& ctx = ctx1->As(); + ctx.SetRunMode(static_cast(cls), th); + LOG(INFO) << "cluster: " << cls << ", threads: " << th; + + LOG(INFO) << " input tensor size, num= " << 1 << ", channel= " << 1 + << ", height= " << srch << ", width= " << srcw + << ", srcFormat= " << (ImageFormat)srcFormat; + if (srcFormat == ImageFormat::NV21) { + LOG(INFO) << "srcFormat: NV21"; + } + if (srcFormat == ImageFormat::NV12) { + LOG(INFO) << "srcFormat: NV12"; + } + if (srcFormat == ImageFormat::GRAY) { + LOG(INFO) << "srcFormat: GRAY"; + } + if (srcFormat == ImageFormat::BGRA) { + LOG(INFO) << "srcFormat: BGRA"; + } + if (srcFormat == ImageFormat::BGR) { + LOG(INFO) << "srcFormat: BGR"; + } + if (srcFormat == ImageFormat::RGBA) { + LOG(INFO) << "srcFormat: RGBA"; + } + if (srcFormat == ImageFormat::RGB) { + LOG(INFO) << "srcFormat: RGB"; + } + LOG(INFO) << " output tensor size, num=" << 1 << ", channel=" << 1 + << ", height=" << dsth << ", width=" << dstw + << ", dstFormat= " << (ImageFormat)dstFormat; + + if (dstFormat == ImageFormat::NV21) { + LOG(INFO) << "dstFormat: NV21"; + } + if (dstFormat == ImageFormat::NV12) { + LOG(INFO) << "dstFormat: NV12"; + } + if (dstFormat == ImageFormat::GRAY) { + LOG(INFO) << "dstFormat: GRAY"; + } + if (dstFormat == ImageFormat::BGRA) { + LOG(INFO) << "dstFormat: BGRA"; + } + if (dstFormat == ImageFormat::BGR) { + LOG(INFO) << "dstFormat: BGR"; + } + if (dstFormat == ImageFormat::RGBA) { + LOG(INFO) << "dstFormat: RGBA"; + } + if (dstFormat == ImageFormat::RGB) { + LOG(INFO) << "dstFormat: RGB"; + } + + LOG(INFO) << "Rotate = " << rotate << ", Flip = " << flip + << ", Layout = " << static_cast(layout); + + int size = 3 * srch * srcw; + if (srcFormat == ImageFormat::NV12 || srcFormat == ImageFormat::NV21) { + size = ceil(1.5 * srch) * srcw; + } else if (srcFormat == ImageFormat::BGRA || + srcFormat == ImageFormat::RGBA) { + size = 4 * srch * srcw; + } else if (srcFormat == ImageFormat::GRAY) { + size = srch * srcw; + } + uint8_t* src = new uint8_t[size]; + fill_tensor_host_rand(src, size); + + int out_size = srch * srcw; + if (dstFormat == ImageFormat::NV12 || dstFormat == ImageFormat::NV21) { + out_size = ceil(1.5 * srch) * srcw; + } else if (dstFormat == ImageFormat::BGR || + dstFormat == ImageFormat::RGB) { + out_size = 3 * srch * srcw; + } else if (dstFormat == ImageFormat::BGRA || + dstFormat == ImageFormat::RGBA) { + out_size = 4 * srch * srcw; + } else if (dstFormat == ImageFormat::GRAY) { + out_size = srch * srcw; + } + uint8_t* basic_dst = new uint8_t[out_size]; + uint8_t* lite_dst = new uint8_t[out_size]; + if (FLAGS_check_result) { + image_flip_basic( + src, basic_dst, (ImageFormat)dstFormat, srcw, srch, flip); + } + Timer t_rotate; + + LOG(INFO) << "saber cv compute"; + TransParam tparam; + tparam.ih = srch; + tparam.iw = srcw; + tparam.oh = srch; + tparam.ow = srcw; + tparam.flip_param = flip; + tparam.rotate_param = rotate; + + ImagePreprocess image_preprocess(srcFormat, dstFormat, tparam); + + for (int i = 0; i < test_iter; ++i) { + t_rotate.Start(); + image_preprocess.imageFlip(src, lite_dst); + t_rotate.Stop(); + } + LOG(INFO) << "image flip avg time : " << t_rotate.LapTimes().Avg() + << ", min time: " << t_rotate.LapTimes().Min() + << ", max time: " << t_rotate.LapTimes().Max(); + + double max_ratio = 0; + double max_diff = 0; + const double eps = 1e-6f; + if (FLAGS_check_result) { + LOG(INFO) << "diff, image flip size: " << out_size; + uint8_t* diff_v = new uint8_t[out_size]; + for (int i = 0; i < out_size; i++) { + uint8_t a = lite_dst[i]; + uint8_t b = basic_dst[i]; + uint8_t diff1 = a - b; + uint8_t diff = diff1 > 0 ? diff1 : -diff1; + diff_v[i] = diff; + if (max_diff < diff) { + max_diff = diff; + max_ratio = 2.0 * max_diff / (a + b + eps); + } + } + if (std::abs(max_ratio) >= 1e-5f) { + int width = size / srch; + printf("din: \n"); + print_int8(src, size, width); + width = out_size / srch; + printf("saber result: \n"); + print_int8(lite_dst, out_size, width); + printf("basic result: \n"); + print_int8(basic_dst, out_size, width); + printf("diff result: \n"); + print_int8(diff_v, out_size, width); + } + delete[] diff_v; + LOG(INFO) << "compare result, max diff: " << max_diff + << ", max ratio: " << max_ratio; + bool rst = std::abs(max_ratio) < 1e-5f; + CHECK_EQ(rst, true) << "compute result error"; + } + LOG(INFO) << "image flip end"; + } + } +} +void test_resize(const std::vector& cluster_id, + const std::vector& thread_num, + int srcw, + int srch, + int dstw, + int dsth, + ImageFormat srcFormat, + ImageFormat dstFormat, + float rotate, + FlipParam flip, + LayoutType layout, + int test_iter = 10) { +#ifdef LITE_WITH_ARM + paddle::lite::DeviceInfo::Init(); +#endif + test_iter = 1; + for (auto& cls : cluster_id) { + for (auto& th : thread_num) { + std::unique_ptr ctx1( + new paddle::lite::KernelContext); + auto& ctx = ctx1->As(); + ctx.SetRunMode(static_cast(cls), th); + LOG(INFO) << "cluster: " << cls << ", threads: " << th; + + LOG(INFO) << " input tensor size, num= " << 1 << ", channel= " << 1 + << ", height= " << srch << ", width= " << srcw + << ", srcFormat= " << (ImageFormat)srcFormat; + if (srcFormat == ImageFormat::NV21) { + LOG(INFO) << "srcFormat: NV21"; + } + if (srcFormat == ImageFormat::NV12) { + LOG(INFO) << "srcFormat: NV12"; + } + if (srcFormat == ImageFormat::GRAY) { + LOG(INFO) << "srcFormat: GRAY"; + } + if (srcFormat == ImageFormat::BGRA) { + LOG(INFO) << "srcFormat: BGRA"; + } + if (srcFormat == ImageFormat::BGR) { + LOG(INFO) << "srcFormat: BGR"; + } + if (srcFormat == ImageFormat::RGBA) { + LOG(INFO) << "srcFormat: RGBA"; + } + if (srcFormat == ImageFormat::RGB) { + LOG(INFO) << "srcFormat: RGB"; + } + LOG(INFO) << " output tensor size, num=" << 1 << ", channel=" << 1 + << ", height=" << dsth << ", width=" << dstw + << ", dstFormat= " << (ImageFormat)dstFormat; + + if (dstFormat == ImageFormat::NV21) { + LOG(INFO) << "dstFormat: NV21"; + } + if (dstFormat == ImageFormat::NV12) { + LOG(INFO) << "dstFormat: NV12"; + } + if (dstFormat == ImageFormat::GRAY) { + LOG(INFO) << "dstFormat: GRAY"; + } + if (dstFormat == ImageFormat::BGRA) { + LOG(INFO) << "dstFormat: BGRA"; + } + if (dstFormat == ImageFormat::BGR) { + LOG(INFO) << "dstFormat: BGR"; + } + if (dstFormat == ImageFormat::RGBA) { + LOG(INFO) << "dstFormat: RGBA"; + } + if (dstFormat == ImageFormat::RGB) { + LOG(INFO) << "dstFormat: RGB"; + } + + LOG(INFO) << "Rotate = " << rotate << ", Flip = " << flip + << ", Layout = " << static_cast(layout); + + int size = 3 * srch * srcw; + if (srcFormat == ImageFormat::NV12 || srcFormat == ImageFormat::NV21) { + size = ceil(1.5 * srch) * srcw; + } else if (srcFormat == ImageFormat::BGRA || + srcFormat == ImageFormat::RGBA) { + size = 4 * srch * srcw; + } else if (srcFormat == ImageFormat::GRAY) { + size = srch * srcw; + } + uint8_t* src = new uint8_t[size]; + fill_tensor_host_rand(src, size); + + int out_size = dsth * dstw; + if (dstFormat == ImageFormat::NV12 || dstFormat == ImageFormat::NV21) { + out_size = ceil(1.5 * dsth) * dstw; + } else if (dstFormat == ImageFormat::BGR || + dstFormat == ImageFormat::RGB) { + out_size = 3 * dsth * dstw; + } else if (dstFormat == ImageFormat::BGRA || + dstFormat == ImageFormat::RGBA) { + out_size = 4 * dsth * dstw; + } else if (dstFormat == ImageFormat::GRAY) { + out_size = dsth * dstw; + } + uint8_t* basic_dst = new uint8_t[out_size]; + uint8_t* lite_dst = new uint8_t[out_size]; + if (FLAGS_check_result) { + LOG(INFO) << "image_resize_basic"; + image_resize_basic( + src, basic_dst, (ImageFormat)dstFormat, srcw, srch, dstw, dsth); + } + Timer t_rotate; + + LOG(INFO) << "saber cv compute"; + TransParam tparam; + tparam.ih = srch; + tparam.iw = srcw; + tparam.oh = dsth; + tparam.ow = dstw; + tparam.flip_param = flip; + tparam.rotate_param = rotate; + + ImagePreprocess image_preprocess(srcFormat, dstFormat, tparam); + + for (int i = 0; i < test_iter; ++i) { + t_rotate.Start(); + image_preprocess.imageResize(src, lite_dst); + t_rotate.Stop(); + } + LOG(INFO) << "image Resize avg time : " << t_rotate.LapTimes().Avg() + << ", min time: " << t_rotate.LapTimes().Min() + << ", max time: " << t_rotate.LapTimes().Max(); + + double max_ratio = 0; + double max_diff = 0; + const double eps = 1e-6f; + if (FLAGS_check_result) { + LOG(INFO) << "diff, image Resize size: " << out_size; + int* diff_v = new int[out_size]; + for (int i = 0; i < out_size; i++) { + uint8_t a = lite_dst[i]; + uint8_t b = basic_dst[i]; + int diff1 = a - b; // basic resize and saber resize 在float -> + // int转换时存在误差,误差范围是{-1, 1} + int diff = 0; + if (diff1 < -1 || diff1 > 1) diff = diff1 < 0 ? -diff1 : diff1; + diff_v[i] = diff; + if (diff > 1 && max_diff < diff) { + max_diff = diff; + printf("i: %d, lite: %d, basic: %d \n", i, a, b); + max_ratio = 2.0 * max_diff / (a + b + eps); + } + } + if (std::abs(max_ratio) >= 1e-5f) { + int width = size / srcw; + printf("din: \n"); + print_int8(src, size, width); + width = out_size / dstw; + printf("saber result: \n"); + print_int8(lite_dst, out_size, width); + printf("basic result: \n"); + print_int8(basic_dst, out_size, width); + printf("diff result: \n"); + print_int(diff_v, out_size, width); + } + delete[] diff_v; + LOG(INFO) << "compare result, max diff: " << max_diff + << ", max ratio: " << max_ratio; + bool rst = std::abs(max_ratio) < 1e-5f; + CHECK_EQ(rst, true) << "compute result error"; + } + LOG(INFO) << "image Resize end"; + } + } +} +void test_convert(const std::vector& cluster_id, + const std::vector& thread_num, + int srcw, + int srch, + int dstw, + int dsth, + ImageFormat srcFormat, + ImageFormat dstFormat, + float rotate, + FlipParam flip, + LayoutType layout, + int test_iter = 10) { +#ifdef LITE_WITH_ARM + paddle::lite::DeviceInfo::Init(); +#endif + for (auto& cls : cluster_id) { + for (auto& th : thread_num) { + std::unique_ptr ctx1( + new paddle::lite::KernelContext); + auto& ctx = ctx1->As(); + ctx.SetRunMode(static_cast(cls), th); + LOG(INFO) << "cluster: " << cls << ", threads: " << th; + + LOG(INFO) << " input tensor size, num= " << 1 << ", channel= " << 1 + << ", height= " << srch << ", width= " << srcw + << ", srcFormat= " << (ImageFormat)srcFormat; + if (srcFormat == ImageFormat::NV21) { + LOG(INFO) << "srcFormat: NV21"; + } + if (srcFormat == ImageFormat::NV12) { + LOG(INFO) << "srcFormat: NV12"; + } + if (srcFormat == ImageFormat::GRAY) { + LOG(INFO) << "srcFormat: GRAY"; + } + if (srcFormat == ImageFormat::BGRA) { + LOG(INFO) << "srcFormat: BGRA"; + } + if (srcFormat == ImageFormat::BGR) { + LOG(INFO) << "srcFormat: BGR"; + } + if (srcFormat == ImageFormat::RGBA) { + LOG(INFO) << "srcFormat: RGBA"; + } + if (srcFormat == ImageFormat::RGB) { + LOG(INFO) << "srcFormat: RGB"; + } + LOG(INFO) << " output tensor size, num=" << 1 << ", channel=" << 1 + << ", height=" << dsth << ", width=" << dstw + << ", dstFormat= " << (ImageFormat)dstFormat; + + if (dstFormat == ImageFormat::NV21) { + LOG(INFO) << "dstFormat: NV21"; + } + if (dstFormat == ImageFormat::NV12) { + LOG(INFO) << "dstFormat: NV12"; + } + if (dstFormat == ImageFormat::GRAY) { + LOG(INFO) << "dstFormat: GRAY"; + } + if (dstFormat == ImageFormat::BGRA) { + LOG(INFO) << "dstFormat: BGRA"; + } + if (dstFormat == ImageFormat::BGR) { + LOG(INFO) << "dstFormat: BGR"; + } + if (dstFormat == ImageFormat::RGBA) { + LOG(INFO) << "dstFormat: RGBA"; + } + if (dstFormat == ImageFormat::RGB) { + LOG(INFO) << "dstFormat: RGB"; + } + + LOG(INFO) << "Rotate = " << rotate << ", Flip = " << flip + << ", Layout = " << static_cast(layout); + + int size = 3 * srch * srcw; + if (srcFormat == ImageFormat::NV12 || srcFormat == ImageFormat::NV21) { + size = ceil(1.5 * srch) * srcw; + } else if (srcFormat == ImageFormat::BGRA || + srcFormat == ImageFormat::RGBA) { + size = 4 * srch * srcw; + } else if (srcFormat == ImageFormat::GRAY) { + size = srch * srcw; + } + uint8_t* src = new uint8_t[size]; + fill_tensor_host_rand(src, size); + + int out_size = srch * srcw; + if (dstFormat == ImageFormat::NV12 || dstFormat == ImageFormat::NV21) { + out_size = ceil(1.5 * srch) * srcw; + } else if (dstFormat == ImageFormat::BGR || + dstFormat == ImageFormat::RGB) { + out_size = 3 * srch * srcw; + } else if (dstFormat == ImageFormat::BGRA || + dstFormat == ImageFormat::RGBA) { + out_size = 4 * srch * srcw; + } else if (dstFormat == ImageFormat::GRAY) { + out_size = srch * srcw; + } + uint8_t* basic_dst = new uint8_t[out_size]; + uint8_t* lite_dst = new uint8_t[out_size]; + if (FLAGS_check_result) { + image_convert_basic(src, + basic_dst, + (ImageFormat)srcFormat, + (ImageFormat)dstFormat, + srcw, + srch, + out_size); + } + Timer t_rotate; + + LOG(INFO) << "saber cv compute"; + TransParam tparam; + tparam.ih = srch; + tparam.iw = srcw; + tparam.oh = srch; + tparam.ow = srcw; + tparam.flip_param = flip; + tparam.rotate_param = rotate; + + ImagePreprocess image_preprocess(srcFormat, dstFormat, tparam); + + for (int i = 0; i < test_iter; ++i) { + t_rotate.Start(); + image_preprocess.imageConvert(src, lite_dst); + t_rotate.Stop(); + } + LOG(INFO) << "image Convert avg time : " << t_rotate.LapTimes().Avg() + << ", min time: " << t_rotate.LapTimes().Min() + << ", max time: " << t_rotate.LapTimes().Max(); + + double max_ratio = 0; + double max_diff = 0; + const double eps = 1e-6f; + if (FLAGS_check_result) { + LOG(INFO) << "diff, image convert size: " << out_size; + uint8_t* diff_v = new uint8_t[out_size]; + for (int i = 0; i < out_size; i++) { + uint8_t a = lite_dst[i]; + uint8_t b = basic_dst[i]; + uint8_t diff1 = a - b; + uint8_t diff = diff1 > 0 ? diff1 : -diff1; + diff_v[i] = diff; + if (max_diff < diff) { + max_diff = diff; + max_ratio = 2.0 * max_diff / (a + b + eps); + } + } + if (std::abs(max_ratio) >= 1e-5f) { + int width = size / srch; + printf("din: \n"); + print_int8(src, size, width); + width = out_size / srch; + printf("saber result: \n"); + print_int8(lite_dst, out_size, width); + printf("basic result: \n"); + print_int8(basic_dst, out_size, width); + printf("diff result: \n"); + print_int8(diff_v, out_size, width); + } + delete[] diff_v; + LOG(INFO) << "compare result, max diff: " << max_diff + << ", max ratio: " << max_ratio; + bool rst = std::abs(max_ratio) < 1e-5f; + CHECK_EQ(rst, true) << "compute result error"; + } + LOG(INFO) << "image convert end"; + } + } +} + #if 1 TEST(TestImageConvertRand, test_func_image_convert_preprocess) { if (FLAGS_basic_test) { for (auto w : {1, 4, 8, 16, 112, 224, 1092}) { for (auto h : {1, 4, 16, 112, 224}) { - for (auto ww : {66}) { - for (auto hh : {12}) { - for (auto rotate : {180}) { - for (auto flip : {0}) { - for (auto srcFormat : {0, 1, 2, 3, 4, 11, 12}) { - for (auto dstFormat : {0, 1, 2, 3, 4}) { - for (auto layout : {1}) { - if ((srcFormat == ImageFormat::NV12 || - srcFormat == ImageFormat::NV21) && - (dstFormat == ImageFormat::GRAY)) { - continue; - } - if ((dstFormat == ImageFormat::NV12 || - dstFormat == ImageFormat::NV21) && - (srcFormat == ImageFormat::GRAY)) { - continue; - } - if (srcFormat == ImageFormat::NV12 || - srcFormat == ImageFormat::NV21) { - if (w % 2) { // is not ou shu, two line y == one line - // uv - continue; - } - } - test_img({FLAGS_cluster}, + for (auto rotate : {180}) { + for (auto flip : {0}) { + for (auto srcFormat : {0, 1, 2, 3, 4, 11, 12}) { + for (auto dstFormat : {0, 1, 2, 3, 4}) { + for (auto layout : {1}) { + if ((srcFormat == ImageFormat::NV12 || + srcFormat == ImageFormat::NV21) && + (dstFormat == ImageFormat::GRAY)) { + continue; + } + if ((dstFormat == ImageFormat::NV12 || + dstFormat == ImageFormat::NV21) && + (srcFormat == ImageFormat::GRAY)) { + continue; + } + if (srcFormat == ImageFormat::NV12 || + srcFormat == ImageFormat::NV21) { + if (w % 2) { + continue; + } + } + test_convert({FLAGS_cluster}, {1}, w, h, - ww, - hh, + w, + h, (ImageFormat)srcFormat, (ImageFormat)dstFormat, rotate, (FlipParam)flip, (LayoutType)layout); - } - } } } } @@ -614,41 +1288,32 @@ TEST(TestImageConvertRand, test_func_image_convert_preprocess) { #if 1 TEST(TestImageConvertRand, test_func_image_resize_preprocess) { if (FLAGS_basic_test) { - for (auto w : {1, 4, 8, 16, 112, 224, 1092}) { - for (auto h : {1, 4, 16, 112, 224}) { - for (auto ww : {1, 2, 8, 32, 112}) { - for (auto hh : {1, 2, 8, 112}) { + for (auto w : {8, 16, 112, 224, 1092}) { + for (auto h : {4, 16, 112, 224}) { + for (auto ww : {8, 32, 112}) { + for (auto hh : {8, 112}) { for (auto rotate : {180}) { for (auto flip : {0}) { for (auto srcFormat : {0, 1, 2, 3, 4, 11, 12}) { - for (auto dstFormat : {0, 1, 2, 3, 4, 11}) { - for (auto layout : {1}) { - if (dstFormat == ImageFormat::NV12 || - dstFormat == ImageFormat::NV21 || - (srcFormat == ImageFormat::NV12 || - srcFormat == ImageFormat::NV21) && - dstFormat == ImageFormat::GRAY) { + for (auto layout : {1}) { + auto dstFormat = srcFormat; + if (srcFormat == ImageFormat::NV12 || + srcFormat == ImageFormat::NV21) { + if (w % 2) { continue; } - if (srcFormat == ImageFormat::NV12 || - srcFormat == ImageFormat::NV21) { - if (w % 2) { // is not ou shu, two line y == one line - // uv - continue; - } - } - test_img({FLAGS_cluster}, - {1, 2, 4}, - w, - h, - ww, - hh, - (ImageFormat)srcFormat, - (ImageFormat)dstFormat, - rotate, - (FlipParam)flip, - (LayoutType)layout); } + test_resize({FLAGS_cluster}, + {1, 2, 4}, + w, + h, + ww, + hh, + (ImageFormat)srcFormat, + (ImageFormat)dstFormat, + rotate, + (FlipParam)flip, + (LayoutType)layout); } } } @@ -665,34 +1330,40 @@ TEST(TestImageConvertRand, test_func_image_trans_preprocess) { if (FLAGS_basic_test) { for (auto w : {1, 8, 16, 112, 224, 1092}) { for (auto h : {1, 16, 112, 224}) { - for (auto ww : {32, 112}) { - for (auto hh : {112}) { - for (auto rotate : {90, 180, 270}) { - for (auto flip : {-1, 0, 1}) { - for (auto srcFormat : {0}) { - for (auto dstFormat : {0, 1, 2, 3, 4}) { - for (auto layout : {1, 3}) { - if (srcFormat == ImageFormat::NV12 || - srcFormat == ImageFormat::NV21) { - if (w % 2) { // is not ou shu, two line y == one line - // uv - continue; - } - } - test_img({FLAGS_cluster}, - {1, 2, 4}, - w, - h, - ww, - hh, - (ImageFormat)srcFormat, - (ImageFormat)dstFormat, - rotate, - (FlipParam)flip, - (LayoutType)layout); - } + for (auto rotate : {90, 180, 270}) { + for (auto flip : {-1, 0, 1}) { + for (auto srcFormat : {0, 1, 2, 3, 4}) { + for (auto layout : {1, 3}) { + auto dstFormat = srcFormat; + if (srcFormat == ImageFormat::NV12 || + srcFormat == ImageFormat::NV21) { + if (w % 2) { + continue; } } + test_flip({FLAGS_cluster}, + {1, 2, 4}, + w, + h, + w, + h, + (ImageFormat)srcFormat, + (ImageFormat)dstFormat, + rotate, + (FlipParam)flip, + (LayoutType)layout); + + test_rotate({FLAGS_cluster}, + {1, 2, 4}, + w, + h, + w, + h, + (ImageFormat)srcFormat, + (ImageFormat)dstFormat, + rotate, + (FlipParam)flip, + (LayoutType)layout); } } } diff --git a/lite/utils/cv/image_flip.cc b/lite/utils/cv/image_flip.cc index f535c858e4dddcd04a0ce8cfa7a727356df34d64..7b7936935d0c26e4d1f023f77063ce9ee8dd73ec 100644 --- a/lite/utils/cv/image_flip.cc +++ b/lite/utils/cv/image_flip.cc @@ -110,7 +110,8 @@ rotate: */ void flip_hwc1_x(const uint8_t* src, uint8_t* dst, int w_in, int h_in) { int h = h_in - 1; - uint8_t zerobuff[8] = {0, 0, 0, 0, 0, 0, 0, 0}; + uint8_t* zerobuff = new uint8_t[w_in]; + memset(zerobuff, 0.0, sizeof(uint8_t) * w_in); #pragma omp parallel for for (int i = 0; i < h_in; i += 4) { const uint8_t* inptr0 = src + i * w_in; @@ -233,7 +234,8 @@ flip: */ void flip_hwc1_y(const uint8_t* src, uint8_t* dst, int w_in, int h_in) { int64_t stride_w = 8; - uint8_t zerobuff[8] = {0, 0, 0, 0, 0, 0, 0, 0}; + uint8_t* zerobuff = new uint8_t[w_in]; + memset(zerobuff, 0.0, sizeof(uint8_t) * w_in); #pragma omp parallel for for (int i = 0; i < h_in; i += 4) { const uint8_t* inptr0 = src + i * w_in; @@ -386,7 +388,8 @@ flip: */ void flip_hwc1_xy(const uint8_t* src, uint8_t* dst, int w_in, int h_in) { int64_t stride_w = 8; - uint8_t zerobuff[8] = {0, 0, 0, 0, 0, 0, 0, 0}; + uint8_t* zerobuff = new uint8_t[w_in]; + memset(zerobuff, 0.0, sizeof(uint8_t) * w_in); #pragma omp parallel for for (int i = 0; i < h_in; i += 4) { const uint8_t* inptr0 = src + i * w_in; @@ -398,17 +401,17 @@ void flip_hwc1_xy(const uint8_t* src, uint8_t* dst, int w_in, int h_in) { uint8_t* outptr1 = outptr0 - w_in; uint8_t* outptr2 = outptr1 - w_in; uint8_t* outptr3 = outptr2 - w_in; - if (i + 3 >= h_in) { - switch ((i + 3) - h_in) { + if (i + 4 > h_in) { + switch ((i + 4) - h_in) { case 3: - inptr0 = zerobuff; - outptr0 = zerobuff; - case 2: inptr1 = zerobuff; outptr1 = zerobuff; - case 1: + case 2: inptr2 = zerobuff; outptr2 = zerobuff; + case 1: + inptr3 = zerobuff; + outptr3 = zerobuff; case 0: inptr3 = zerobuff; outptr3 = zerobuff; @@ -504,16 +507,16 @@ void flip_hwc1_xy(const uint8_t* src, uint8_t* dst, int w_in, int h_in) { outptr1 += stride_w - 1; outptr0 += stride_w - 1; for (; j < w_in; j++) { - if (i + 3 >= h_in) { - switch ((i + 3) - h_in) { - case 0: + if (i + 4 > h_in) { + switch ((i + 4) - h_in) { + case 3: *outptr2-- = *inptr2++; - case 1: + case 2: *outptr1-- = *inptr1++; // inptr1 = zerobuff; - case 2: + case 1: *outptr0-- = *inptr0++; - case 3: + case 0: // inptr3 = zerobuff; default: break; diff --git a/lite/utils/cv/image_resize.cc b/lite/utils/cv/image_resize.cc index 00a59e26a9a41873e07790285d94f6737bf382aa..3e67c9386f963ab31f6b200a6badde93e431c482 100644 --- a/lite/utils/cv/image_resize.cc +++ b/lite/utils/cv/image_resize.cc @@ -69,7 +69,7 @@ void resize(const uint8_t* src, int size = srcw * srch; if (srcw == dstw && srch == dsth) { if (srcFormat == NV12 || srcFormat == NV21) { - size = srcw * (floor(1.5 * srch)); + size = srcw * (static_cast(1.5 * srch)); } else if (srcFormat == BGR || srcFormat == RGB) { size = 3 * srcw * srch; } else if (srcFormat == BGRA || srcFormat == RGBA) { @@ -81,7 +81,7 @@ void resize(const uint8_t* src, double scale_x = static_cast(srcw) / dstw; double scale_y = static_cast(srch) / dsth; - int* buf = new int[dstw * 2 + dsth * 2]; + int* buf = new int[dstw * 2 + dsth * 3]; int* xofs = buf; int* yofs = buf + dstw; @@ -110,7 +110,7 @@ void resize(const uint8_t* src, } compute_xy( - srcw, srch, dstw, dsth, num, scale_x, scale_y, xofs, yofs, ialpha, ibeta); + srcw, srch, dstw, orih, num, scale_x, scale_y, xofs, yofs, ialpha, ibeta); int* xofs1 = nullptr; int* yofs1 = nullptr; @@ -131,7 +131,7 @@ void resize(const uint8_t* src, xofs1, yofs1, ialpha1, - ibeta + orih); + ibeta + orih * 2); } int cnt = w_out >> 3; int remain = w_out % 8; @@ -160,7 +160,6 @@ void resize(const uint8_t* src, int sx = xofs[dx / num]; int16_t a0 = ialphap[0]; int16_t a1 = ialphap[1]; - const uint8_t* S0pl = S0 + sx; const uint8_t* S0pr = S0 + sx + num; const uint8_t* S1pl = S1 + sx; @@ -323,7 +322,6 @@ void compute_xy(int srcw, fy = static_cast((dy + 0.5) * scale_y - 0.5); sy = floor(fy); fy -= sy; - if (sy < 0) { sy = 0; fy = 0.f; @@ -332,12 +330,9 @@ void compute_xy(int srcw, sy = srch - 2; fy = 1.f; } - yofs[dy] = sy; - float b0 = (1.f - fy) * resize_coef_scale; float b1 = fy * resize_coef_scale; - ibeta[dy * 2] = SATURATE_CAST_SHORT(b0); ibeta[dy * 2 + 1] = SATURATE_CAST_SHORT(b1); } diff --git a/lite/utils/cv/image_rotate.cc b/lite/utils/cv/image_rotate.cc index 98e61fb444aad691d28ae2116dbbd5743e20e481..1bacaa000db747539ef61a81834a0f1826b8b3ba 100644 --- a/lite/utils/cv/image_rotate.cc +++ b/lite/utils/cv/image_rotate.cc @@ -79,6 +79,7 @@ void rotate_hwc1( void rotate_hwc3( const uint8_t* src, uint8_t* dst, int srcw, int srch, float degree) { if (degree == 90) { + printf("rotate_hwc3_90 \n"); rotate_hwc3_90(src, dst, srcw, srch, srch, srcw); } else if (degree == 180) { rotate_hwc3_180(src, dst, srcw, srch, srcw, srch); @@ -679,14 +680,14 @@ void rotate_hwc1_90(const uint8_t* src, const uint8_t* inptr7 = inptr6 + w_in; for (; j < w_in; j++) { uint8_t* outptr = dst + j * w_out + ww - i; - *outptr++ = *inptr0++; - *outptr++ = *inptr1++; - *outptr++ = *inptr2++; - *outptr++ = *inptr3++; - *outptr++ = *inptr4++; - *outptr++ = *inptr5++; - *outptr++ = *inptr6++; *outptr++ = *inptr7++; + *outptr++ = *inptr6++; + *outptr++ = *inptr5++; + *outptr++ = *inptr4++; + *outptr++ = *inptr3++; + *outptr++ = *inptr2++; + *outptr++ = *inptr1++; + *outptr++ = *inptr0++; } } ww = w_out - 1; @@ -856,6 +857,7 @@ void rotate_hwc1_180(const uint8_t* src, } } } + delete[] zerobuff; } /* 1 2 3 @@ -1386,6 +1388,7 @@ void rotate_hwc3_180(const uint8_t* src, } } } + delete[] zerobuff; } void rotate_hwc3_270(const uint8_t* src, @@ -1915,6 +1918,7 @@ void rotate_hwc4_180(const uint8_t* src, } } } + delete[] zerobuff; } void rotate_hwc4_270(const uint8_t* src,