diff --git a/lite/tests/cv/cv_basic.h b/lite/tests/cv/cv_basic.h
index 45ae3fb616527f404984bba7b2366a2f66c09a96..5e867487e2e5f75411aae7204dcacd0dd791ee98 100644
--- a/lite/tests/cv/cv_basic.h
+++ b/lite/tests/cv/cv_basic.h
@@ -538,10 +538,10 @@ void image_resize_basic(const uint8_t* in_data,
   int* yofs1 = nullptr;
   if (orih < dsth) {
     int tmp = dsth - orih;
-    ialpha1 = new float[srcw];
-    xofs1 = new int[dstw / 2];
+    ialpha1 = new float[dstw];
+    xofs1 = new int[dstw];
     yofs1 = new int[tmp];
-    compute_xy(srcw / 2,
+    compute_xy(srcw,
                srch / 2,
                dstw / 2,
                tmp,
@@ -565,7 +565,7 @@ void image_resize_basic(const uint8_t* in_data,
       ialpha = ialpha1;
       xofs = xofs1;
       yofs = yofs1;
-      y_in_start = yofs[dy - orih];
+      y_in_start = yofs[dy - orih] + srch;
     }
     int y_in_end = y_in_start + 1;
     if (y_in_start < 0) {
diff --git a/lite/utils/cv/image_resize.cc b/lite/utils/cv/image_resize.cc
index 3e67c9386f963ab31f6b200a6badde93e431c482..2fc884a0fa4fd359c150115a20bdf751094b4687 100644
--- a/lite/utils/cv/image_resize.cc
+++ b/lite/utils/cv/image_resize.cc
@@ -47,6 +47,592 @@ void ImageResize::choose(const uint8_t* src,
                          int dsth) {
   resize(src, dst, srcFormat, srcw, srch, dstw, dsth);
 }
+void resize_three_channel(
+    const uint8_t* src, int w_in, int h_in, uint8_t* dst, int w_out, int h_out);
+
+void bgr_resize(const uint8_t* src,
+                uint8_t* dst,
+                int w_in,
+                int h_in,
+                int w_out,
+                int h_out) {
+  if (w_out == w_in && h_out == h_in) {
+    memcpy(dst, src, sizeof(uint8_t) * w_in * h_in * 3);
+    return;
+  }
+  // y
+  resize_three_channel(src, w_in * 3, h_in, dst, w_out * 3, h_out);
+}
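+// Fixed-point bilinear resize of interleaved three-channel (BGR/RGB) data;
+// w_in/w_out are row widths in bytes (3 * pixel width). Interpolation
+// weights are precomputed in Q11 (resize_coef_scale = 2048): the horizontal
+// pass builds int16 rows as (a0 * p0 + a1 * p1) >> 4, and the vertical pass
+// applies b0/b1, accumulates the products shifted right by 16, adds 2 and
+// shifts right by 2, so the 22 coefficient bits are fully removed
+// (4 + 16 + 2 = 22) with rounding.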
+void resize_three_channel(const uint8_t* src,
+                          int w_in,
+                          int h_in,
+                          uint8_t* dst,
+                          int w_out,
+                          int h_out) {
+  const int resize_coef_bits = 11;
+  const int resize_coef_scale = 1 << resize_coef_bits;
+  double scale_x = static_cast<double>(w_in) / w_out;
+  double scale_y = static_cast<double>(h_in) / h_out;
+  int* buf = new int[w_out * 2 + h_out * 2];
+  int* xofs = buf;          // new int[w];
+  int* yofs = buf + w_out;  // new int[h];
+  int16_t* ialpha =
+      reinterpret_cast<int16_t*>(buf + w_out + h_out);  // new int16_t[w * 2];
+  int16_t* ibeta =
+      reinterpret_cast<int16_t*>(buf + w_out * 2 + h_out);  // new short[h * 2];
+  float fx = 0.f;
+  float fy = 0.f;
+  int sx = 0;
+  int sy = 0;
+#define SATURATE_CAST_SHORT(X)                                               \
+  (int16_t)::std::min(                                                       \
+      ::std::max(static_cast<int>(X + (X >= 0.f ? 0.5f : -0.5f)), SHRT_MIN), \
+      SHRT_MAX);
+  // #pragma omp parallel for
+  for (int dx = 0; dx < w_out / 3; dx++) {
+    fx = static_cast<float>((dx + 0.5) * scale_x - 0.5);
+    sx = floor(fx);
+    fx -= sx;
+    if (sx < 0) {
+      sx = 0;
+      fx = 0.f;
+    }
+    if (sx >= w_in - 1) {
+      sx = w_in - 2;
+      fx = 1.f;
+    }
+    xofs[dx] = sx * 3;
+    float a0 = (1.f - fx) * resize_coef_scale;
+    float a1 = fx * resize_coef_scale;
+    ialpha[dx * 2] = SATURATE_CAST_SHORT(a0);
+    ialpha[dx * 2 + 1] = SATURATE_CAST_SHORT(a1);
+  }
+  // #pragma omp parallel for
+  for (int dy = 0; dy < h_out; dy++) {
+    fy = static_cast<float>((dy + 0.5) * scale_y - 0.5);
+    sy = floor(fy);
+    fy -= sy;
+    if (sy < 0) {
+      sy = 0;
+      fy = 0.f;
+    }
+    if (sy >= h_in - 1) {
+      sy = h_in - 2;
+      fy = 1.f;
+    }
+    yofs[dy] = sy;
+    float b0 = (1.f - fy) * resize_coef_scale;
+    float b1 = fy * resize_coef_scale;
+    ibeta[dy * 2] = SATURATE_CAST_SHORT(b0);
+    ibeta[dy * 2 + 1] = SATURATE_CAST_SHORT(b1);
+  }
+#undef SATURATE_CAST_SHORT
+  // loop body
+  int16_t* rowsbuf0 = new int16_t[w_out + 1];
+  int16_t* rowsbuf1 = new int16_t[w_out + 1];
+  int16_t* rows0 = rowsbuf0;
+  int16_t* rows1 = rowsbuf1;
+  int prev_sy1 = -1;
+  for (int dy = 0; dy < h_out; dy++) {
+    int sy = yofs[dy];
+    if (sy == prev_sy1) {
+      // hresize one row
+      int16_t* rows0_old = rows0;
+      rows0 = rows1;
+      rows1 = rows0_old;
+      const uint8_t* S1 = src + w_in * (sy + 1);
+      const int16_t* ialphap = ialpha;
+      int16_t* rows1p = rows1;
+      for (int dx = 0; dx < w_out / 3; dx++) {
+        int sx = xofs[dx];
+        int16_t a0 = ialphap[0];
+        int16_t a1 = ialphap[1];
+        const uint8_t* S1p = S1 + sx;
+        int tmp = dx * 3;
+        rows1p[tmp] = (S1p[0] * a0 + S1p[3] * a1) >> 4;
+        rows1p[tmp + 1] = (S1p[1] * a0 + S1p[4] * a1) >> 4;
+        rows1p[tmp + 2] = (S1p[2] * a0 + S1p[5] * a1) >> 4;
+        ialphap += 2;
+      }
+    } else {
+      // hresize two rows
+      const uint8_t* S0 = src + w_in * (sy);
+      const uint8_t* S1 = src + w_in * (sy + 1);
+      const int16_t* ialphap = ialpha;
+      int16_t* rows0p = rows0;
+      int16_t* rows1p = rows1;
+      for (int dx = 0; dx < w_out / 3; dx++) {
+        int sx = xofs[dx];
+        int16_t a0 = ialphap[0];
+        int16_t a1 = ialphap[1];
+        const uint8_t* S0p = S0 + sx;
+        const uint8_t* S1p = S1 + sx;
+        int tmp = dx * 3;
+        rows0p[tmp] = (S0p[0] * a0 + S0p[3] * a1) >> 4;
+        rows1p[tmp] = (S1p[0] * a0 + S1p[3] * a1) >> 4;
+        rows0p[tmp + 1] = (S0p[1] * a0 + S0p[4] * a1) >> 4;
+        rows1p[tmp + 1] = (S1p[1] * a0 + S1p[4] * a1) >> 4;
+        rows0p[tmp + 2] = (S0p[2] * a0 + S0p[5] * a1) >> 4;
+        rows1p[tmp + 2] = (S1p[2] * a0 + S1p[5] * a1) >> 4;
+        ialphap += 2;
+      }
+    }
+    prev_sy1 = sy + 1;
+    // vresize
+    int16_t b0 = ibeta[0];
+    int16_t b1 = ibeta[1];
+    int16_t* rows0p = rows0;
+    int16_t* rows1p = rows1;
+    uint8_t* dp_ptr = dst + w_out * (dy);
+    int cnt = w_out >> 3;
+    int remain = w_out - (cnt << 3);
+    int16x4_t _b0 = vdup_n_s16(b0);
+    int16x4_t _b1 = vdup_n_s16(b1);
+    int32x4_t _v2 = vdupq_n_s32(2);
+    for (cnt = w_out >> 3; cnt > 0; cnt--) {
+      int16x4_t _rows0p_sr4 = vld1_s16(rows0p);
+      int16x4_t _rows1p_sr4 = vld1_s16(rows1p);
+      int16x4_t _rows0p_1_sr4 = vld1_s16(rows0p + 4);
+      int16x4_t _rows1p_1_sr4 = vld1_s16(rows1p + 4);
+      int32x4_t _rows0p_sr4_mb0 = vmull_s16(_rows0p_sr4, _b0);
+      int32x4_t _rows1p_sr4_mb1 = vmull_s16(_rows1p_sr4, _b1);
+      int32x4_t _rows0p_1_sr4_mb0 = vmull_s16(_rows0p_1_sr4, _b0);
+      int32x4_t _rows1p_1_sr4_mb1 = vmull_s16(_rows1p_1_sr4, _b1);
+      int32x4_t _acc = _v2;
+      _acc = vsraq_n_s32(
+          _acc, _rows0p_sr4_mb0, 16);  // _acc >> 16 + _rows0p_sr4_mb0 >> 16
+      _acc = vsraq_n_s32(_acc, _rows1p_sr4_mb1, 16);
+      int32x4_t _acc_1 = _v2;
+      _acc_1 = vsraq_n_s32(_acc_1, _rows0p_1_sr4_mb0, 16);
+      _acc_1 = vsraq_n_s32(_acc_1, _rows1p_1_sr4_mb1, 16);
+      int16x4_t _acc16 = vshrn_n_s32(_acc, 2);  // _acc >> 2
+      int16x4_t _acc16_1 = vshrn_n_s32(_acc_1, 2);
+      uint8x8_t _dout = vqmovun_s16(vcombine_s16(_acc16, _acc16_1));
+      vst1_u8(dp_ptr, _dout);
+      dp_ptr += 8;
+      rows0p += 8;
+      rows1p += 8;
+    }
+    for (; remain; --remain) {
+      // D[x] = (rows0[x]*b0 + rows1[x]*b1) >> INTER_RESIZE_COEF_BITS;
+      *dp_ptr++ =
+          (uint8_t)(((int16_t)((b0 * (int16_t)(*rows0p++)) >> 16) +
+                     (int16_t)((b1 * (int16_t)(*rows1p++)) >> 16) + 2) >>
+                    2);
+    }
+    ibeta += 2;
+  }
+  delete[] buf;
+  delete[] rowsbuf0;
+  delete[] rowsbuf1;
+}
+void resize_one_channel(
+    const uint8_t* src, int w_in, int h_in, uint8_t* dst, int w_out, int h_out);
+void resize_one_channel_uv(
+    const uint8_t* src, int w_in, int h_in, uint8_t* dst, int w_out, int h_out);
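+// NV12/NV21 buffers store a full-resolution Y plane (h_in rows of w_in
+// bytes) followed by a half-height plane of interleaved UV (or VU) byte
+// pairs, 1.5 * h_in rows in total. The two planes are resized independently:
+// the Y plane with resize_one_channel and the chroma plane with
+// resize_one_channel_uv.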
+void nv21_resize(const uint8_t* src,
+                 uint8_t* dst,
+                 int w_in,
+                 int h_in,
+                 int w_out,
+                 int h_out) {
+  if (w_out == w_in && h_out == h_in) {
+    memcpy(dst, src, sizeof(uint8_t) * w_in * static_cast<int>(1.5 * h_in));
+    return;
+  }
+  // return;
+  int y_h = h_in;
+  int uv_h = h_in / 2;
+  const uint8_t* y_ptr = src;
+  const uint8_t* uv_ptr = src + y_h * w_in;
+  // out
+  int dst_y_h = h_out;
+  int dst_uv_h = h_out / 2;
+  uint8_t* dst_ptr = dst + dst_y_h * w_out;
+  // y
+  resize_one_channel(y_ptr, w_in, y_h, dst, w_out, dst_y_h);
+  // uv
+  resize_one_channel_uv(uv_ptr, w_in, uv_h, dst_ptr, w_out, dst_uv_h);
+}
+
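+// Single-plane bilinear resize. Two int16 row buffers hold the horizontally
+// filtered source rows; when the new top source row equals the previous
+// pair's bottom row (sy == prev_sy1), the buffers are swapped and only the
+// new bottom row is filtered ("hresize one row" vs. "hresize two rows").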
+void resize_one_channel(const uint8_t* src,
+                        int w_in,
+                        int h_in,
+                        uint8_t* dst,
+                        int w_out,
+                        int h_out) {
+  const int resize_coef_bits = 11;
+  const int resize_coef_scale = 1 << resize_coef_bits;
+
+  double scale_x = static_cast<double>(w_in) / w_out;
+  double scale_y = static_cast<double>(h_in) / h_out;
+
+  int* buf = new int[w_out * 2 + h_out * 2];
+
+  int* xofs = buf;          // new int[w];
+  int* yofs = buf + w_out;  // new int[h];
+
+  int16_t* ialpha =
+      reinterpret_cast<int16_t*>(buf + w_out + h_out);  // new short[w * 2];
+  int16_t* ibeta =
+      reinterpret_cast<int16_t*>(buf + w_out * 2 + h_out);  // new short[h * 2];
+
+  float fx = 0.f;
+  float fy = 0.f;
+  int sx = 0;
+  int sy = 0;
+
+#define SATURATE_CAST_SHORT(X)                                               \
+  (int16_t)::std::min(                                                       \
+      ::std::max(static_cast<int>(X + (X >= 0.f ? 0.5f : -0.5f)), SHRT_MIN), \
+      SHRT_MAX);
+  for (int dx = 0; dx < w_out; dx++) {
+    fx = static_cast<float>((dx + 0.5) * scale_x - 0.5);
+    sx = floor(fx);
+    fx -= sx;
+
+    if (sx < 0) {
+      sx = 0;
+      fx = 0.f;
+    }
+    if (sx >= w_in - 1) {
+      sx = w_in - 2;
+      fx = 1.f;
+    }
+
+    xofs[dx] = sx;
+
+    float a0 = (1.f - fx) * resize_coef_scale;
+    float a1 = fx * resize_coef_scale;
+
+    ialpha[dx * 2] = SATURATE_CAST_SHORT(a0);
+    ialpha[dx * 2 + 1] = SATURATE_CAST_SHORT(a1);
+  }
+  for (int dy = 0; dy < h_out; dy++) {
+    fy = static_cast<float>((dy + 0.5) * scale_y - 0.5);
+    sy = floor(fy);
+    fy -= sy;
+
+    if (sy < 0) {
+      sy = 0;
+      fy = 0.f;
+    }
+    if (sy >= h_in - 1) {
+      sy = h_in - 2;
+      fy = 1.f;
+    }
+
+    yofs[dy] = sy;
+
+    float b0 = (1.f - fy) * resize_coef_scale;
+    float b1 = fy * resize_coef_scale;
+
+    ibeta[dy * 2] = SATURATE_CAST_SHORT(b0);
+    ibeta[dy * 2 + 1] = SATURATE_CAST_SHORT(b1);
+  }
+#undef SATURATE_CAST_SHORT
+  // loop body
+  int16_t* rowsbuf0 = new int16_t[w_out + 1];
+  int16_t* rowsbuf1 = new int16_t[w_out + 1];
+  int16_t* rows0 = rowsbuf0;
+  int16_t* rows1 = rowsbuf1;
+
+  int prev_sy1 = -1;
+  for (int dy = 0; dy < h_out; dy++) {
+    int sy = yofs[dy];
+
+    if (sy == prev_sy1) {
+      // hresize one row
+      int16_t* rows0_old = rows0;
+      rows0 = rows1;
+      rows1 = rows0_old;
+      const uint8_t* S1 = src + w_in * (sy + 1);
+      const int16_t* ialphap = ialpha;
+      int16_t* rows1p = rows1;
+      for (int dx = 0; dx < w_out; dx++) {
+        int sx = xofs[dx];
+        int16_t a0 = ialphap[0];
+        int16_t a1 = ialphap[1];
+
+        const uint8_t* S1p = S1 + sx;
+        rows1p[dx] = (S1p[0] * a0 + S1p[1] * a1) >> 4;
+
+        ialphap += 2;
+      }
+    } else {
+      // hresize two rows
+      const uint8_t* S0 = src + w_in * (sy);
+      const uint8_t* S1 = src + w_in * (sy + 1);
+
+      const int16_t* ialphap = ialpha;
+      int16_t* rows0p = rows0;
+      int16_t* rows1p = rows1;
+      for (int dx = 0; dx < w_out; dx++) {
+        int sx = xofs[dx];
+        int16_t a0 = ialphap[0];
+        int16_t a1 = ialphap[1];
+
+        const uint8_t* S0p = S0 + sx;
+        const uint8_t* S1p = S1 + sx;
+        rows0p[dx] = (S0p[0] * a0 + S0p[1] * a1) >> 4;
+        rows1p[dx] = (S1p[0] * a0 + S1p[1] * a1) >> 4;
+
+        ialphap += 2;
+      }
+    }
+
+    prev_sy1 = sy + 1;
+
+    // vresize
+    int16_t b0 = ibeta[0];
+    int16_t b1 = ibeta[1];
+
+    int16_t* rows0p = rows0;
+    int16_t* rows1p = rows1;
+    uint8_t* dp_ptr = dst + w_out * (dy);
+
+    int cnt = w_out >> 3;
+    int remain = w_out - (cnt << 3);
+    int16x4_t _b0 = vdup_n_s16(b0);
+    int16x4_t _b1 = vdup_n_s16(b1);
+    int32x4_t _v2 = vdupq_n_s32(2);
+
+    for (cnt = w_out >> 3; cnt > 0; cnt--) {
+      int16x4_t _rows0p_sr4 = vld1_s16(rows0p);
+      int16x4_t _rows1p_sr4 = vld1_s16(rows1p);
+      int16x4_t _rows0p_1_sr4 = vld1_s16(rows0p + 4);
+      int16x4_t _rows1p_1_sr4 = vld1_s16(rows1p + 4);
+
+      int32x4_t _rows0p_sr4_mb0 = vmull_s16(_rows0p_sr4, _b0);
+      int32x4_t _rows1p_sr4_mb1 = vmull_s16(_rows1p_sr4, _b1);
+      int32x4_t _rows0p_1_sr4_mb0 = vmull_s16(_rows0p_1_sr4, _b0);
+      int32x4_t _rows1p_1_sr4_mb1 = vmull_s16(_rows1p_1_sr4, _b1);
+
+      int32x4_t _acc = _v2;
+      _acc = vsraq_n_s32(_acc, _rows0p_sr4_mb0, 16);
+      _acc = vsraq_n_s32(_acc, _rows1p_sr4_mb1, 16);
+
+      int32x4_t _acc_1 = _v2;
+      _acc_1 = vsraq_n_s32(_acc_1, _rows0p_1_sr4_mb0, 16);
+      _acc_1 = vsraq_n_s32(_acc_1, _rows1p_1_sr4_mb1, 16);
+
+      int16x4_t _acc16 = vshrn_n_s32(_acc, 2);
+      int16x4_t _acc16_1 = vshrn_n_s32(_acc_1, 2);
+
+      uint8x8_t _dout = vqmovun_s16(vcombine_s16(_acc16, _acc16_1));
+
+      vst1_u8(dp_ptr, _dout);
+
+      dp_ptr += 8;
+      rows0p += 8;
+      rows1p += 8;
+    }
+    for (; remain; --remain) {
+      // D[x] = (rows0[x]*b0 + rows1[x]*b1) >> INTER_RESIZE_COEF_BITS;
+      *dp_ptr++ =
+          (uint8_t)(((int16_t)((b0 * (int16_t)(*rows0p++)) >> 16) +
+                     (int16_t)((b1 * (int16_t)(*rows1p++)) >> 16) + 2) >>
+                    2);
+    }
+    ibeta += 2;
+  }
+
+  delete[] buf;
+  delete[] rowsbuf0;
+  delete[] rowsbuf1;
+}
+
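+// Variant of resize_one_channel for the interleaved UV plane: horizontal
+// coefficients are computed once per UV pair (w_out / 2 entries), xofs[dx]
+// is doubled to step over the two-byte pairs, and both chroma samples of a
+// pair reuse the same a0/a1 weights.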
+void resize_one_channel_uv(const uint8_t* src,
+                           int w_in,
+                           int h_in,
+                           uint8_t* dst,
+                           int w_out,
+                           int h_out) {
+  const int resize_coef_bits = 11;
+  const int resize_coef_scale = 1 << resize_coef_bits;
+
+  double scale_x = static_cast<double>(w_in) / w_out;
+  double scale_y = static_cast<double>(h_in) / h_out;
+
+  int* buf = new int[w_out * 2 + h_out * 2];
+
+  int* xofs = buf;          // new int[w];
+  int* yofs = buf + w_out;  // new int[h];
+
+  int16_t* ialpha =
+      reinterpret_cast<int16_t*>(buf + w_out + h_out);  // new int16_t[w * 2];
+  int16_t* ibeta = reinterpret_cast<int16_t*>(buf + w_out * 2 +
+                                              h_out);  // new int16_t[h * 2];
+
+  float fx = 0.f;
+  float fy = 0.f;
+  int sx = 0;
+  int sy = 0;
+
+#define SATURATE_CAST_SHORT(X)                                               \
+  (int16_t)::std::min(                                                       \
+      ::std::max(static_cast<int>(X + (X >= 0.f ? 0.5f : -0.5f)), SHRT_MIN), \
+      SHRT_MAX);
+  for (int dx = 0; dx < w_out / 2; dx++) {
+    fx = static_cast<float>((dx + 0.5) * scale_x - 0.5);
+    sx = floor(fx);
+    fx -= sx;
+
+    if (sx < 0) {
+      sx = 0;
+      fx = 0.f;
+    }
+    if (sx >= w_in - 1) {
+      sx = w_in - 2;
+      fx = 1.f;
+    }
+
+    xofs[dx] = sx;
+
+    float a0 = (1.f - fx) * resize_coef_scale;
+    float a1 = fx * resize_coef_scale;
+
+    ialpha[dx * 2] = SATURATE_CAST_SHORT(a0);
+    ialpha[dx * 2 + 1] = SATURATE_CAST_SHORT(a1);
+  }
+  for (int dy = 0; dy < h_out; dy++) {
+    fy = static_cast<float>((dy + 0.5) * scale_y - 0.5);
+    sy = floor(fy);
+    fy -= sy;
+
+    if (sy < 0) {
+      sy = 0;
+      fy = 0.f;
+    }
+    if (sy >= h_in - 1) {
+      sy = h_in - 2;
+      fy = 1.f;
+    }
+
+    yofs[dy] = sy;
+
+    float b0 = (1.f - fy) * resize_coef_scale;
+    float b1 = fy * resize_coef_scale;
+
+    ibeta[dy * 2] = SATURATE_CAST_SHORT(b0);
+    ibeta[dy * 2 + 1] = SATURATE_CAST_SHORT(b1);
+  }
+
+#undef SATURATE_CAST_SHORT
+  // loop body
+  int16_t* rowsbuf0 = new int16_t[w_out + 1];
+  int16_t* rowsbuf1 = new int16_t[w_out + 1];
+  int16_t* rows0 = rowsbuf0;
+  int16_t* rows1 = rowsbuf1;
+
+  int prev_sy1 = -1;
+  for (int dy = 0; dy < h_out; dy++) {
+    int sy = yofs[dy];
+    if (sy == prev_sy1) {
+      // hresize one row
+      int16_t* rows0_old = rows0;
+      rows0 = rows1;
+      rows1 = rows0_old;
+      const uint8_t* S1 = src + w_in * (sy + 1);
+
+      const int16_t* ialphap = ialpha;
+      int16_t* rows1p = rows1;
+      for (int dx = 0; dx < w_out / 2; dx++) {
+        int sx = xofs[dx] * 2;
+        int16_t a0 = ialphap[0];
+        int16_t a1 = ialphap[1];
+        const uint8_t* S1p = S1 + sx;
+        int tmp = dx * 2;
+        rows1p[tmp] = (S1p[0] * a0 + S1p[2] * a1) >> 4;
+        rows1p[tmp + 1] = (S1p[1] * a0 + S1p[3] * a1) >> 4;
+
+        ialphap += 2;
+      }
+    } else {
+      // hresize two rows
+      const uint8_t* S0 = src + w_in * (sy);
+      const uint8_t* S1 = src + w_in * (sy + 1);
+
+      const int16_t* ialphap = ialpha;
+      int16_t* rows0p = rows0;
+      int16_t* rows1p = rows1;
+      for (int dx = 0; dx < w_out / 2; dx++) {
+        int sx = xofs[dx] * 2;
+        int16_t a0 = ialphap[0];
+        int16_t a1 = ialphap[1];
+
+        const uint8_t* S0p = S0 + sx;
+        const uint8_t* S1p = S1 + sx;
+        int tmp = dx * 2;
+        rows0p[tmp] = (S0p[0] * a0 + S0p[2] * a1) >> 4;
+        rows1p[tmp] = (S1p[0] * a0 + S1p[2] * a1) >> 4;
+
+        rows0p[tmp + 1] = (S0p[1] * a0 + S0p[3] * a1) >> 4;
+        rows1p[tmp + 1] = (S1p[1] * a0 + S1p[3] * a1) >> 4;
+        ialphap += 2;
+      }
+    }
+    prev_sy1 = sy + 1;
+
+    // vresize
+    int16_t b0 = ibeta[0];
+    int16_t b1 = ibeta[1];
+
+    int16_t* rows0p = rows0;
+    int16_t* rows1p = rows1;
+    uint8_t* dp_ptr = dst + w_out * (dy);
+
+    int cnt = w_out >> 3;
+    int remain = w_out - (cnt << 3);
+    int16x4_t _b0 = vdup_n_s16(b0);
+    int16x4_t _b1 = vdup_n_s16(b1);
+    int32x4_t _v2 = vdupq_n_s32(2);
+    for (cnt = w_out >> 3; cnt > 0; cnt--) {
+      int16x4_t _rows0p_sr4 = vld1_s16(rows0p);
+      int16x4_t _rows1p_sr4 = vld1_s16(rows1p);
+      int16x4_t _rows0p_1_sr4 = vld1_s16(rows0p + 4);
+      int16x4_t _rows1p_1_sr4 = vld1_s16(rows1p + 4);
+
+      int32x4_t _rows0p_sr4_mb0 = vmull_s16(_rows0p_sr4, _b0);
+      int32x4_t _rows1p_sr4_mb1 = vmull_s16(_rows1p_sr4, _b1);
+      int32x4_t _rows0p_1_sr4_mb0 = vmull_s16(_rows0p_1_sr4, _b0);
+      int32x4_t _rows1p_1_sr4_mb1 = vmull_s16(_rows1p_1_sr4, _b1);
+
+      int32x4_t _acc = _v2;
+      _acc = vsraq_n_s32(
+          _acc, _rows0p_sr4_mb0, 16);  // _acc >> 16 + _rows0p_sr4_mb0 >> 16
+      _acc = vsraq_n_s32(_acc, _rows1p_sr4_mb1, 16);
+
+      int32x4_t _acc_1 = _v2;
+      _acc_1 = vsraq_n_s32(_acc_1, _rows0p_1_sr4_mb0, 16);
+      _acc_1 = vsraq_n_s32(_acc_1, _rows1p_1_sr4_mb1, 16);
+
+      int16x4_t _acc16 = vshrn_n_s32(_acc, 2);  // _acc >> 2
+      int16x4_t _acc16_1 = vshrn_n_s32(_acc_1, 2);
+
+      uint8x8_t _dout = vqmovun_s16(vcombine_s16(_acc16, _acc16_1));
+
+      vst1_u8(dp_ptr, _dout);
+
+      dp_ptr += 8;
+      rows0p += 8;
+      rows1p += 8;
+    }
+    for (; remain; --remain) {
+      // D[x] = (rows0[x]*b0 + rows1[x]*b1) >> INTER_RESIZE_COEF_BITS;
+      *dp_ptr++ =
+          (uint8_t)(((int16_t)((b0 * (int16_t)(*rows0p++)) >> 16) +
+                     (int16_t)((b1 * (int16_t)(*rows1p++)) >> 16) + 2) >>
+                    2);
+    }
+    ibeta += 2;
+  }
+
+  delete[] buf;
+  delete[] rowsbuf0;
+  delete[] rowsbuf1;
+}
+
 void compute_xy(int srcw,
                 int srch,
                 int dstw,
@@ -95,10 +681,14 @@ void resize(const uint8_t* src,
   if (srcFormat == GRAY) {
     num = 1;
   } else if (srcFormat == NV12 || srcFormat == NV21) {
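+    // Route NV12/NV21 (and, below, BGR/RGB) to the specialized resize paths
+    // and return early; the generic path that follows no longer runs for
+    // these formats.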
+    nv21_resize(src, dst, srcw, srch, dstw, dsth);
+    return;
     num = 1;
     int hout = static_cast<int>(0.5 * dsth);
     dsth += hout;
   } else if (srcFormat == BGR || srcFormat == RGB) {
+    bgr_resize(src, dst, srcw, srch, dstw, dsth);
+    return;
     w_in = srcw * 3;
     w_out = dstw * 3;
     num = 3;
@@ -117,13 +707,12 @@ void resize(const uint8_t* src,
   int16_t* ialpha1 = nullptr;
   if (orih < dsth) {  // uv
     int tmp = dsth - orih;
-    int w = dstw / 2;
-    xofs1 = new int[w];
+    xofs1 = new int[dstw];
     yofs1 = new int[tmp];
-    ialpha1 = new int16_t[srcw];
-    compute_xy(srcw / 2,
+    ialpha1 = new int16_t[dstw];
+    compute_xy(srcw,
               srch / 2,
-              w,
+              dstw / 2,
              tmp,
              2,
              scale_x,
@@ -139,15 +728,15 @@ void resize(const uint8_t* src,
   int prev_sy1 = -1;
 #pragma omp parallel for
   for (int dy = 0; dy < dsth; dy++) {
-    int16_t* rowsbuf0 = new int16_t[w_out];
-    int16_t* rowsbuf1 = new int16_t[w_out];
+    int16_t* rowsbuf0 = new int16_t[w_out + 1];
+    int16_t* rowsbuf1 = new int16_t[w_out + 1];
     int sy = yofs[dy];
     if (dy >= orih) {
       xofs = xofs1;
      yofs = yofs1;
      ialpha = ialpha1;
      num = 2;
-      sy = yofs1[dy - orih];
+      sy = yofs1[dy - orih] + srch;
    }
    // hresize two rows