Unverified · Commit 515f9a6a authored by HappyAngel, committed by GitHub

[arm] add cv unit_test (#4250)

add cv_ut. test=develop
add Anakin implementation
add image_profiler test
Parent 339c2e53
@@ -91,14 +91,24 @@ ImagePreprocess::ImagePreprocess(ImageFormat srcFormat, ImageFormat dstFormat, T
// Method 2
void ImagePreprocess::imageCovert(const uint8_t* src,
uint8_t* dst, ImageFormat srcFormat, ImageFormat dstFormat);
// Method 3
void ImagePreprocess::imageCovert(const uint8_t* src,
uint8_t* dst, ImageFormat srcFormat, ImageFormat dstFormat,
int srcw, int srch);
```
- For the first `imageCovert` interface, the default parameters come from member variables of the `ImagePreprocess` class, so the following members must be set when the `ImagePreprocess` object is constructed:
    - param srcFormat: the `srcFormat_` member of the `ImagePreprocess` class
    - param dstFormat: the `dstFormat_` member of the `ImagePreprocess` class
    - param srcw: the `iw` field of the `transParam_` member struct of the `ImagePreprocess` class
    - param srch: the `ih` field of the `transParam_` member struct of the `ImagePreprocess` class
- For the second `imageCovert` interface, the default size parameters likewise come from member variables, so the following members must be set when the object is constructed:
    - param srcw: the `iw` field of the `transParam_` member struct of the `ImagePreprocess` class
    - param srch: the `ih` field of the `transParam_` member struct of the `ImagePreprocess` class
- The third `imageCovert` interface can be used directly; a usage sketch of all three overloads follows.
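A minimal usage sketch of the three overloads (hypothetical sizes; the header path, the `TransParam` field names beyond `iw`/`ih`, and the two-argument form of the first overload are assumptions based on the list above, not taken from this diff):

```cpp
#include "lite/utils/cv/paddle_image_preprocess.h"  // assumed header location

using namespace paddle::lite::utils::cv;

void convert_demo(const uint8_t* src, uint8_t* dst) {
  TransParam tparam;
  tparam.iw = 1920;  // becomes the default srcw
  tparam.ih = 1080;  // becomes the default srch
  ImagePreprocess preprocess(ImageFormat::NV12, ImageFormat::BGR, tparam);

  // Method 1: formats and sizes all come from the members set above.
  preprocess.imageCovert(src, dst);
  // Method 2: formats passed explicitly; srcw/srch still read from transParam_.
  preprocess.imageCovert(src, dst, ImageFormat::NV12, ImageFormat::BGR);
  // Method 3: fully explicit, independent of the member state.
  preprocess.imageCovert(
      src, dst, ImageFormat::NV12, ImageFormat::BGR, 1920, 1080);
}
```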
### Resize
`Resize` supports the following color spaces: GRAY, NV12(NV21), RGB(BGR), and RGBA(BGRA).
add_subdirectory(kernels)
add_subdirectory(math)
add_subdirectory(cv)
add_subdirectory(cv/anakin)
add_subdirectory(api)
if(LITE_WITH_CV AND (NOT LITE_WITH_OPENCL AND NOT LITE_WITH_FPGA AND NOT LITE_WITH_MLU) AND LITE_WITH_ARM)
lite_cc_test(image_convert_test SRCS image_convert_test.cc DEPS paddle_cv_arm)
lite_cc_test(image_profiler_test SRCS image_profiler_test.cc DEPS paddle_cv_arm anakin_cv_arm)
endif()
if(LITE_WITH_CV AND (NOT LITE_WITH_OPENCL AND NOT LITE_WITH_FPGA AND NOT LITE_WITH_MLU) AND LITE_WITH_ARM)
lite_cc_library(anakin_cv_arm SRCS
bgr_resize.cc
bgr_flip_hwc.cc
bgr_rotate_hwc.cc
bgr_to_tensor_hwc.cc
bgra_resize.cc
bgra_flip_hwc.cc
bgra_rotate_hwc.cc
bgra_to_tensor_hwc.cc
cv_utils.cc
nv12_to_bgr.cc
nv12_to_bgra.cc
nv21_to_bgr.cc
nv21_to_bgra.cc
nv21_resize.cc
DEPS paddle_api place)
endif()
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <limits.h>
#include <math.h>
#include "lite/tests/cv/anakin/cv_utils.h"
void resize_three_channel(
const uint8_t* src, int w_in, int h_in, uint8_t* dst, int w_out, int h_out);
void bgr_resize(const uint8_t* src,
uint8_t* dst,
int w_in,
int h_in,
int w_out,
int h_out) {
if (w_out == w_in && h_out == h_in) {
memcpy(dst, src, sizeof(char) * w_in * h_in * 3);
return;
}
// treat each interleaved BGR row as a single channel of width w * 3
resize_three_channel(src, w_in * 3, h_in, dst, w_out * 3, h_out);
}
void resize_three_channel(const uint8_t* src,
int w_in,
int h_in,
uint8_t* dst,
int w_out,
int h_out) {
const int resize_coef_bits = 11;
const int resize_coef_scale = 1 << resize_coef_bits;
double scale_x = static_cast<double>(w_in) / w_out;
double scale_y = static_cast<double>(h_in) / h_out;
int* buf = new int[w_out * 2 + h_out * 2];
int* xofs = buf; // new int[w];
int* yofs = buf + w_out; // new int[h];
int16_t* ialpha =
reinterpret_cast<int16_t*>(buf + w_out + h_out); // new int16_t[w * 2];
int16_t* ibeta =
reinterpret_cast<int16_t*>(buf + w_out * 2 + h_out); // new short[h * 2];
float fx = 0.f;
float fy = 0.f;
  int sx = 0;
  int sy = 0;
#define SATURATE_CAST_SHORT(X) \
(int16_t)::std::min( \
::std::max(static_cast<int>(X + (X >= 0.f ? 0.5f : -0.5f)), SHRT_MIN), \
SHRT_MAX);
for (int dx = 0; dx < w_out / 3; dx++) {
fx = static_cast<float>((dx + 0.5) * scale_x - 0.5);
sx = floor(fx);
fx -= sx;
if (sx < 0) {
sx = 0;
fx = 0.f;
}
if (sx >= w_in - 1) {
sx = w_in - 2;
fx = 1.f;
}
xofs[dx] = sx * 3;
float a0 = (1.f - fx) * resize_coef_scale;
float a1 = fx * resize_coef_scale;
ialpha[dx * 2] = SATURATE_CAST_SHORT(a0);
ialpha[dx * 2 + 1] = SATURATE_CAST_SHORT(a1);
}
for (int dy = 0; dy < h_out; dy++) {
fy = static_cast<float>((dy + 0.5) * scale_y - 0.5);
sy = floor(fy);
fy -= sy;
if (sy < 0) {
sy = 0;
fy = 0.f;
}
if (sy >= h_in - 1) {
sy = h_in - 2;
fy = 1.f;
}
yofs[dy] = sy;
float b0 = (1.f - fy) * resize_coef_scale;
float b1 = fy * resize_coef_scale;
ibeta[dy * 2] = SATURATE_CAST_SHORT(b0);
ibeta[dy * 2 + 1] = SATURATE_CAST_SHORT(b1);
}
#undef SATURATE_CAST_SHORT
// loop body
int16_t* rowsbuf0 = new int16_t[w_out + 1];
int16_t* rowsbuf1 = new int16_t[w_out + 1];
int16_t* rows0 = rowsbuf0;
int16_t* rows1 = rowsbuf1;
int prev_sy1 = -1;
for (int dy = 0; dy < h_out; dy++) {
int sy = yofs[dy];
if (sy == prev_sy1) {
// hresize one row
int16_t* rows0_old = rows0;
rows0 = rows1;
rows1 = rows0_old;
const uint8_t* S1 = src + w_in * (sy + 1);
const int16_t* ialphap = ialpha;
int16_t* rows1p = rows1;
for (int dx = 0; dx < w_out / 3; dx++) {
int sx = xofs[dx];
int16_t a0 = ialphap[0];
int16_t a1 = ialphap[1];
const uint8_t* S1p = S1 + sx;
int tmp = dx * 3;
rows1p[tmp] = (S1p[0] * a0 + S1p[3] * a1) >> 4;
rows1p[tmp + 1] = (S1p[1] * a0 + S1p[4] * a1) >> 4;
rows1p[tmp + 2] = (S1p[2] * a0 + S1p[5] * a1) >> 4;
ialphap += 2;
}
} else {
// hresize two rows
const uint8_t* S0 = src + w_in * (sy);
const uint8_t* S1 = src + w_in * (sy + 1);
const int16_t* ialphap = ialpha;
int16_t* rows0p = rows0;
int16_t* rows1p = rows1;
for (int dx = 0; dx < w_out / 3; dx++) {
int sx = xofs[dx];
int16_t a0 = ialphap[0];
int16_t a1 = ialphap[1];
const uint8_t* S0p = S0 + sx;
const uint8_t* S1p = S1 + sx;
int tmp = dx * 3;
rows0p[tmp] = (S0p[0] * a0 + S0p[3] * a1) >> 4;
rows1p[tmp] = (S1p[0] * a0 + S1p[3] * a1) >> 4;
rows0p[tmp + 1] = (S0p[1] * a0 + S0p[4] * a1) >> 4;
rows1p[tmp + 1] = (S1p[1] * a0 + S1p[4] * a1) >> 4;
rows0p[tmp + 2] = (S0p[2] * a0 + S0p[5] * a1) >> 4;
rows1p[tmp + 2] = (S1p[2] * a0 + S1p[5] * a1) >> 4;
ialphap += 2;
}
}
prev_sy1 = sy + 1;
// vresize
int16_t b0 = ibeta[0];
int16_t b1 = ibeta[1];
int16_t* rows0p = rows0;
int16_t* rows1p = rows1;
uint8_t* dp_ptr = dst + w_out * (dy);
int cnt = w_out >> 3;
int remain = w_out - (cnt << 3);
int16x4_t _b0 = vdup_n_s16(b0);
int16x4_t _b1 = vdup_n_s16(b1);
int32x4_t _v2 = vdupq_n_s32(2);
for (cnt = w_out >> 3; cnt > 0; cnt--) {
int16x4_t _rows0p_sr4 = vld1_s16(rows0p);
int16x4_t _rows1p_sr4 = vld1_s16(rows1p);
int16x4_t _rows0p_1_sr4 = vld1_s16(rows0p + 4);
int16x4_t _rows1p_1_sr4 = vld1_s16(rows1p + 4);
int32x4_t _rows0p_sr4_mb0 = vmull_s16(_rows0p_sr4, _b0);
int32x4_t _rows1p_sr4_mb1 = vmull_s16(_rows1p_sr4, _b1);
int32x4_t _rows0p_1_sr4_mb0 = vmull_s16(_rows0p_1_sr4, _b0);
int32x4_t _rows1p_1_sr4_mb1 = vmull_s16(_rows1p_1_sr4, _b1);
int32x4_t _acc = _v2;
_acc = vsraq_n_s32(
_acc, _rows0p_sr4_mb0, 16); // _acc >> 16 + _rows0p_sr4_mb0 >> 16
_acc = vsraq_n_s32(_acc, _rows1p_sr4_mb1, 16);
int32x4_t _acc_1 = _v2;
_acc_1 = vsraq_n_s32(_acc_1, _rows0p_1_sr4_mb0, 16);
_acc_1 = vsraq_n_s32(_acc_1, _rows1p_1_sr4_mb1, 16);
int16x4_t _acc16 = vshrn_n_s32(_acc, 2); // _acc >> 2
int16x4_t _acc16_1 = vshrn_n_s32(_acc_1, 2);
uint8x8_t _dout = vqmovun_s16(vcombine_s16(_acc16, _acc16_1));
vst1_u8(dp_ptr, _dout);
dp_ptr += 8;
rows0p += 8;
rows1p += 8;
}
for (; remain; --remain) {
// D[x] = (rows0[x]*b0 + rows1[x]*b1) >> INTER_RESIZE_COEF_BITS;
*dp_ptr++ =
(uint8_t)(((int16_t)((b0 * (int16_t)(*rows0p++)) >> 16) +
(int16_t)((b1 * (int16_t)(*rows1p++)) >> 16) + 2) >>
2);
}
ibeta += 2;
}
delete[] buf;
delete[] rowsbuf0;
delete[] rowsbuf1;
}
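// ---------------------------------------------------------------------------
// Sanity-check sketch (standalone illustration, not part of this commit):
// the 11-bit coefficients sum to 2^11; the horizontal pass drops 4 bits,
// holding the row buffers at scale 2^7; vsraq_n_s32(..., 16) drops 16 more,
// leaving scale 2^2; the final "+2, >>2" rounds the last two fractional bits.
#ifdef CV_UTILS_RESIZE_DEMO  // hypothetical guard so the sketch never builds
#include <cstdio>
int main() {
  const int coef_scale = 1 << 11;  // matches resize_coef_scale above
  float fx = 0.25f, fy = 0.75f;    // fractional source offsets
  int16_t a0 = (1.f - fx) * coef_scale, a1 = fx * coef_scale;
  int16_t b0 = (1.f - fy) * coef_scale, b1 = fy * coef_scale;
  uint8_t p00 = 10, p01 = 50, p10 = 30, p11 = 90;  // 2x2 source neighborhood
  int16_t r0 = (p00 * a0 + p01 * a1) >> 4;  // horizontal pass, scale 2^7
  int16_t r1 = (p10 * a0 + p11 * a1) >> 4;
  uint8_t out = (uint8_t)((((r0 * b0) >> 16) + ((r1 * b1) >> 16) + 2) >> 2);
  // Float bilinear gives 38.75; the fixed-point path rounds to 39.
  printf("fixed-point: %d\n", out);
  return 0;
}
#endif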
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/tests/cv/anakin/cv_utils.h"
void bgr_to_tensor_hwc(const uint8_t* bgr,
Tensor& output, // NOLINT
int width,
int height,
float* means,
float* scales) {
int size = width * height;
float* ptr0 = output.mutable_data<float>();
float r_means = means[0];
float g_means = means[1];
float b_means = means[2];
float r_scales = scales[0];
float g_scales = scales[1];
float b_scales = scales[2];
int w = width;
int dim8 = w >> 3;
int remain = w - (dim8 << 3);
float32x4_t vrmean = vdupq_n_f32(r_means);
float32x4_t vgmean = vdupq_n_f32(g_means);
float32x4_t vbmean = vdupq_n_f32(b_means);
float32x4_t vrscale = vdupq_n_f32(r_scales);
float32x4_t vgscale = vdupq_n_f32(g_scales);
float32x4_t vbscale = vdupq_n_f32(b_scales);
for (int i = 0; i < height; i++) {
const uint8_t* ptr_bgr = bgr + i * width * 3;
float* ptr0_b = ptr0 + i * width;
float* ptr1_g = ptr0_b + size;
float* ptr2_r = ptr1_g + size;
for (int j = 0; j < dim8; j++) {
uint8x8x3_t vbgr = vld3_u8(ptr_bgr);
uint8x8_t vb = vbgr.val[0];
uint8x8_t vg = vbgr.val[1];
uint8x8_t vr = vbgr.val[2];
uint16x8_t vb_16 = vmovl_u8(vb);
uint16x8_t vg_16 = vmovl_u8(vg);
uint16x8_t vr_16 = vmovl_u8(vr);
uint32x4_t vb_low_32 = vmovl_u16(vget_low_u16(vb_16));
uint32x4_t vg_low_32 = vmovl_u16(vget_low_u16(vg_16));
uint32x4_t vr_low_32 = vmovl_u16(vget_low_u16(vr_16));
uint32x4_t vb_high_32 = vmovl_u16(vget_high_u16(vb_16));
uint32x4_t vg_high_32 = vmovl_u16(vget_high_u16(vg_16));
uint32x4_t vr_high_32 = vmovl_u16(vget_high_u16(vr_16));
float32x4_t vb_low_f32 = vcvtq_f32_u32(vb_low_32);
float32x4_t vr_low_f32 = vcvtq_f32_u32(vr_low_32);
float32x4_t vg_low_f32 = vcvtq_f32_u32(vg_low_32);
float32x4_t vb_high_f32 = vcvtq_f32_u32(vb_high_32);
float32x4_t vg_high_f32 = vcvtq_f32_u32(vg_high_32);
float32x4_t vr_high_f32 = vcvtq_f32_u32(vr_high_32);
vb_low_f32 = vsubq_f32(vb_low_f32, vbmean);
vg_low_f32 = vsubq_f32(vg_low_f32, vgmean);
vr_low_f32 = vsubq_f32(vr_low_f32, vrmean);
vb_high_f32 = vsubq_f32(vb_high_f32, vbmean);
vg_high_f32 = vsubq_f32(vg_high_f32, vgmean);
vr_high_f32 = vsubq_f32(vr_high_f32, vrmean);
vb_low_f32 = vmulq_f32(vb_low_f32, vbscale);
vg_low_f32 = vmulq_f32(vg_low_f32, vgscale);
vr_low_f32 = vmulq_f32(vr_low_f32, vrscale);
vb_high_f32 = vmulq_f32(vb_high_f32, vbscale);
vg_high_f32 = vmulq_f32(vg_high_f32, vgscale);
vr_high_f32 = vmulq_f32(vr_high_f32, vrscale);
vst1q_f32(ptr0_b, vb_low_f32);
vst1q_f32(ptr1_g, vg_low_f32);
vst1q_f32(ptr2_r, vr_low_f32);
ptr_bgr += 24;
vst1q_f32(ptr0_b + 4, vb_high_f32);
vst1q_f32(ptr1_g + 4, vg_high_f32);
vst1q_f32(ptr2_r + 4, vr_high_f32);
ptr0_b += 8;
ptr1_g += 8;
ptr2_r += 8;
}
for (int j = 0; j < remain; j++) {
*ptr0_b++ = (*ptr_bgr - b_means) * b_scales; // NOLINT
ptr_bgr++;
*ptr1_g++ = (*ptr_bgr - g_means) * g_scales; // NOLINT
ptr_bgr++;
*ptr2_r++ = (*ptr_bgr - r_means) * r_scales; // NOLINT
ptr_bgr++;
}
}
}
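// Scalar reference for the kernel above — a sketch for unit-test comparison,
// not used by the library. Contract: output is planar CHW float with the B
// plane first (matching ptr0_b/ptr1_g/ptr2_r), and means/scales are ordered
// R, G, B. `out` must hold 3 * width * height floats.
void bgr_to_tensor_hwc_ref(const uint8_t* bgr,
                           float* out,
                           int width,
                           int height,
                           const float* means,
                           const float* scales) {
  int size = width * height;
  for (int i = 0; i < height; i++) {
    for (int j = 0; j < width; j++) {
      const uint8_t* p = bgr + (i * width + j) * 3;
      out[0 * size + i * width + j] = (p[0] - means[2]) * scales[2];  // B
      out[1 * size + i * width + j] = (p[1] - means[1]) * scales[1];  // G
      out[2 * size + i * width + j] = (p[2] - means[0]) * scales[0];  // R
    }
  }
}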
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <limits.h>
#include <math.h>
#include "lite/tests/cv/anakin/cv_utils.h"
void resize_four_channel(
const uint8_t* src, int w_in, int h_in, uint8_t* dst, int w_out, int h_out);
void bgra_resize(const uint8_t* src,
uint8_t* dst,
int w_in,
int h_in,
int w_out,
int h_out) {
if (w_out == w_in && h_out == h_in) {
memcpy(dst, src, sizeof(char) * w_in * h_in * 4);
return;
}
// treat each interleaved BGRA row as a single channel of width w * 4
resize_four_channel(src, w_in * 4, h_in, dst, w_out * 4, h_out);
}
void resize_four_channel(const uint8_t* src,
int w_in,
int h_in,
uint8_t* dst,
int w_out,
int h_out) {
const int resize_coef_bits = 11;
const int resize_coef_scale = 1 << resize_coef_bits;
double scale_x = static_cast<double>(w_in) / w_out;
double scale_y = static_cast<double>(h_in) / h_out;
int* buf = new int[w_out * 2 + h_out * 2];
int* xofs = buf; // new int[w];
int* yofs = buf + w_out; // new int[h];
int16_t* ialpha =
reinterpret_cast<int16_t*>(buf + w_out + h_out); // new int16_t[w * 2];
int16_t* ibeta = reinterpret_cast<int16_t*>(buf + w_out * 2 +
h_out); // new int16_t[h * 2];
float fx = 0.f;
float fy = 0.f;
  int sx = 0;
  int sy = 0;
#define SATURATE_CAST_int16_t(X) \
(int16_t)::std::min( \
::std::max(static_cast<int>(X + (X >= 0.f ? 0.5f : -0.5f)), SHRT_MIN), \
SHRT_MAX);
for (int dx = 0; dx < w_out / 4; dx++) {
fx = static_cast<float>((dx + 0.5) * scale_x - 0.5);
sx = floor(fx);
fx -= sx;
if (sx < 0) {
sx = 0;
fx = 0.f;
}
if (sx >= w_in - 1) {
sx = w_in - 2;
fx = 1.f;
}
xofs[dx] = sx * 4;
float a0 = (1.f - fx) * resize_coef_scale;
float a1 = fx * resize_coef_scale;
ialpha[dx * 2] = SATURATE_CAST_int16_t(a0);
ialpha[dx * 2 + 1] = SATURATE_CAST_int16_t(a1);
}
for (int dy = 0; dy < h_out; dy++) {
fy = static_cast<float>((dy + 0.5) * scale_y - 0.5);
sy = floor(fy);
fy -= sy;
if (sy < 0) {
sy = 0;
fy = 0.f;
}
if (sy >= h_in - 1) {
sy = h_in - 2;
fy = 1.f;
}
yofs[dy] = sy;
float b0 = (1.f - fy) * resize_coef_scale;
float b1 = fy * resize_coef_scale;
ibeta[dy * 2] = SATURATE_CAST_int16_t(b0);
ibeta[dy * 2 + 1] = SATURATE_CAST_int16_t(b1);
}
#undef SATURATE_CAST_int16_t
// loop body
int16_t* rowsbuf0 = new int16_t[w_out + 1];
int16_t* rowsbuf1 = new int16_t[w_out + 1];
int16_t* rows0 = rowsbuf0;
int16_t* rows1 = rowsbuf1;
int prev_sy1 = -1;
for (int dy = 0; dy < h_out; dy++) {
int sy = yofs[dy];
if (sy == prev_sy1) {
// hresize one row
int16_t* rows0_old = rows0;
rows0 = rows1;
rows1 = rows0_old;
const uint8_t* S1 = src + w_in * (sy + 1);
const int16_t* ialphap = ialpha;
int16_t* rows1p = rows1;
for (int dx = 0; dx < w_out / 4; dx++) {
int sx = xofs[dx];
int16_t a0 = ialphap[0];
int16_t a1 = ialphap[1];
const uint8_t* S1p = S1 + sx;
int tmp = dx * 4;
rows1p[tmp] = (S1p[0] * a0 + S1p[4] * a1) >> 4;
rows1p[tmp + 1] = (S1p[1] * a0 + S1p[5] * a1) >> 4;
rows1p[tmp + 2] = (S1p[2] * a0 + S1p[6] * a1) >> 4;
rows1p[tmp + 3] = (S1p[3] * a0 + S1p[7] * a1) >> 4;
ialphap += 2;
}
} else {
// hresize two rows
const uint8_t* S0 = src + w_in * (sy);
const uint8_t* S1 = src + w_in * (sy + 1);
const int16_t* ialphap = ialpha;
int16_t* rows0p = rows0;
int16_t* rows1p = rows1;
for (int dx = 0; dx < w_out / 4; dx++) {
int sx = xofs[dx];
int16_t a0 = ialphap[0];
int16_t a1 = ialphap[1];
const uint8_t* S0p = S0 + sx;
const uint8_t* S1p = S1 + sx;
int tmp = dx * 4;
rows0p[tmp] = (S0p[0] * a0 + S0p[4] * a1) >> 4;
rows1p[tmp] = (S1p[0] * a0 + S1p[4] * a1) >> 4;
rows0p[tmp + 1] = (S0p[1] * a0 + S0p[5] * a1) >> 4;
rows1p[tmp + 1] = (S1p[1] * a0 + S1p[5] * a1) >> 4;
rows0p[tmp + 2] = (S0p[2] * a0 + S0p[6] * a1) >> 4;
rows1p[tmp + 2] = (S1p[2] * a0 + S1p[6] * a1) >> 4;
rows0p[tmp + 3] = (S0p[3] * a0 + S0p[7] * a1) >> 4;
rows1p[tmp + 3] = (S1p[3] * a0 + S1p[7] * a1) >> 4;
ialphap += 2;
}
}
prev_sy1 = sy + 1;
// vresize
int16_t b0 = ibeta[0];
int16_t b1 = ibeta[1];
int16_t* rows0p = rows0;
int16_t* rows1p = rows1;
uint8_t* dp_ptr = dst + w_out * (dy);
int cnt = w_out >> 3;
int remain = w_out - (cnt << 3);
int16x4_t _b0 = vdup_n_s16(b0);
int16x4_t _b1 = vdup_n_s16(b1);
int32x4_t _v2 = vdupq_n_s32(2);
for (cnt = w_out >> 3; cnt > 0; cnt--) {
int16x4_t _rows0p_sr4 = vld1_s16(rows0p);
int16x4_t _rows1p_sr4 = vld1_s16(rows1p);
int16x4_t _rows0p_1_sr4 = vld1_s16(rows0p + 4);
int16x4_t _rows1p_1_sr4 = vld1_s16(rows1p + 4);
int32x4_t _rows0p_sr4_mb0 = vmull_s16(_rows0p_sr4, _b0);
int32x4_t _rows1p_sr4_mb1 = vmull_s16(_rows1p_sr4, _b1);
int32x4_t _rows0p_1_sr4_mb0 = vmull_s16(_rows0p_1_sr4, _b0);
int32x4_t _rows1p_1_sr4_mb1 = vmull_s16(_rows1p_1_sr4, _b1);
int32x4_t _acc = _v2;
_acc = vsraq_n_s32(
_acc, _rows0p_sr4_mb0, 16); // _acc >> 16 + _rows0p_sr4_mb0 >> 16
_acc = vsraq_n_s32(_acc, _rows1p_sr4_mb1, 16);
int32x4_t _acc_1 = _v2;
_acc_1 = vsraq_n_s32(_acc_1, _rows0p_1_sr4_mb0, 16);
_acc_1 = vsraq_n_s32(_acc_1, _rows1p_1_sr4_mb1, 16);
int16x4_t _acc16 = vshrn_n_s32(_acc, 2); // _acc >> 2
int16x4_t _acc16_1 = vshrn_n_s32(_acc_1, 2);
uint8x8_t _dout = vqmovun_s16(vcombine_s16(_acc16, _acc16_1));
vst1_u8(dp_ptr, _dout);
dp_ptr += 8;
rows0p += 8;
rows1p += 8;
}
for (; remain; --remain) {
// D[x] = (rows0[x]*b0 + rows1[x]*b1) >> INTER_RESIZE_COEF_BITS;
*dp_ptr++ =
(uint8_t)(((int16_t)((b0 * (int16_t)(*rows0p++)) >> 16) +
(int16_t)((b1 * (int16_t)(*rows1p++)) >> 16) + 2) >>
2);
}
ibeta += 2;
}
delete[] buf;
delete[] rowsbuf0;
delete[] rowsbuf1;
}
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/tests/cv/anakin/cv_utils.h"
void rotate90_hwc_bgra(const uint8_t* src, uint8_t* dst, int w_in, int h_in);
void rotate270_hwc_bgra(const uint8_t* src, uint8_t* dst, int w_in, int h_in);
void rotate180_hwc_bgra(const uint8_t* src, uint8_t* dst, int w_in, int h_in);
void bgra_rotate_hwc(
const uint8_t* src, uint8_t* dst, int w_in, int h_in, int angle) {
if (angle == 90) {
rotate90_hwc_bgra(src, dst, w_in, h_in);
}
if (angle == 270) {
rotate270_hwc_bgra(src, dst, w_in, h_in);
}
if (angle == 180) {
rotate180_hwc_bgra(src, dst, w_in, h_in);
}
}
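// Generic scalar reference for the three specialized kernels below — a sketch
// of the index mapping they implement, for any interleaved channel count,
// usable as a unit-test oracle (my own cross-check, not code from this commit).
void rotate_hwc_ref(const uint8_t* src,
                    uint8_t* dst,
                    int w_in,
                    int h_in,
                    int ch,
                    int angle) {
  for (int i = 0; i < h_in; i++) {
    for (int j = 0; j < w_in; j++) {
      int r, c, w_out;
      if (angle == 90) {  // clockwise: dst(j, h_in - 1 - i)
        r = j;
        c = h_in - 1 - i;
        w_out = h_in;
      } else if (angle == 270) {  // counter-clockwise: dst(w_in - 1 - j, i)
        r = w_in - 1 - j;
        c = i;
        w_out = h_in;
      } else {  // 180: dst(h_in - 1 - i, w_in - 1 - j)
        r = h_in - 1 - i;
        c = w_in - 1 - j;
        w_out = w_in;
      }
      for (int k = 0; k < ch; k++) {
        dst[(r * w_out + c) * ch + k] = src[(i * w_in + j) * ch + k];
      }
    }
  }
}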
/*
bgr1 bgr2 bgr3
bgr4 bgr5 bgr6
bgr7 bgr8 bgr9
rotate:
bgr7 bgr4 bgr1
bgr8 bgr5 bgr2
bgr9 bgr6 bgr3
*/
void rotate90_hwc_bgra(const uint8_t* src, uint8_t* dst, int w_in, int h_in) {
int w_out = h_in;
int h_out = w_in;
int win = w_in * 4;
int wout = w_out * 4;
int hremain = h_in % 8;
int stride_h = 4 * win;
int stride_h_w = 4 * win - 32;
int ww = w_out - 8;
// block 8*8. -- 8*8
int i = 0;
for (i = 0; i < h_in - 7; i += 8) {
const uint8_t* inptr0 = src + i * win;
const uint8_t* inptr1 = inptr0 + win;
const uint8_t* inptr2 = inptr1 + win;
const uint8_t* inptr3 = inptr2 + win;
const uint8_t* inptr4 = inptr3 + win;
const uint8_t* inptr5 = inptr4 + win;
const uint8_t* inptr6 = inptr5 + win;
const uint8_t* inptr7 = inptr6 + win;
#ifdef __aarch64__
asm volatile(
"prfm pldl1keep, [%[ptr0]] \n"
"prfm pldl1keep, [%[ptr0], #64] \n"
"prfm pldl1keep, [%[ptr1]] \n"
"prfm pldl1keep, [%[ptr1], #64] \n"
"prfm pldl1keep, [%[ptr2]] \n"
"prfm pldl1keep, [%[ptr2], #64] \n"
"prfm pldl1keep, [%[ptr3]] \n"
"prfm pldl1keep, [%[ptr3], #64] \n"
"prfm pldl1keep, [%[ptr4]] \n"
"prfm pldl1keep, [%[ptr4], #64] \n"
"prfm pldl1keep, [%[ptr5]] \n"
"prfm pldl1keep, [%[ptr5], #64] \n"
"prfm pldl1keep, [%[ptr6]] \n"
"prfm pldl1keep, [%[ptr6], #64] \n"
"prfm pldl1keep, [%[ptr7]] \n"
"prfm pldl1keep, [%[ptr7], #64] \n"
:
: [ptr0] "r"(inptr0),
[ptr1] "r"(inptr1),
[ptr2] "r"(inptr2),
[ptr3] "r"(inptr3),
[ptr4] "r"(inptr4),
[ptr5] "r"(inptr5),
[ptr6] "r"(inptr6),
[ptr7] "r"(inptr7)
: "memory");
#else
asm volatile(
"pld [%[ptr0]] @ preload a, 64byte\n"
"pld [%[ptr0], #64] @ preload a, 64byte\n"
"pld [%[ptr1]] @ preload a, 64byte\n"
"pld [%[ptr1], #64] @ preload a, 64byte\n"
"pld [%[ptr2]] @ preload a, 64byte\n"
"pld [%[ptr2], #64] @ preload a, 64byte\n"
"pld [%[ptr3]] @ preload a, 64byte\n"
"pld [%[ptr3], #64] @ preload a, 64byte\n"
"pld [%[ptr4]] @ preload a, 64byte\n"
"pld [%[ptr4], #64] @ preload a, 64byte\n"
"pld [%[ptr5]] @ preload a, 64byte\n"
"pld [%[ptr5], #64] @ preload a, 64byte\n"
"pld [%[ptr6]] @ preload a, 64byte\n"
"pld [%[ptr6], #64] @ preload a, 64byte\n"
"pld [%[ptr7]] @ preload a, 64byte\n"
"pld [%[ptr7], #64] @ preload a, 64byte\n"
:
: [ptr0] "r"(inptr0),
[ptr1] "r"(inptr1),
[ptr2] "r"(inptr2),
[ptr3] "r"(inptr3),
[ptr4] "r"(inptr4),
[ptr5] "r"(inptr5),
[ptr6] "r"(inptr6),
[ptr7] "r"(inptr7)
: "memory");
#endif
int j = 0;
for (; j < w_in; j++) {
int tmpx = (ww - i) * 4;
uint8_t* outptr = dst + j * wout + tmpx;
*outptr++ = *inptr7++;
*outptr++ = *inptr7++;
*outptr++ = *inptr7++;
*outptr++ = *inptr7++;
*outptr++ = *inptr6++;
*outptr++ = *inptr6++;
*outptr++ = *inptr6++;
*outptr++ = *inptr6++;
*outptr++ = *inptr5++;
*outptr++ = *inptr5++;
*outptr++ = *inptr5++;
*outptr++ = *inptr5++;
*outptr++ = *inptr4++;
*outptr++ = *inptr4++;
*outptr++ = *inptr4++;
*outptr++ = *inptr4++;
*outptr++ = *inptr3++;
*outptr++ = *inptr3++;
*outptr++ = *inptr3++;
*outptr++ = *inptr3++;
*outptr++ = *inptr2++;
*outptr++ = *inptr2++;
*outptr++ = *inptr2++;
*outptr++ = *inptr2++;
*outptr++ = *inptr1++;
*outptr++ = *inptr1++;
*outptr++ = *inptr1++;
*outptr++ = *inptr1++;
*outptr++ = *inptr0++;
*outptr++ = *inptr0++;
*outptr++ = *inptr0++;
*outptr++ = *inptr0++;
}
}
ww = w_out - 1;
for (; i < h_in; i++) {
const uint8_t* inptr0 = src + i * win;
for (int j = 0; j < w_in; j++) {
uint8_t* outptr0 = dst + j * wout + (ww - i) * 4;
*outptr0++ = *inptr0++;
*outptr0++ = *inptr0++;
*outptr0++ = *inptr0++;
*outptr0++ = *inptr0++;
}
}
}
/*
bgr1 bgr2 bgr3
bgr4 bgr5 bgr6
bgr7 bgr8 bgr9
rotate:
bgr3 bgr6 bgr9
bgr2 bgr5 bgr8
bgr1 bgr4 bgr7
*/
// dst = (h_out - 1) * w_out
// Similar to rotate90, but the results are written out in reverse order;
// equivalently, rotate90 first and then flip along the Y axis.
void rotate270_hwc_bgra(const uint8_t* src, uint8_t* dst, int w_in, int h_in) {
int w_out = h_in;
int h_out = w_in;
int win = w_in * 4;
int wout = w_out * 4;
int hremain = h_in % 8;
int stride_h = 4 * win;
int stride_h_w = 4 * win - 32;
int hout = h_out - 1;
// block 8*8. -- 8*8
int i = 0;
for (; i < h_in - 7; i += 8) {
const uint8_t* inptr0 = src + i * win;
const uint8_t* inptr1 = inptr0 + win;
const uint8_t* inptr2 = inptr1 + win;
const uint8_t* inptr3 = inptr2 + win;
const uint8_t* inptr4 = inptr3 + win;
const uint8_t* inptr5 = inptr4 + win;
const uint8_t* inptr6 = inptr5 + win;
const uint8_t* inptr7 = inptr6 + win;
int j = 0;
#ifdef __aarch64__
asm volatile(
"prfm pldl1keep, [%[ptr0]] \n"
"prfm pldl1keep, [%[ptr0], #64] \n"
"prfm pldl1keep, [%[ptr1]] \n"
"prfm pldl1keep, [%[ptr1], #64] \n"
"prfm pldl1keep, [%[ptr2]] \n"
"prfm pldl1keep, [%[ptr2], #64] \n"
"prfm pldl1keep, [%[ptr3]] \n"
"prfm pldl1keep, [%[ptr3], #64] \n"
"prfm pldl1keep, [%[ptr4]] \n"
"prfm pldl1keep, [%[ptr4], #64] \n"
"prfm pldl1keep, [%[ptr5]] \n"
"prfm pldl1keep, [%[ptr5], #64] \n"
"prfm pldl1keep, [%[ptr6]] \n"
"prfm pldl1keep, [%[ptr6], #64] \n"
"prfm pldl1keep, [%[ptr7]] \n"
"prfm pldl1keep, [%[ptr7], #64] \n"
:
: [ptr0] "r"(inptr0),
[ptr1] "r"(inptr1),
[ptr2] "r"(inptr2),
[ptr3] "r"(inptr3),
[ptr4] "r"(inptr4),
[ptr5] "r"(inptr5),
[ptr6] "r"(inptr6),
[ptr7] "r"(inptr7)
: "memory");
#else
asm volatile(
"pld [%[ptr0]] @ preload a, 64byte\n"
"pld [%[ptr0], #64] @ preload a, 64byte\n"
"pld [%[ptr1]] @ preload a, 64byte\n"
"pld [%[ptr1], #64] @ preload a, 64byte\n"
"pld [%[ptr2]] @ preload a, 64byte\n"
"pld [%[ptr2], #64] @ preload a, 64byte\n"
"pld [%[ptr3]] @ preload a, 64byte\n"
"pld [%[ptr3], #64] @ preload a, 64byte\n"
"pld [%[ptr4]] @ preload a, 64byte\n"
"pld [%[ptr4], #64] @ preload a, 64byte\n"
"pld [%[ptr5]] @ preload a, 64byte\n"
"pld [%[ptr5], #64] @ preload a, 64byte\n"
"pld [%[ptr6]] @ preload a, 64byte\n"
"pld [%[ptr6], #64] @ preload a, 64byte\n"
"pld [%[ptr7]] @ preload a, 64byte\n"
"pld [%[ptr7], #64] @ preload a, 64byte\n"
:
: [ptr0] "r"(inptr0),
[ptr1] "r"(inptr1),
[ptr2] "r"(inptr2),
[ptr3] "r"(inptr3),
[ptr4] "r"(inptr4),
[ptr5] "r"(inptr5),
[ptr6] "r"(inptr6),
[ptr7] "r"(inptr7)
: "memory");
#endif
for (; j < w_in; j++) {
int tmpx = i * 4;
uint8_t* outptr = dst + (hout - j) * wout + tmpx;
*outptr++ = *inptr0++;
*outptr++ = *inptr0++;
*outptr++ = *inptr0++;
*outptr++ = *inptr0++;
*outptr++ = *inptr1++;
*outptr++ = *inptr1++;
*outptr++ = *inptr1++;
*outptr++ = *inptr1++;
*outptr++ = *inptr2++;
*outptr++ = *inptr2++;
*outptr++ = *inptr2++;
*outptr++ = *inptr2++;
*outptr++ = *inptr3++;
*outptr++ = *inptr3++;
*outptr++ = *inptr3++;
*outptr++ = *inptr3++;
*outptr++ = *inptr4++;
*outptr++ = *inptr4++;
*outptr++ = *inptr4++;
*outptr++ = *inptr4++;
*outptr++ = *inptr5++;
*outptr++ = *inptr5++;
*outptr++ = *inptr5++;
*outptr++ = *inptr5++;
*outptr++ = *inptr6++;
*outptr++ = *inptr6++;
*outptr++ = *inptr6++;
*outptr++ = *inptr6++;
*outptr++ = *inptr7++;
*outptr++ = *inptr7++;
*outptr++ = *inptr7++;
*outptr++ = *inptr7++;
}
}
for (; i < h_in; i++) {
const uint8_t* inptr0 = src + i * win;
for (int j = 0; j < w_in; j++) {
uint8_t* outptr0 = dst + (hout - j) * wout + i * 4;
*outptr0++ = *inptr0++;
*outptr0++ = *inptr0++;
*outptr0++ = *inptr0++;
*outptr0++ = *inptr0++;
}
}
}
/*
bgr1 bgr2 bgr3
bgr4 bgr5 bgr6
bgr7 bgr8 bgr9
rotate:
bgr9 bgr8 bgr7
bgr6 bgr5 bgr4
bgr3 bgr2 bgr1
*/
// rotate 180: equivalent to flipping along both x and y
void rotate180_hwc_bgra(const uint8_t* src, uint8_t* dst, int w, int h_in) {
int w_in = w * 4;
uint8_t zerobuff[w_in]; // NOLINT
memset(zerobuff, 0, w_in * sizeof(uint8_t));
int stride_w = 4;
// 4*8
for (int i = 0; i < h_in; i += 4) {
const uint8_t* inptr0 = src + i * w_in;
const uint8_t* inptr1 = inptr0 + w_in;
const uint8_t* inptr2 = inptr1 + w_in;
const uint8_t* inptr3 = inptr2 + w_in;
uint8_t* outptr0 = dst + (h_in - i) * w_in - stride_w; // last
uint8_t* outptr1 = outptr0 - w_in;
uint8_t* outptr2 = outptr1 - w_in;
uint8_t* outptr3 = outptr2 - w_in;
if (i + 3 >= h_in) {
switch ((i + 3) - h_in) {
case 3:
inptr0 = zerobuff;
case 2:
inptr1 = zerobuff;
case 1:
inptr2 = zerobuff;
case 0:
inptr3 = zerobuff;
default:
break;
}
}
#ifdef __aarch64__
asm volatile(
"prfm pldl1keep, [%[ptr0]] \n"
"prfm pldl1keep, [%[ptr0], #64] \n"
"prfm pldl1keep, [%[ptr1]] \n"
"prfm pldl1keep, [%[ptr1], #64] \n"
"prfm pldl1keep, [%[ptr2]] \n"
"prfm pldl1keep, [%[ptr2], #64] \n"
"prfm pldl1keep, [%[ptr3]] \n"
"prfm pldl1keep, [%[ptr3], #64] \n"
:
: [ptr0] "r"(inptr0),
[ptr1] "r"(inptr1),
[ptr2] "r"(inptr2),
[ptr3] "r"(inptr3)
: "memory");
#else
asm volatile(
"pld [%[ptr0]] @ preload a, 64byte\n"
"pld [%[ptr0], #64] @ preload a, 64byte\n"
"pld [%[ptr1]] @ preload a, 64byte\n"
"pld [%[ptr1], #64] @ preload a, 64byte\n"
"pld [%[ptr2]] @ preload a, 64byte\n"
"pld [%[ptr2], #64] @ preload a, 64byte\n"
"pld [%[ptr3]] @ preload a, 64byte\n"
"pld [%[ptr3], #64] @ preload a, 64byte\n"
:
: [ptr0] "r"(inptr0),
[ptr1] "r"(inptr1),
[ptr2] "r"(inptr2),
[ptr3] "r"(inptr3)
: "memory");
#endif
int j = 0;
for (; j < w; j++) {
if (i + 3 >= h_in) {
switch ((i + 3) - h_in) {
case 0:
*outptr2++ = *inptr2++;
*outptr2++ = *inptr2++;
*outptr2++ = *inptr2++;
*outptr2++ = *inptr2++;
outptr2 -= 8;
case 1:
*outptr1++ = *inptr1++;
*outptr1++ = *inptr1++;
*outptr1++ = *inptr1++;
*outptr1++ = *inptr1++;
outptr1 -= 8;
case 2:
*outptr0++ = *inptr0++;
*outptr0++ = *inptr0++;
*outptr0++ = *inptr0++;
*outptr0++ = *inptr0++;
outptr0 -= 8;
case 3:
// inptr3 = zerobuff;
default:
break;
}
} else {
*outptr3++ = *inptr3++;
*outptr3++ = *inptr3++;
*outptr3++ = *inptr3++;
*outptr3++ = *inptr3++;
outptr3 -= 8;
*outptr2++ = *inptr2++;
*outptr2++ = *inptr2++;
*outptr2++ = *inptr2++;
*outptr2++ = *inptr2++;
outptr2 -= 8;
*outptr1++ = *inptr1++;
*outptr1++ = *inptr1++;
*outptr1++ = *inptr1++;
*outptr1++ = *inptr1++;
outptr1 -= 8;
*outptr0++ = *inptr0++;
*outptr0++ = *inptr0++;
*outptr0++ = *inptr0++;
*outptr0++ = *inptr0++;
outptr0 -= 8;
}
}
}
}
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/tests/cv/anakin/cv_utils.h"
void bgra_to_tensor_hwc(const uint8_t* bgr,
Tensor& output, // NOLINT
int width,
int height,
float* means,
float* scales) {
int size = width * height;
float* ptr0 = output.mutable_data<float>();
float r_means = means[0];
float g_means = means[1];
float b_means = means[2];
float r_scales = scales[0];
float g_scales = scales[1];
float b_scales = scales[2];
int dim8 = width >> 3;
  int remain = width - (dim8 << 3);
float32x4_t vrmean = vdupq_n_f32(r_means);
float32x4_t vgmean = vdupq_n_f32(g_means);
float32x4_t vbmean = vdupq_n_f32(b_means);
float32x4_t vrscale = vdupq_n_f32(r_scales);
float32x4_t vgscale = vdupq_n_f32(g_scales);
float32x4_t vbscale = vdupq_n_f32(b_scales);
for (int i = 0; i < height; i++) {
const uint8_t* ptr_bgr = bgr + i * width * 4;
float* ptr0_b = ptr0 + i * width;
float* ptr1_g = ptr0_b + size;
float* ptr2_r = ptr1_g + size;
for (int j = 0; j < dim8; j++) {
uint8x8x4_t vbgr = vld4_u8(ptr_bgr);
uint8x8_t vb = vbgr.val[0];
uint8x8_t vg = vbgr.val[1];
uint8x8_t vr = vbgr.val[2];
uint16x8_t vb_16 = vmovl_u8(vb);
uint16x8_t vg_16 = vmovl_u8(vg);
uint16x8_t vr_16 = vmovl_u8(vr);
uint32x4_t vb_low_32 = vmovl_u16(vget_low_u16(vb_16));
uint32x4_t vg_low_32 = vmovl_u16(vget_low_u16(vg_16));
uint32x4_t vr_low_32 = vmovl_u16(vget_low_u16(vr_16));
uint32x4_t vb_high_32 = vmovl_u16(vget_high_u16(vb_16));
uint32x4_t vg_high_32 = vmovl_u16(vget_high_u16(vg_16));
uint32x4_t vr_high_32 = vmovl_u16(vget_high_u16(vr_16));
float32x4_t vb_low_f32 = vcvtq_f32_u32(vb_low_32);
float32x4_t vr_low_f32 = vcvtq_f32_u32(vr_low_32);
float32x4_t vg_low_f32 = vcvtq_f32_u32(vg_low_32);
float32x4_t vb_high_f32 = vcvtq_f32_u32(vb_high_32);
float32x4_t vg_high_f32 = vcvtq_f32_u32(vg_high_32);
float32x4_t vr_high_f32 = vcvtq_f32_u32(vr_high_32);
vb_low_f32 = vsubq_f32(vb_low_f32, vbmean);
vg_low_f32 = vsubq_f32(vg_low_f32, vgmean);
vr_low_f32 = vsubq_f32(vr_low_f32, vrmean);
vb_high_f32 = vsubq_f32(vb_high_f32, vbmean);
vg_high_f32 = vsubq_f32(vg_high_f32, vgmean);
vr_high_f32 = vsubq_f32(vr_high_f32, vrmean);
vb_low_f32 = vmulq_f32(vb_low_f32, vbscale);
vg_low_f32 = vmulq_f32(vg_low_f32, vgscale);
vr_low_f32 = vmulq_f32(vr_low_f32, vrscale);
vb_high_f32 = vmulq_f32(vb_high_f32, vbscale);
vg_high_f32 = vmulq_f32(vg_high_f32, vgscale);
vr_high_f32 = vmulq_f32(vr_high_f32, vrscale);
vst1q_f32(ptr0_b, vb_low_f32);
vst1q_f32(ptr1_g, vg_low_f32);
vst1q_f32(ptr2_r, vr_low_f32);
ptr_bgr += 32;
vst1q_f32(ptr0_b + 4, vb_high_f32);
vst1q_f32(ptr1_g + 4, vg_high_f32);
vst1q_f32(ptr2_r + 4, vr_high_f32);
ptr0_b += 8;
ptr1_g += 8;
ptr2_r += 8;
}
for (int j = 0; j < remain; j++) {
*ptr0_b++ = (*ptr_bgr - b_means) * b_scales;
ptr_bgr++;
*ptr1_g++ = (*ptr_bgr - g_means) * g_scales;
ptr_bgr++;
*ptr2_r++ = (*ptr_bgr - r_means) * r_scales;
ptr_bgr++;
ptr_bgr++;
}
}
}
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/tests/cv/anakin/cv_utils.h"
void image_basic_convert(const uint8_t* src,
uint8_t* dst,
ImageFormat srcFormat,
ImageFormat dstFormat,
int srcw,
int srch,
int out_size) {
if (srcFormat == dstFormat) {
// copy
memcpy(dst, src, sizeof(uint8_t) * out_size);
return;
} else {
if (srcFormat == ImageFormat::NV12 &&
(dstFormat == ImageFormat::BGR || dstFormat == ImageFormat::RGB)) {
nv12_to_bgr(src, dst, srcw, srch);
} else if (srcFormat == ImageFormat::NV21 &&
(dstFormat == ImageFormat::BGR ||
dstFormat == ImageFormat::RGB)) {
nv21_to_bgr(src, dst, srcw, srch);
} else if (srcFormat == ImageFormat::NV12 &&
(dstFormat == ImageFormat::BGRA ||
dstFormat == ImageFormat::RGBA)) {
nv12_to_bgra(src, dst, srcw, srch);
} else if (srcFormat == ImageFormat::NV21 &&
(dstFormat == ImageFormat::BGRA ||
dstFormat == ImageFormat::RGBA)) {
nv21_to_bgra(src, dst, srcw, srch);
} else {
printf("bais-anakin srcFormat: %d, dstFormat: %d does not support! \n",
srcFormat,
dstFormat);
}
}
}
void image_basic_resize(const uint8_t* src,
uint8_t* dst,
ImageFormat srcFormat,
int srcw,
int srch,
int dstw,
int dsth) {
int size = srcw * srch;
if (srcw == dstw && srch == dsth) {
if (srcFormat == ImageFormat::NV12 || srcFormat == ImageFormat::NV21) {
size = srcw * (static_cast<int>(1.5 * srch));
} else if (srcFormat == ImageFormat::BGR || srcFormat == ImageFormat::RGB) {
size = 3 * srcw * srch;
} else if (srcFormat == ImageFormat::BGRA ||
srcFormat == ImageFormat::RGBA) {
size = 4 * srcw * srch;
}
memcpy(dst, src, sizeof(uint8_t) * size);
return;
} else {
if (srcFormat == ImageFormat::NV12 || srcFormat == ImageFormat::NV21) {
nv21_resize(src, dst, srcw, srch, dstw, dsth);
} else if (srcFormat == ImageFormat::BGR || srcFormat == ImageFormat::RGB) {
bgr_resize(src, dst, srcw, srch, dstw, dsth);
} else if (srcFormat == ImageFormat::BGRA ||
srcFormat == ImageFormat::RGBA) {
bgra_resize(src, dst, srcw, srch, dstw, dsth);
} else {
printf("anakin doesn't support this type: %d\n",
static_cast<int>(srcFormat));
}
}
}
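// Sketch: bytes per image for the formats handled here, mirroring the size
// arithmetic in image_basic_resize above (assumes even srch for NV12/NV21).
// Illustration only — not a library API.
static int image_bytes(ImageFormat fmt, int w, int h) {
  switch (fmt) {
    case ImageFormat::NV12:
    case ImageFormat::NV21:
      return w * h * 3 / 2;  // full-size Y plane plus half-size interleaved UV
    case ImageFormat::BGR:
    case ImageFormat::RGB:
      return w * h * 3;
    case ImageFormat::BGRA:
    case ImageFormat::RGBA:
      return w * h * 4;
    default:
      return 0;
  }
}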
void image_basic_flip(const uint8_t* src,
uint8_t* dst,
ImageFormat srcFormat,
int srcw,
int srch,
int flip_num) {
if (flip_num == -1) {
flip_num = 0; // xy
} else if (flip_num == 0) {
flip_num = 1; // x
} else if (flip_num == 1) {
flip_num = -1; // y
}
if (srcFormat == ImageFormat::BGR || srcFormat == ImageFormat::RGB) {
bgr_flip_hwc(src, dst, srcw, srch, flip_num);
} else if (srcFormat == ImageFormat::BGRA || srcFormat == ImageFormat::RGBA) {
bgra_flip_hwc(src, dst, srcw, srch, flip_num);
} else {
printf("anakin doesn't support this type: %d\n",
static_cast<int>(srcFormat));
}
}
void image_basic_rotate(const uint8_t* src,
uint8_t* dst,
ImageFormat srcFormat,
int srcw,
int srch,
float rotate_num) {
if (srcFormat == ImageFormat::BGR || srcFormat == ImageFormat::RGB) {
bgr_rotate_hwc(src, dst, srcw, srch, rotate_num);
} else if (srcFormat == ImageFormat::BGRA || srcFormat == ImageFormat::RGBA) {
bgra_rotate_hwc(src, dst, srcw, srch, rotate_num);
} else {
printf("anakin doesn't support this type: %d\n",
static_cast<int>(srcFormat));
}
}
void image_basic_to_tensor(const uint8_t* in_data,
Tensor dst,
ImageFormat srcFormat,
LayoutType layout,
int srcw,
int srch,
float* means,
float* scales) {
if (layout == LayoutType::kNCHW &&
(srcFormat == ImageFormat::BGR || srcFormat == ImageFormat::RGB)) {
bgr_to_tensor_hwc(in_data, dst, srcw, srch, means, scales);
} else if (layout == LayoutType::kNCHW && (srcFormat == ImageFormat::BGRA ||
srcFormat == ImageFormat::RGBA)) {
bgra_to_tensor_hwc(in_data, dst, srcw, srch, means, scales);
} else {
printf("anakin doesn't support this type: %d\n",
static_cast<int>(srcFormat));
}
}
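#include <vector>
// Hedged sketch of how the image_basic_* helpers chain together — roughly the
// flow a test such as image_profiler_test would exercise. Buffer sizing and
// the by-value Tensor handoff are assumptions for illustration only.
void basic_pipeline_demo(const uint8_t* nv12,
                         int srcw,
                         int srch,
                         int dstw,
                         int dsth,
                         Tensor out,
                         float* means,
                         float* scales) {
  std::vector<uint8_t> bgr(srcw * srch * 3);
  std::vector<uint8_t> resized(dstw * dsth * 3);
  // NV12 -> BGR at the source resolution.
  image_basic_convert(nv12, bgr.data(), ImageFormat::NV12, ImageFormat::BGR,
                      srcw, srch, srcw * srch * 3);
  // Bilinear resize to the target resolution.
  image_basic_resize(bgr.data(), resized.data(), ImageFormat::BGR,
                     srcw, srch, dstw, dsth);
  // Normalize and repack HWC -> CHW into the output tensor.
  image_basic_to_tensor(resized.data(), out, ImageFormat::BGR,
                        LayoutType::kNCHW, dstw, dsth, means, scales);
}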