未验证 提交 e84406a7 编写于 作者: H HappyAngel 提交者: GitHub

[arm] add test_cv demo (#2691)

* add cv image process

* fix arm linux build error

* add LITE_WITH_CV define to make cv, test=develop

* fix cv format, and add description in utils/cv

* set LITE_WITH_CV=OFF in build.sh, test=develop

* delete cv_enum.h in utils/cv, push the contents in cv_enum.h to paddle_image_preprocess.h, test=develop

* according to reviews to redefine paddle_image_preprocess.h, test=develop

* add detailed note of flipParam, test=develop

* fix format in paddle_image_preprocess.h, test=develop

* fix cmake error in lite/CMakeLists.txt, missing mkdir cxx, test=develop

* according to review change, test=develop

* add elementwise mul constant elimination and deconv+relu, deconv+batchnorm fusion, test=develop

* fix format, test=develop

* fix model_optimize bug, update concat and split op, speed up, test=develop

* update split speed, test=develop

* fix format, test=develop

* add classify demo in demo/cxx/ , test=develop

* fix format in mobile_classify, test=develop

* delete some note and extra code, test=develop

* remove test.jpg and labels.txt, test=develop

* add test_cv in cxx/demo

* add test_cv README, test=develop

* add note info, flip only support x, y, xy;rotate only support 90, 180, 270; test=develop

* fix build error, paddle_cv_arm , test=develop

* add GRAY to RGBA(BGRA) convert and RGBA(BGRA)_to_Tensor, test=develop

* fix format from review, test=develop

* fix makefile format. test=develop

* fix build v7 error, test=develop
上级 6135fd4a
...@@ -226,6 +226,8 @@ if (LITE_WITH_LIGHT_WEIGHT_FRAMEWORK AND LITE_WITH_ARM) ...@@ -226,6 +226,8 @@ if (LITE_WITH_LIGHT_WEIGHT_FRAMEWORK AND LITE_WITH_ARM)
COMMAND cp "${CMAKE_SOURCE_DIR}/lite/demo/cxx/makefiles/yolov3_detection/Makefile.${ARM_TARGET_OS}.${ARM_TARGET_ARCH_ABI}" "${INFER_LITE_PUBLISH_ROOT}/demo/cxx/yolov3_detection/Makefile" COMMAND cp "${CMAKE_SOURCE_DIR}/lite/demo/cxx/makefiles/yolov3_detection/Makefile.${ARM_TARGET_OS}.${ARM_TARGET_ARCH_ABI}" "${INFER_LITE_PUBLISH_ROOT}/demo/cxx/yolov3_detection/Makefile"
COMMAND cp -r "${CMAKE_SOURCE_DIR}/lite/demo/cxx/mobile_classify" "${INFER_LITE_PUBLISH_ROOT}/demo/cxx" COMMAND cp -r "${CMAKE_SOURCE_DIR}/lite/demo/cxx/mobile_classify" "${INFER_LITE_PUBLISH_ROOT}/demo/cxx"
COMMAND cp "${CMAKE_SOURCE_DIR}/lite/demo/cxx/makefiles/mobile_classify/Makefile.${ARM_TARGET_OS}.${ARM_TARGET_ARCH_ABI}" "${INFER_LITE_PUBLISH_ROOT}/demo/cxx/mobile_classify/Makefile" COMMAND cp "${CMAKE_SOURCE_DIR}/lite/demo/cxx/makefiles/mobile_classify/Makefile.${ARM_TARGET_OS}.${ARM_TARGET_ARCH_ABI}" "${INFER_LITE_PUBLISH_ROOT}/demo/cxx/mobile_classify/Makefile"
COMMAND cp -r "${CMAKE_SOURCE_DIR}/lite/demo/cxx/test_cv" "${INFER_LITE_PUBLISH_ROOT}/demo/cxx"
COMMAND cp "${CMAKE_SOURCE_DIR}/lite/demo/cxx/makefiles/test_cv/Makefile.${ARM_TARGET_OS}.${ARM_TARGET_ARCH_ABI}" "${INFER_LITE_PUBLISH_ROOT}/demo/cxx/test_cv/Makefile"
) )
add_dependencies(publish_inference_android_cxx_demos logging gflags) add_dependencies(publish_inference_android_cxx_demos logging gflags)
add_dependencies(publish_inference_cxx_lib publish_inference_android_cxx_demos) add_dependencies(publish_inference_cxx_lib publish_inference_android_cxx_demos)
...@@ -243,6 +245,8 @@ if (LITE_WITH_LIGHT_WEIGHT_FRAMEWORK AND LITE_WITH_ARM) ...@@ -243,6 +245,8 @@ if (LITE_WITH_LIGHT_WEIGHT_FRAMEWORK AND LITE_WITH_ARM)
COMMAND cp "${CMAKE_SOURCE_DIR}/lite/demo/cxx/makefiles/yolov3_detection/Makefile.${ARM_TARGET_OS}.${ARM_TARGET_ARCH_ABI}" "${INFER_LITE_PUBLISH_ROOT}/demo/cxx/yolov3_detection/Makefile" COMMAND cp "${CMAKE_SOURCE_DIR}/lite/demo/cxx/makefiles/yolov3_detection/Makefile.${ARM_TARGET_OS}.${ARM_TARGET_ARCH_ABI}" "${INFER_LITE_PUBLISH_ROOT}/demo/cxx/yolov3_detection/Makefile"
COMMAND cp -r "${CMAKE_SOURCE_DIR}/lite/demo/cxx/mobile_classify" "${INFER_LITE_PUBLISH_ROOT}/demo/cxx" COMMAND cp -r "${CMAKE_SOURCE_DIR}/lite/demo/cxx/mobile_classify" "${INFER_LITE_PUBLISH_ROOT}/demo/cxx"
COMMAND cp "${CMAKE_SOURCE_DIR}/lite/demo/cxx/makefiles/mobile_classify/Makefile.${ARM_TARGET_OS}.${ARM_TARGET_ARCH_ABI}" "${INFER_LITE_PUBLISH_ROOT}/demo/cxx/mobile_classify/Makefile" COMMAND cp "${CMAKE_SOURCE_DIR}/lite/demo/cxx/makefiles/mobile_classify/Makefile.${ARM_TARGET_OS}.${ARM_TARGET_ARCH_ABI}" "${INFER_LITE_PUBLISH_ROOT}/demo/cxx/mobile_classify/Makefile"
COMMAND cp -r "${CMAKE_SOURCE_DIR}/lite/demo/cxx/test_cv" "${INFER_LITE_PUBLISH_ROOT}/demo/cxx"
COMMAND cp "${CMAKE_SOURCE_DIR}/lite/demo/cxx/makefiles/test_cv/Makefile.${ARM_TARGET_OS}.${ARM_TARGET_ARCH_ABI}" "${INFER_LITE_PUBLISH_ROOT}/demo/cxx/test_cv/Makefile"
) )
add_dependencies(tiny_publish_cxx_lib publish_inference_android_cxx_demos) add_dependencies(tiny_publish_cxx_lib publish_inference_android_cxx_demos)
endif() endif()
......
...@@ -35,6 +35,7 @@ if ((NOT LITE_ON_TINY_PUBLISH) AND (LITE_WITH_CUDA OR LITE_WITH_X86 OR ARM_TARGE ...@@ -35,6 +35,7 @@ if ((NOT LITE_ON_TINY_PUBLISH) AND (LITE_WITH_CUDA OR LITE_WITH_X86 OR ARM_TARGE
NPU_DEPS ${npu_kernels}) NPU_DEPS ${npu_kernels})
target_link_libraries(paddle_light_api_shared ${light_lib_DEPS} ${arm_kernels} ${npu_kernels}) target_link_libraries(paddle_light_api_shared ${light_lib_DEPS} ${arm_kernels} ${npu_kernels})
if (LITE_WITH_NPU) if (LITE_WITH_NPU)
# Strips the symbols of our protobuf functions to fix the conflicts during # Strips the symbols of our protobuf functions to fix the conflicts during
# loading HIAI builder libs (libhiai_ir.so and libhiai_ir_build.so) # loading HIAI builder libs (libhiai_ir.so and libhiai_ir_build.so)
...@@ -123,6 +124,7 @@ if(WITH_TESTING) ...@@ -123,6 +124,7 @@ if(WITH_TESTING)
X86_DEPS ${x86_kernels} X86_DEPS ${x86_kernels}
CUDA_DEPS ${cuda_kernels} CUDA_DEPS ${cuda_kernels}
ARM_DEPS ${arm_kernels} ARM_DEPS ${arm_kernels}
CV_DEPS paddle_cv_arm
NPU_DEPS ${npu_kernels} NPU_DEPS ${npu_kernels}
XPU_DEPS ${xpu_kernels} XPU_DEPS ${xpu_kernels}
CL_DEPS ${opencl_kernels} CL_DEPS ${opencl_kernels}
...@@ -285,6 +287,7 @@ endif(LITE_ON_MODEL_OPTIMIZE_TOOL) ...@@ -285,6 +287,7 @@ endif(LITE_ON_MODEL_OPTIMIZE_TOOL)
lite_cc_test(test_paddle_api SRCS paddle_api_test.cc DEPS paddle_api_full paddle_api_light lite_cc_test(test_paddle_api SRCS paddle_api_test.cc DEPS paddle_api_full paddle_api_light
${ops} ${ops}
ARM_DEPS ${arm_kernels} ARM_DEPS ${arm_kernels}
CV_DEPS paddle_cv_arm
NPU_DEPS ${npu_kernels} NPU_DEPS ${npu_kernels}
XPU_DEPS ${xpu_kernels} XPU_DEPS ${xpu_kernels}
CL_DEPS ${opencl_kernels} CL_DEPS ${opencl_kernels}
...@@ -307,9 +310,11 @@ if(NOT IOS) ...@@ -307,9 +310,11 @@ if(NOT IOS)
FPGA_DEPS ${fpga_kernels} FPGA_DEPS ${fpga_kernels}
X86_DEPS ${x86_kernels} X86_DEPS ${x86_kernels}
CUDA_DEPS ${cuda_kernels}) CUDA_DEPS ${cuda_kernels})
lite_cc_binary(benchmark_bin SRCS benchmark.cc DEPS paddle_api_full paddle_api_light gflags utils lite_cc_binary(benchmark_bin SRCS benchmark.cc DEPS paddle_api_full paddle_api_light gflags utils
${ops} ${host_kernels} ${ops} ${host_kernels}
ARM_DEPS ${arm_kernels} ARM_DEPS ${arm_kernels}
CV_DEPS paddle_cv_arm
NPU_DEPS ${npu_kernels} NPU_DEPS ${npu_kernels}
XPU_DEPS ${xpu_kernels} XPU_DEPS ${xpu_kernels}
CL_DEPS ${opencl_kernels} CL_DEPS ${opencl_kernels}
......
...@@ -86,6 +86,7 @@ void Run(const std::vector<std::vector<int64_t>>& input_shapes, ...@@ -86,6 +86,7 @@ void Run(const std::vector<std::vector<int64_t>>& input_shapes,
for (int i = 0; i < input_shapes[j].size(); ++i) { for (int i = 0; i < input_shapes[j].size(); ++i) {
input_num *= input_shapes[j][i]; input_num *= input_shapes[j][i];
} }
for (int i = 0; i < input_num; ++i) { for (int i = 0; i < input_num; ++i) {
input_data[i] = 1.f; input_data[i] = 1.f;
} }
......
...@@ -68,26 +68,44 @@ adb pull /data/local/tmp/test_yolov3_detection_result.jpg ./ ...@@ -68,26 +68,44 @@ adb pull /data/local/tmp/test_yolov3_detection_result.jpg ./
cd ../mobile_classify cd ../mobile_classify
wget http://paddle-inference-dist.bj.bcebos.com/mobilenet_v1.tar.gz wget http://paddle-inference-dist.bj.bcebos.com/mobilenet_v1.tar.gz
tar zxvf mobilenet_v1.tar.gz tar zxvf mobilenet_v1.tar.gz
./model_optimize_tool optimize model
make make
adb push mobile_classify /data/local/tmp/
adb push test.jpg /data/local/tmp/ adb -s emulator-5554 push mobile_classify /data/local/tmp/
adb push labels.txt /data/local/tmp/ adb -s emulator-5554 push test.jpg /data/local/tmp/
adb push ../../../cxx/lib/libpaddle_light_api_shared.so /data/local/tmp/ adb -s emulator-5554 push labels.txt /data/local/tmp/
adb shell chmod +x /data/local/tmp/mobile_classify adb -s emulator-5554 push ../../../cxx/lib/libpaddle_light_api_shared.so /data/local/tmp/
adb shell "export LD_LIBRARY_PATH=/data/local/tmp/:$LD_LIBRARY_PATH && adb -s emulator-5554 shell chmod +x /data/local/tmp/mobile_classify
/data/local/tmp/mobile_classify /data/local/tmp/mobilenet_v1 /data/local/tmp/test.jpg /data/local/tmp/labels.txt" adb -s emulator-5554 shell "export LD_LIBRARY_PATH=/data/local/tmp/:$LD_LIBRARY_PATH &&
/data/local/tmp/mobile_classify /data/local/tmp/mobilenetv1opt2 /data/local/tmp/test.jpg /data/local/tmp/labels.txt"
``` ```
运行成功将在控制台输出预测结果的前5个类别的预测概率 运行成功将在控制台输出预测结果的前5个类别的预测概率
- 如若想看前10个类别的预测概率,在运行命令输入topk的值即可 - 如若想看前10个类别的预测概率,在运行命令输入topk的值即可
eg: eg:
```shell ```shell
adb shell "export LD_LIBRARY_PATH=/data/local/tmp/:$LD_LIBRARY_PATH && adb -s emulator-5554 shell "export LD_LIBRARY_PATH=/data/local/tmp/:$LD_LIBRARY_PATH &&
/data/local/tmp/mobile_classify /data/local/tmp/mobilenet_v1 /data/local/tmp/test.jpg /data/local/tmp/labels.txt 10" /data/local/tmp/mobile_classify /data/local/tmp/mobilenetv1opt2/ /data/local/tmp/test.jpg /data/local/tmp/labels.txt 10"
``` ```
- 如若想看其他模型的分类结果, 在运行命令输入model_dir 及其model的输入大小即可 - 如若想看其他模型的分类结果, 在运行命令输入model_dir 及其model的输入大小即可
eg: eg:
```shell ```shell
adb shell "export LD_LIBRARY_PATH=/data/local/tmp/:$LD_LIBRARY_PATH && adb -s emulator-5554 shell "export LD_LIBRARY_PATH=/data/local/tmp/:$LD_LIBRARY_PATH &&
/data/local/tmp/mobile_classify /data/local/tmp/mobilenet_v2 /data/local/tmp/test.jpg /data/local/tmp/labels.txt 10 224 224" /data/local/tmp/mobile_classify /data/local/tmp/mobilenetv2opt2/ /data/local/tmp/test.jpg /data/local/tmp/labels.txt 10 224 224"
``` ```
9. 编译含CV预处理库模型单测demo
```shell
cd ../test_cv
wget http://paddle-inference-dist.bj.bcebos.com/mobilenet_v1.tar.gz
tar zxvf mobilenet_v1.tar.gz
./model_optimize_tool optimize model
make
adb -s emulator-5554 push test_model_cv /data/local/tmp/
adb -s emulator-5554 push test.jpg /data/local/tmp/
adb -s emulator-5554 push labels.txt /data/local/tmp/
adb -s emulator-5554 push ../../../cxx/lib/libpaddle_full_api_shared.so /data/local/tmp/
adb -s emulator-5554 shell chmod +x /data/local/tmp/test_model_cv
adb -s emulator-5554 shell "export LD_LIBRARY_PATH=/data/local/tmp/:$LD_LIBRARY_PATH &&
/data/local/tmp/test_model_cv /data/local/tmp/mobilenetv1opt2 /data/local/tmp/test.jpg /data/local/tmp/labels.txt"
```
运行成功将在控制台输出预测结果的前10个类别的预测概率
# Makefile for the test_cv demos on Android armv7 (armeabi-v7a).
# Builds `test_model_cv` (default goal) and `test_img_prepross`; both link the
# prebuilt OpenCV static libraries fetched by the `fetch_opencv` target.
ARM_ABI = arm7
LITE_WITH_CV = ON
export ARM_ABI
export LITE_WITH_CV

# Toolchain variables (CC, SYSROOT_*, CXX_FLAGS, ...) are expected to come from here.
include ../Makefile.def

LITE_ROOT=../../../

THIRD_PARTY_DIR=${LITE_ROOT}/third_party

OPENCV_VERSION=opencv4.1.0

OPENCV_LIBS = ../../../third_party/${OPENCV_VERSION}/armeabi-v7a/libs/libopencv_imgcodecs.a \
              ../../../third_party/${OPENCV_VERSION}/armeabi-v7a/libs/libopencv_imgproc.a \
              ../../../third_party/${OPENCV_VERSION}/armeabi-v7a/libs/libopencv_core.a \
              ../../../third_party/${OPENCV_VERSION}/armeabi-v7a/3rdparty/libs/libtegra_hal.a \
              ../../../third_party/${OPENCV_VERSION}/armeabi-v7a/3rdparty/libs/liblibjpeg-turbo.a \
              ../../../third_party/${OPENCV_VERSION}/armeabi-v7a/3rdparty/libs/liblibwebp.a \
              ../../../third_party/${OPENCV_VERSION}/armeabi-v7a/3rdparty/libs/liblibpng.a \
              ../../../third_party/${OPENCV_VERSION}/armeabi-v7a/3rdparty/libs/liblibjasper.a \
              ../../../third_party/${OPENCV_VERSION}/armeabi-v7a/3rdparty/libs/liblibtiff.a \
              ../../../third_party/${OPENCV_VERSION}/armeabi-v7a/3rdparty/libs/libIlmImf.a \
              ../../../third_party/${OPENCV_VERSION}/armeabi-v7a/3rdparty/libs/libtbb.a \
              ../../../third_party/${OPENCV_VERSION}/armeabi-v7a/3rdparty/libs/libcpufeatures.a

OPENCV_INCLUDE = -I../../../third_party/${OPENCV_VERSION}/armeabi-v7a/include

CXX_INCLUDES = $(INCLUDES) ${OPENCV_INCLUDE} -I$(LITE_ROOT)/cxx/include

CXX_LIBS = ${OPENCV_LIBS} -L$(LITE_ROOT)/cxx/lib/ -lpaddle_full_api_shared $(SYSTEM_LIBS)

###############################################################
# How to use one of static library:                           #
#  `libpaddle_api_full_bundled.a`                             #
#  `libpaddle_api_light_bundled.a`                            #
###############################################################
# Note: default use lite's shared library.                    #
###############################################################
# 1. Comment above line using `libpaddle_full_api_shared.so`
# 2. Undo comment below line using `libpaddle_api_light_bundled.a`
# The OpenCV static libraries are still required when linking the bundled archive.

#CXX_LIBS = ${OPENCV_LIBS} $(LITE_ROOT)/cxx/lib/libpaddle_api_light_bundled.a $(SYSTEM_LIBS)

test_model_cv: fetch_opencv test_model_cv.o
	$(CC) $(SYSROOT_LINK) $(CXXFLAGS_LINK) test_model_cv.o -o test_model_cv $(CXX_LIBS) $(LDFLAGS)

test_model_cv.o: test_model_cv.cc
	$(CC) $(SYSROOT_COMPLILE) $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -o test_model_cv.o -c test_model_cv.cc

test_img_prepross: fetch_opencv test_img_prepross.o
	$(CC) $(SYSROOT_LINK) $(CXXFLAGS_LINK) test_img_prepross.o -o test_img_prepross $(CXX_LIBS) $(LDFLAGS)

test_img_prepross.o: test_img_prepross.cc
	$(CC) $(SYSROOT_COMPLILE) $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -o test_img_prepross.o -c test_img_prepross.cc

# Downloads and unpacks the prebuilt OpenCV package once; each step is skipped
# when its result already exists.
fetch_opencv:
	@ test -d ${THIRD_PARTY_DIR} || mkdir ${THIRD_PARTY_DIR}
	@ test -e ${THIRD_PARTY_DIR}/${OPENCV_VERSION}.tar.gz || \
	      (echo "fetch opencv libs" && \
	      wget -P ${THIRD_PARTY_DIR} https://paddle-inference-dist.bj.bcebos.com/${OPENCV_VERSION}.tar.gz)
	@ test -d ${THIRD_PARTY_DIR}/${OPENCV_VERSION} || \
	      tar -zxvf ${THIRD_PARTY_DIR}/${OPENCV_VERSION}.tar.gz -C ${THIRD_PARTY_DIR}

# Neither target produces a file of its own name, so keep make from being
# confused by a stray file called `clean` or `fetch_opencv`.
.PHONY: clean fetch_opencv
clean:
	rm -f test_model_cv.o
	rm -f test_model_cv
	rm -f test_img_prepross.o
	rm -f test_img_prepross
# Makefile for the test_cv demos on Android armv8 (arm64-v8a).
# Builds `test_model_cv` (default goal) and `test_img_prepross`; both link the
# prebuilt OpenCV static libraries fetched by the `fetch_opencv` target.
ARM_ABI = arm8
LITE_WITH_CV = ON
export ARM_ABI
export LITE_WITH_CV

# Toolchain variables (CC, SYSROOT_*, CXX_FLAGS, ...) are expected to come from here.
include ../Makefile.def

LITE_ROOT=../../../

THIRD_PARTY_DIR=${LITE_ROOT}/third_party

OPENCV_VERSION=opencv4.1.0

OPENCV_LIBS = ../../../third_party/${OPENCV_VERSION}/arm64-v8a/libs/libopencv_imgcodecs.a \
              ../../../third_party/${OPENCV_VERSION}/arm64-v8a/libs/libopencv_imgproc.a \
              ../../../third_party/${OPENCV_VERSION}/arm64-v8a/libs/libopencv_core.a \
              ../../../third_party/${OPENCV_VERSION}/arm64-v8a/3rdparty/libs/libtegra_hal.a \
              ../../../third_party/${OPENCV_VERSION}/arm64-v8a/3rdparty/libs/liblibjpeg-turbo.a \
              ../../../third_party/${OPENCV_VERSION}/arm64-v8a/3rdparty/libs/liblibwebp.a \
              ../../../third_party/${OPENCV_VERSION}/arm64-v8a/3rdparty/libs/liblibpng.a \
              ../../../third_party/${OPENCV_VERSION}/arm64-v8a/3rdparty/libs/liblibjasper.a \
              ../../../third_party/${OPENCV_VERSION}/arm64-v8a/3rdparty/libs/liblibtiff.a \
              ../../../third_party/${OPENCV_VERSION}/arm64-v8a/3rdparty/libs/libIlmImf.a \
              ../../../third_party/${OPENCV_VERSION}/arm64-v8a/3rdparty/libs/libtbb.a \
              ../../../third_party/${OPENCV_VERSION}/arm64-v8a/3rdparty/libs/libcpufeatures.a

OPENCV_INCLUDE = -I../../../third_party/${OPENCV_VERSION}/arm64-v8a/include

CXX_INCLUDES = $(INCLUDES) ${OPENCV_INCLUDE} -I$(LITE_ROOT)/cxx/include

CXX_LIBS = ${OPENCV_LIBS} -L$(LITE_ROOT)/cxx/lib/ -lpaddle_full_api_shared $(SYSTEM_LIBS)

###############################################################
# How to use one of static library:                           #
#  `libpaddle_api_full_bundled.a`                             #
#  `libpaddle_api_light_bundled.a`                            #
###############################################################
# Note: default use lite's shared library.                    #
###############################################################
# 1. Comment above line using `libpaddle_full_api_shared.so`
# 2. Undo comment below line using `libpaddle_api_light_bundled.a`

#CXX_LIBS = ${OPENCV_LIBS} $(LITE_ROOT)/cxx/lib/libpaddle_api_light_bundled.a $(SYSTEM_LIBS)

test_model_cv: fetch_opencv test_model_cv.o
	$(CC) $(SYSROOT_LINK) $(CXXFLAGS_LINK) test_model_cv.o -o test_model_cv $(CXX_LIBS) $(LDFLAGS)

test_model_cv.o: test_model_cv.cc
	$(CC) $(SYSROOT_COMPLILE) $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -o test_model_cv.o -c test_model_cv.cc

test_img_prepross: fetch_opencv test_img_prepross.o
	$(CC) $(SYSROOT_LINK) $(CXXFLAGS_LINK) test_img_prepross.o -o test_img_prepross $(CXX_LIBS) $(LDFLAGS)

test_img_prepross.o: test_img_prepross.cc
	$(CC) $(SYSROOT_COMPLILE) $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -o test_img_prepross.o -c test_img_prepross.cc

# Downloads and unpacks the prebuilt OpenCV package once; each step is skipped
# when its result already exists.
fetch_opencv:
	@ test -d ${THIRD_PARTY_DIR} || mkdir ${THIRD_PARTY_DIR}
	@ test -e ${THIRD_PARTY_DIR}/${OPENCV_VERSION}.tar.gz || \
	      (echo "fetch opencv libs" && \
	      wget -P ${THIRD_PARTY_DIR} https://paddle-inference-dist.bj.bcebos.com/${OPENCV_VERSION}.tar.gz)
	@ test -d ${THIRD_PARTY_DIR}/${OPENCV_VERSION} || \
	      tar -zxvf ${THIRD_PARTY_DIR}/${OPENCV_VERSION}.tar.gz -C ${THIRD_PARTY_DIR}

# Neither target produces a file of its own name, so keep make from being
# confused by a stray file called `clean` or `fetch_opencv`.
.PHONY: clean fetch_opencv
clean:
	rm -f test_model_cv.o
	rm -f test_model_cv
	rm -f test_img_prepross.o
	rm -f test_img_prepross
...@@ -117,7 +117,7 @@ void pre_process(const cv::Mat& img, ...@@ -117,7 +117,7 @@ void pre_process(const cv::Mat& img,
float* means, float* means,
float* scales) { float* scales) {
cv::Mat rgb_img; cv::Mat rgb_img;
// cv::cvtColor(img, rgb_img, cv::COLOR_BGR2RGB); cv::cvtColor(img, rgb_img, cv::COLOR_BGR2RGB);
cv::resize(rgb_img, rgb_img, cv::Size(width, height), 0.f, 0.f); cv::resize(rgb_img, rgb_img, cv::Size(width, height), 0.f, 0.f);
cv::Mat imgf; cv::Mat imgf;
rgb_img.convertTo(imgf, CV_32FC3, 1 / 255.f); rgb_img.convertTo(imgf, CV_32FC3, 1 / 255.f);
......
# 图像预测库的使用
1. 下载源码(https://github.com/PaddlePaddle/Paddle-Lite),打开LITE_WITH_CV=ON,编译full_publish模式
example:
```shell
set BUILD_WITH_CV=ON or LITE_WITH_CV=ON
./lite/tools/build.sh \
--arm_os=android \
--arm_abi=armv8 \
--arm_lang=gcc \
--android_stl=c++_static \
full_publish
```
2. 准备模型和优化模型
example:
```shell
wget http://paddle-inference-dist.bj.bcebos.com/mobilenet_v1.tar.gz
tar zxvf mobilenet_v1.tar.gz
./lite/tools/build.sh build_optimize_tool
./build.model_optimize_tool/lite/api/model_optimize_tool \
--optimize_out_type=naive_buffer \
--optimize_out=model_dir \
--model_dir=model_dir \
--prefer_int8_kernel=false
```
3. 编译并运行完整test_model_cv demo
example:
```shell
cd inference_lite_lib.android.armv8/demo/cxx/test_cv
```
- 修改Makefile, 注释编译test_img_prepross 语句
```shell
test_model_cv: fetch_opencv test_model_cv.o
$(CC) $(SYSROOT_LINK) $(CXXFLAGS_LINK) test_model_cv.o -o test_model_cv $(CXX_LIBS) $(LDFLAGS)
test_model_cv.o: test_model_cv.cc
$(CC) $(SYSROOT_COMPLILE) $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -o test_model_cv.o -c test_model_cv.cc
#test_img_prepross: fetch_opencv test_img_prepross.o
# $(CC) $(SYSROOT_LINK) $(CXXFLAGS_LINK) test_img_prepross.o -o test_img_prepross $(CXX_LIBS) $(LDFLAGS)
#test_img_prepross.o: test_img_prepross.cc
# $(CC) $(SYSROOT_COMPLILE) $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -o test_img_prepross.o -c test_img_prepross.cc
.PHONY: clean
clean:
rm -f test_model_cv.o
rm -f test_model_cv
#rm -f test_img_prepross.o
#rm -f test_img_prepross
```
- 修改../../../cxx/include/paddle_image_preprocess.h, 修改paddle_api.h头文件的路径
```shell
origin:
#include "lite/api/paddle_api.h"
#include "lite/api/paddle_place.h"
now:
#include "paddle_api.h"
#include "paddle_place.h"
```
- 测试模型必须是优化后的模型
```shell
make
adb -s device_id push mobilenet_v1 /data/local/tmp/
adb -s device_id push test_model_cv /data/local/tmp/
adb -s device_id push test.jpg /data/local/tmp/
adb -s device_id push ../../../cxx/lib/libpaddle_full_api_shared.so /data/local/tmp/
adb -s device_id shell chmod +x /data/local/tmp/test_model_cv
adb -s device_id shell "export LD_LIBRARY_PATH=/data/local/tmp/:$LD_LIBRARY_PATH &&
/data/local/tmp/test_model_cv /data/local/tmp/mobilenet_v1 /data/local/tmp/test.jpg 1 3 224 224 "
```
运行成功将在控制台输出部分预测结果
4. 编译并运行完整test_img_preprocess demo
example:
```shell
cd inference_lite_lib.android.armv8/demo/cxx/test_cv
```
- 修改Makefile, 注释编译test_model_cv 语句
```shell
#test_model_cv: fetch_opencv test_model_cv.o
# $(CC) $(SYSROOT_LINK) $(CXXFLAGS_LINK) test_model_cv.o -o test_model_cv $(CXX_LIBS) $(LDFLAGS)
#test_model_cv.o: test_model_cv.cc
# $(CC) $(SYSROOT_COMPLILE) $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -o test_model_cv.o -c test_model_cv.cc
test_img_prepross: fetch_opencv test_img_prepross.o
$(CC) $(SYSROOT_LINK) $(CXXFLAGS_LINK) test_img_prepross.o -o test_img_prepross $(CXX_LIBS) $(LDFLAGS)
test_img_prepross.o: test_img_prepross.cc
$(CC) $(SYSROOT_COMPLILE) $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -o test_img_prepross.o -c test_img_prepross.cc
.PHONY: clean
clean:
#rm -f test_model_cv.o
#rm -f test_model_cv
rm -f test_img_prepross.o
rm -f test_img_prepross
```
- 修改../../../cxx/include/paddle_image_preprocess.h, 修改paddle_api.h头文件的路径
```shell
origin:
#include "lite/api/paddle_api.h"
#include "lite/api/paddle_place.h"
now:
#include "paddle_api.h"
#include "paddle_place.h"
```
- 测试模型必须是优化后的模型
```shell
make
adb -s device_id push mobilenet_v1 /data/local/tmp/
adb -s device_id push test_img_prepross /data/local/tmp/
adb -s device_id push test.jpg /data/local/tmp/
adb -s device_id push ../../../cxx/lib/libpaddle_full_api_shared.so /data/local/tmp/
adb -s device_id shell chmod +x /data/local/tmp/test_img_prepross
adb -s device_id shell "export LD_LIBRARY_PATH=/data/local/tmp/:$LD_LIBRARY_PATH &&
/data/local/tmp/test_img_prepross /data/local/tmp/test.jpg /data/local/tmp/ 3 3 224 224 /data/local/tmp/mobilenet_v1 "
adb -s device_id pull /data/local/tmp/resize.jpg ./
adb -s device_id pull /data/local/tmp/convert.jpg ./
adb -s device_id pull /data/local/tmp/flip.jpg ./
adb -s device_id pull /data/local/tmp/rotate.jpg ./
```
运行成功将在控制台输出OpenCV 和 Paddle-Lite的耗时;同时,将在test_cv目录下看到生成的图像预处理结果图: 如:resize.jpg、convert.jpg等
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <iostream>
#include <vector>
#include "opencv2/core.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/imgproc.hpp"
#include "paddle_api.h" // NOLINT
#include "paddle_image_preprocess.h" // NOLINT
#include "time.h" // NOLINT
typedef paddle::lite_api::Tensor Tensor;
typedef paddle::lite::utils::cv::ImageFormat ImageFormat;
typedef paddle::lite::utils::cv::FlipParam FlipParam;
typedef paddle::lite::utils::cv::TransParam TransParam;
typedef paddle::lite::utils::cv::ImagePreprocess ImagePreprocess;
typedef paddle::lite_api::DataLayoutType LayoutType;
using namespace paddle::lite_api; // NOLINT
// Copies a tightly packed, interleaved 3-byte-per-pixel buffer into `mat`.
// `src` is read row by row: pixel (row, col) starts at byte
// (row * mat.cols + col) * 3, and its three bytes go to channels 0, 1 and 2
// of the matching cv::Vec3b element.
void fill_with_mat(cv::Mat& mat, uint8_t* src) {  // NOLINT
  for (int row = 0; row < mat.rows; row++) {
    for (int col = 0; col < mat.cols; col++) {
      const uint8_t* pixel_src = src + (row * mat.cols + col) * 3;
      cv::Vec3b& pixel = mat.at<cv::Vec3b>(row, col);
      for (int ch = 0; ch < 3; ch++) {
        pixel[ch] = pixel_src[ch];
      }
    }
  }
}
// Runs the same convert / resize / rotate / flip pipeline with OpenCV and with
// the Paddle-Lite ImagePreprocess API, prints the accumulated and average
// clock() time of each step, and writes the Paddle-Lite results to
// <dst_path>/{convert,resize,rotate,flip}.jpg.
//
// cluster_id : power modes to test (each value is cast to PowerMode).
// thread_num : thread counts to test.
// img_path   : input image, read with OpenCV.
// dst_path   : directory that receives the result images.
// srcFormat  : format the image is loaded as; only BGR, RGB and GRAY are
//              handled here.
// dstFormat  : target format; must equal srcFormat or be a BGR <-> GRAY
//              conversion, otherwise the function reports an error and returns.
// width, height : size used for the resize step.
// rotate     : rotation in degrees; the OpenCV path handles 90, 180 and 270.
// flip       : flip mode, forwarded to cv::flip and imageFlip.
// layout     : tensor layout passed to image2Tensor.
// model_dir  : model directory given to MobileConfig.
// test_iter  : number of timed iterations per configuration.
void test_img(std::vector<int> cluster_id,
              std::vector<int> thread_num,
              std::string img_path,
              std::string dst_path,
              ImageFormat srcFormat,
              ImageFormat dstFormat,
              int width,
              int height,
              float rotate,
              FlipParam flip,
              LayoutType layout,
              std::string model_dir,
              int test_iter = 1) {
  float means[3] = {0.485f, 0.456f, 0.406f};
  float scales[3] = {0.229f, 0.224f, 0.225f};

  // Decide up front which conversions the OpenCV reference path can mirror, so
  // an unsupported pair is rejected before any buffer is allocated.
  const bool same_format = (dstFormat == srcFormat);
  const bool gray_to_bgr =
      (srcFormat == ImageFormat::GRAY && dstFormat == ImageFormat::BGR);
  const bool bgr_to_gray =
      (srcFormat == ImageFormat::BGR && dstFormat == ImageFormat::GRAY);

  for (auto& cls : cluster_id) {
    for (auto& th : thread_num) {
      std::cout << "cluster: " << cls << ", threads: " << th << std::endl;
      // 1. Set MobileConfig
      MobileConfig config;
      config.set_model_dir(model_dir);
      config.set_power_mode((PowerMode)cls);
      config.set_threads(th);
      std::cout << "model: " << model_dir;
      // 2. Create PaddlePredictor by MobileConfig
      std::shared_ptr<PaddlePredictor> predictor =
          CreatePaddlePredictor<MobileConfig>(config);
      // 3. Prepare input data from image
      std::unique_ptr<Tensor> input_tensor(predictor->GetInput(0));

      /*
        imread(img_path, param)
        IMREAD_UNCHANGED (<0) loads the image as is
        IMREAD_GRAYSCALE ( 0) loads the image as a single-channel gray image
        IMREAD_COLOR     (>0) loads the image as a 3-channel color image
      */
      cv::Mat img;
      if (srcFormat == ImageFormat::BGR || srcFormat == ImageFormat::RGB) {
        img = cv::imread(img_path, cv::IMREAD_COLOR);
      } else if (srcFormat == ImageFormat::GRAY) {
        img = cv::imread(img_path, cv::IMREAD_GRAYSCALE);
      } else {
        printf("this format %d does not support \n",
               static_cast<int>(srcFormat));
        return;
      }
      if (img.empty()) {
        std::cout << "opencv read image " << img_path.c_str() << " failed"
                  << std::endl;
        return;
      }
      if (!same_format && !gray_to_bgr && !bgr_to_gray) {
        // Previously this case fell through with an empty im_convert and
        // reached cv::resize; fail explicitly instead.
        printf("convert format error \n");
        return;
      }

      int srch = img.rows;
      int srcw = img.cols;
      int dsth = height;
      int dstw = width;

      std::cout << " input tensor size, num= " << 1 << ", channel= " << 1
                << ", height= " << srch << ", width= " << srcw
                << ", srcFormat= " << (ImageFormat)srcFormat << std::endl;
      // RGBA = 0, BGRA, RGB, BGR, GRAY, NV21 = 11, NV12,
      if (srcFormat == ImageFormat::GRAY) {
        std::cout << "srcFormat: GRAY" << std::endl;
      }
      if (srcFormat == ImageFormat::BGR) {
        std::cout << "srcFormat: BGR" << std::endl;
      }
      if (srcFormat == ImageFormat::RGB) {
        std::cout << "srcFormat: RGB" << std::endl;
      }
      std::cout << " output tensor size, num=" << 1 << ", channel=" << 1
                << ", height=" << dsth << ", width=" << dstw
                << ", dstFormat= " << (ImageFormat)dstFormat << std::endl;

      if (dstFormat == ImageFormat::GRAY) {
        std::cout << "dstFormat: GRAY" << std::endl;
      }
      if (dstFormat == ImageFormat::BGR) {
        std::cout << "dstFormat: BGR" << std::endl;
      }
      if (dstFormat == ImageFormat::RGB) {
        std::cout << "dstFormat: RGB" << std::endl;
      }

      std::cout << "Rotate = " << rotate << ", Flip = " << flip
                << ", Layout = " << static_cast<int>(layout) << std::endl;
      if (static_cast<int>(layout) != 1 && static_cast<int>(layout) != 3) {
        std::cout << "this layout" << static_cast<int>(layout)
                  << " is no support" << std::endl;
      }

      uint8_t* src = img.data;

      // After the checks above dstFormat is BGR, RGB or GRAY.
      const int dst_channels = (dstFormat == ImageFormat::GRAY) ? 1 : 3;
      const int out_size = dst_channels * srch * srcw;
      const int resize = dst_channels * dsth * dstw;

      // Output buffers; std::vector releases them on every exit path.
      std::vector<uint8_t> lite_dst(out_size);
      std::vector<uint8_t> resize_tmp(resize);
      std::vector<uint8_t> tv_out_ratote(out_size);
      std::vector<uint8_t> tv_out_flip(out_size);

      std::vector<int64_t> shape_out = {1, 3, srch, srcw};

      input_tensor->Resize(shape_out);
      Tensor dst_tensor = *input_tensor;
      std::cout << "opencv compute" << std::endl;
      cv::Mat im_convert;
      cv::Mat im_resize;
      cv::Mat im_rotate;
      cv::Mat im_flip;
      double to_1 = 0;
      double to_2 = 0;
      double to_3 = 0;
      double to_4 = 0;
      double to1 = 0;
      for (int i = 0; i < test_iter; i++) {
        clock_t start = clock();
        clock_t begin = clock();
        // convert bgr-gray
        if (same_format) {
          im_convert = img;
        } else if (gray_to_bgr) {
          cv::cvtColor(img, im_convert, cv::COLOR_GRAY2BGR);
        } else {
          cv::cvtColor(img, im_convert, cv::COLOR_BGR2GRAY);
        }
        clock_t end = clock();
        to_1 += (end - begin);

        begin = clock();
        // resize default linear
        cv::resize(im_convert, im_resize, cv::Size(dstw, dsth), 0.f, 0.f);
        end = clock();
        to_2 += (end - begin);

        begin = clock();
        // rotate: transpose + flip for 90/270, double flip for 180
        if (rotate == 90) {
          cv::flip(im_convert.t(), im_rotate, 1);
        } else if (rotate == 180) {
          cv::flip(im_convert, im_rotate, -1);
        } else if (rotate == 270) {
          cv::flip(im_convert.t(), im_rotate, 0);
        }
        end = clock();
        to_3 += (end - begin);

        begin = clock();
        // flip
        cv::flip(im_convert, im_flip, flip);
        end = clock();
        to_4 += (end - begin);

        clock_t ovet = clock();
        to1 += (ovet - start);
      }

      std::cout << "Paddle-lite compute" << std::endl;
      double lite_to = 0;
      double lite_to_1 = 0;
      double lite_to_2 = 0;
      double lite_to_3 = 0;
      double lite_to_4 = 0;
      double lite_to_5 = 0;

      TransParam tparam;
      tparam.ih = srch;
      tparam.iw = srcw;
      tparam.oh = dsth;
      tparam.ow = dstw;
      tparam.flip_param = flip;
      tparam.rotate_param = rotate;

      ImagePreprocess image_preprocess(srcFormat, dstFormat, tparam);

      for (int i = 0; i < test_iter; ++i) {
        clock_t start = clock();
        clock_t begin = clock();
        image_preprocess.imageConvert(src, lite_dst.data());
        clock_t end = clock();
        lite_to_1 += (end - begin);

        begin = clock();
        image_preprocess.imageResize(lite_dst.data(), resize_tmp.data());
        end = clock();
        lite_to_2 += (end - begin);

        begin = clock();
        // Use the requested angle (was hard-coded to 90) so the result matches
        // both the OpenCV reference and the rotate.jpg dimensions below.
        image_preprocess.imageRotate(lite_dst.data(),
                                     tv_out_ratote.data(),
                                     (ImageFormat)dstFormat,
                                     srcw,
                                     srch,
                                     rotate);
        end = clock();
        lite_to_3 += (end - begin);

        begin = clock();
        image_preprocess.imageFlip(lite_dst.data(),
                                   tv_out_flip.data(),
                                   (ImageFormat)dstFormat,
                                   srcw,
                                   srch,
                                   flip);
        end = clock();
        lite_to_4 += (end - begin);

        clock_t over = clock();
        lite_to += (over - start);

        // image2Tensor is timed separately and is not part of the lite total.
        begin = clock();
        image_preprocess.image2Tensor(lite_dst.data(),
                                      &dst_tensor,
                                      (ImageFormat)dstFormat,
                                      srcw,
                                      srch,
                                      layout,
                                      means,
                                      scales);
        end = clock();
        lite_to_5 += (end - begin);
      }

      // clock ticks -> milliseconds
      to_1 = 1000 * to_1 / CLOCKS_PER_SEC;
      to_2 = 1000 * to_2 / CLOCKS_PER_SEC;
      to_3 = 1000 * to_3 / CLOCKS_PER_SEC;
      to_4 = 1000 * to_4 / CLOCKS_PER_SEC;
      to1 = 1000 * to1 / CLOCKS_PER_SEC;
      std::cout << "opencv convert run time: " << to_1
                << "ms, avg: " << to_1 / test_iter << std::endl;
      std::cout << "opencv resize run time: " << to_2
                << "ms, avg: " << to_2 / test_iter << std::endl;
      std::cout << "opencv rotate run time: " << to_3
                << "ms, avg: " << to_3 / test_iter << std::endl;
      std::cout << "opencv flip  time: " << to_4
                << "ms, avg: " << to_4 / test_iter << std::endl;
      std::cout << "opencv total run time: " << to1
                << "ms, avg: " << to1 / test_iter << std::endl;
      std::cout << "------" << std::endl;

      lite_to_1 = 1000 * lite_to_1 / CLOCKS_PER_SEC;
      lite_to_2 = 1000 * lite_to_2 / CLOCKS_PER_SEC;
      lite_to_3 = 1000 * lite_to_3 / CLOCKS_PER_SEC;
      lite_to_4 = 1000 * lite_to_4 / CLOCKS_PER_SEC;
      lite_to_5 = 1000 * lite_to_5 / CLOCKS_PER_SEC;
      lite_to = 1000 * lite_to / CLOCKS_PER_SEC;
      std::cout << "lite convert run time: " << lite_to_1
                << "ms, avg: " << lite_to_1 / test_iter << std::endl;
      std::cout << "lite resize run time: " << lite_to_2
                << "ms, avg: " << lite_to_2 / test_iter << std::endl;
      std::cout << "lite rotate run time: " << lite_to_3
                << "ms, avg: " << lite_to_3 / test_iter << std::endl;
      std::cout << "lite flip  time: " << lite_to_4
                << "ms, avg: " << lite_to_4 / test_iter << std::endl;
      std::cout << "lite total run time: " << lite_to
                << "ms, avg: " << lite_to / test_iter << std::endl;
      std::cout << "lite img2tensor  time: " << lite_to_5
                << "ms, avg: " << lite_to_5 / test_iter << std::endl;
      std::cout << "------" << std::endl;

      // save_img
      std::cout << "write image: " << std::endl;
      std::string resize_name = dst_path + "/resize.jpg";
      std::string convert_name = dst_path + "/convert.jpg";
      std::string rotate_name = dst_path + "/rotate.jpg";
      std::string flip_name = dst_path + "/flip.jpg";

      // Wrap the result buffers with a Mat header of the matching channel
      // count; treating a 1-channel GRAY buffer as CV_8UC3 would read past its
      // end. The headers do not own the data, the vectors above do.
      const int mat_type = (dstFormat == ImageFormat::GRAY) ? CV_8UC1 : CV_8UC3;
      const bool swap_dims = (rotate == 90 || rotate == 270);
      cv::Mat resize_mat(dsth, dstw, mat_type, resize_tmp.data());
      cv::Mat convert_mat(srch, srcw, mat_type, lite_dst.data());
      cv::Mat rotate_mat(swap_dims ? srcw : srch,
                         swap_dims ? srch : srcw,
                         mat_type,
                         tv_out_ratote.data());
      cv::Mat flip_mat(srch, srcw, mat_type, tv_out_flip.data());

      cv::imwrite(convert_name, convert_mat);
      cv::imwrite(resize_name, resize_mat);
      cv::imwrite(rotate_name, rotate_mat);
      cv::imwrite(flip_name, flip_mat);
    }
  }
}
// Entry point of the image pre-processing demo.
//
// Required positional arguments:
//   image_path dst_path srcFormat dstFormat width height
// Optional positional arguments (each one requires all previous ones):
//   model_dir (default "mobilenet_v1"), flip (default -1),
//   rotate (default 90), layout (default 1)
//
// The numeric arguments are parsed with atoi and cast straight to the
// ImageFormat / FlipParam / LayoutType enums; no range checking is done here.
int main(int argc, char** argv) {
  if (argc < 7) {
    std::cerr << "[ERROR] usage: " << argv[0]
              << " image_path dst_path srcFormat dstFormat width height"
                 " [model_dir] [flip] [rotate] [layout]\n";
    exit(1);
  }
  std::string image_path = argv[1];
  std::string dst_path = argv[2];
  int srcFormat = atoi(argv[3]);
  int dstFormat = atoi(argv[4]);
  int width = atoi(argv[5]);
  int height = atoi(argv[6]);

  // Defaults for the optional trailing arguments.
  int flip = -1;
  float rotate = 90;
  int layout = 1;
  std::string model_dir = "mobilenet_v1";
  if (argc > 7) {
    model_dir = argv[7];
  }
  if (argc > 8) {
    flip = atoi(argv[8]);
  }
  if (argc > 9) {
    rotate = atoi(argv[9]);
  }
  if (argc > 10) {
    layout = atoi(argv[10]);
  }

  // NOTE(review): the two brace lists and the trailing 20 are forwarded
  // unchanged; they look like cluster ids, thread counts and the iteration
  // count of test_img -- confirm against its declaration.
  test_img({3},
           {1, 2, 4},
           image_path,
           dst_path,
           (ImageFormat)srcFormat,
           (ImageFormat)dstFormat,
           width,
           height,
           rotate,
           (FlipParam)flip,
           (LayoutType)layout,
           model_dir,
           20);
  return 0;
}
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <iostream>
#include <vector>
#include "opencv2/core.hpp"
#include "opencv2/imgcodecs.hpp"
#include "opencv2/imgproc.hpp"
#include "paddle_api.h" // NOLINT
#include "paddle_image_preprocess.h" // NOLINT
#include "time.h" // NOLINT
using namespace paddle::lite_api; // NOLINT
int64_t ShapeProduction(const shape_t& shape) {
int64_t res = 1;
for (auto i : shape) res *= i;
return res;
}
// Normalizes an interleaved 3-channel float image and writes it as three
// planes (interleaved c0c1c2... -> plane c0 | plane c1 | plane c2).
//
// din:   input, `size` pixels of 3 interleaved floats.
// dout:  output, 3 * `size` floats; plane k starts at dout + k * size.
// size:  number of pixels.
// mean:  3 per-channel means that are subtracted.
// scale: 3 per-channel divisors; each output is (in - mean[k]) / scale[k].
void neon_mean_scale(
    const float* din, float* dout, int size, float* mean, float* scale) {
  float32x4_t vmean0 = vdupq_n_f32(mean[0]);
  float32x4_t vmean1 = vdupq_n_f32(mean[1]);
  float32x4_t vmean2 = vdupq_n_f32(mean[2]);
  // The vector path multiplies by the reciprocal instead of dividing.
  float32x4_t vscale0 = vdupq_n_f32(1.f / scale[0]);
  float32x4_t vscale1 = vdupq_n_f32(1.f / scale[1]);
  float32x4_t vscale2 = vdupq_n_f32(1.f / scale[2]);

  float* dout_c0 = dout;
  float* dout_c1 = dout + size;
  float* dout_c2 = dout + size * 2;

  int i = 0;
  // Main loop: 4 pixels (12 floats) per iteration; vld3q de-interleaves them.
  for (; i < size - 3; i += 4) {
    float32x4x3_t vin3 = vld3q_f32(din);
    float32x4_t vsub0 = vsubq_f32(vin3.val[0], vmean0);
    float32x4_t vsub1 = vsubq_f32(vin3.val[1], vmean1);
    float32x4_t vsub2 = vsubq_f32(vin3.val[2], vmean2);
    float32x4_t vs0 = vmulq_f32(vsub0, vscale0);
    float32x4_t vs1 = vmulq_f32(vsub1, vscale1);
    float32x4_t vs2 = vmulq_f32(vsub2, vscale2);
    vst1q_f32(dout_c0, vs0);
    vst1q_f32(dout_c1, vs1);
    vst1q_f32(dout_c2, vs2);

    din += 12;
    dout_c0 += 4;
    dout_c1 += 4;
    dout_c2 += 4;
  }
  // Scalar tail for the remaining (size % 4) pixels. Each channel goes to its
  // own plane and is divided by scale, matching the vector path above.
  for (; i < size; i++) {
    *(dout_c0++) = (*(din++) - mean[0]) / scale[0];
    *(dout_c1++) = (*(din++) - mean[1]) / scale[1];
    *(dout_c2++) = (*(din++) - mean[2]) / scale[2];
  }
}
// Converts a BGR cv::Mat into a normalized float tensor of size
// width x height and stores it in `dstTensor`.
//
// img:       source image; its data is read as 3-channel 8-bit BGR.
// width:     target width in pixels.
// height:    target height in pixels.
// dstTensor: destination tensor handle (passed by value, as in the original
//            interface); its float buffer receives the result.
//
// With LITE_WITH_CV the Paddle-Lite ImagePreprocess pipeline is used
// (convert -> resize -> image2Tensor); otherwise OpenCV does the conversion
// and neon_mean_scale performs normalization and the layout change.
void pre_process(const cv::Mat& img, int width, int height, Tensor dstTensor) {
#ifdef LITE_WITH_CV
  typedef paddle::lite::utils::cv::ImageFormat ImageFormat;
  typedef paddle::lite::utils::cv::TransParam TransParam;
  typedef paddle::lite::utils::cv::ImagePreprocess ImagePreprocess;
  typedef paddle::lite_api::DataLayoutType LayoutType;

  // init TransParam
  TransParam tp;
  tp.iw = img.cols;
  tp.ih = img.rows;
  tp.ow = width;
  tp.oh = height;
  ImageFormat srcFormat = ImageFormat::BGR;
  ImageFormat dstFormat = ImageFormat::RGB;
  // init ImagePreprocess
  ImagePreprocess img_process(srcFormat, dstFormat, tp);

  // Scratch buffers are owned by std::vector so they are released on every
  // exit path (the raw new[] buffers used previously were never freed).
  // Sizes are computed in size_t to avoid int overflow on large images.
  const uint8_t* img_ptr = reinterpret_cast<const uint8_t*>(img.data);
  std::vector<uint8_t> rgb_buf(static_cast<size_t>(img.cols) * img.rows * 3);
  std::vector<uint8_t> resize_buf(static_cast<size_t>(width) * height * 3);

  // do convert bgr--rgb
  img_process.imageConvert(img_ptr, rgb_buf.data());
  // do resize
  img_process.imageResize(rgb_buf.data(), resize_buf.data());
  // data--tensor and normalize
  float means[3] = {103.94f, 116.78f, 123.68f};
  float scales[3] = {0.017f, 0.017f, 0.017f};
  img_process.image2Tensor(
      resize_buf.data(), &dstTensor, LayoutType::kNCHW, means, scales);
  float* data = dstTensor.mutable_data<float>();
  (void)data;  // kept from the original; the pointer itself is not used
#else
  cv::Mat rgb_img;
  cv::cvtColor(img, rgb_img, cv::COLOR_BGR2RGB);
  cv::resize(rgb_img, rgb_img, cv::Size(width, height), 0.f, 0.f);
  // Scale pixel values to [0, 1] floats before mean/scale normalization.
  cv::Mat imgf;
  rgb_img.convertTo(imgf, CV_32FC3, 1 / 255.f);
  float means[3] = {0.485f, 0.456f, 0.406f};
  float scales[3] = {0.229f, 0.224f, 0.225f};
  const float* dimg = reinterpret_cast<const float*>(imgf.data);
  float* data = dstTensor.mutable_data<float>();
  neon_mean_scale(dimg, data, width * height, means, scales);
#endif
}
// Loads the model in `model_dir`, feeds it the image at `img_path`, runs
// inference and prints a timing report plus a sample of output tensor 0.
//
// model_dir:   forwarded to MobileConfig::set_model_dir.
// img_path:    image file, read with cv::imread(..., cv::IMREAD_COLOR).
// input_shape: four extents used to resize input 0; element 2 is forwarded
//              to pre_process as the height and element 3 as the width.
// power_mode:  forwarded to MobileConfig::set_power_mode.
// thread_num:  forwarded to MobileConfig::set_threads.
// test_iter:   number of timed predictor->Run() calls.
// warmup:      number of untimed Run() calls executed first.
//
// On a malformed `input_shape` or an unreadable image an error is printed to
// std::cerr and the function returns without running the model.
void RunModel(std::string model_dir,
              std::string img_path,
              std::vector<int> input_shape,
              PowerMode power_mode,
              int thread_num,
              int test_iter,
              int warmup = 0) {
  if (input_shape.size() < 4) {
    std::cerr << "[ERROR] input_shape needs 4 values, got "
              << input_shape.size() << std::endl;
    return;
  }

  // 1. Set MobileConfig
  MobileConfig config;
  config.set_model_dir(model_dir);
  config.set_power_mode(power_mode);
  config.set_threads(thread_num);

  // 2. Create PaddlePredictor by MobileConfig
  std::shared_ptr<PaddlePredictor> predictor =
      CreatePaddlePredictor<MobileConfig>(config);

  // 3. Prepare input data from image
  std::unique_ptr<Tensor> input_tensor(std::move(predictor->GetInput(0)));
  input_tensor->Resize(
      {input_shape[0], input_shape[1], input_shape[2], input_shape[3]});
  auto* data = input_tensor->mutable_data<float>();
  (void)data;  // kept from the original; pre_process fills the tensor itself

  // read img and pre-process
  cv::Mat img = cv::imread(img_path, cv::IMREAD_COLOR);
  // cv::imread returns an empty Mat when the file is missing or unreadable.
  if (img.empty()) {
    std::cerr << "[ERROR] failed to read image: " << img_path << std::endl;
    return;
  }
  pre_process(img, input_shape[3], input_shape[2], *input_tensor);

  // 4. Run predictor
  for (int i = 0; i < warmup; ++i) {
    predictor->Run();
  }

  // NOTE: clock() reports processor time, not wall-clock time, so with more
  // than one thread the figures below may exceed the elapsed time.
  double lps = 0.f;
  double min_time = 1000000.f;
  double max_time = 0.f;
  for (int i = 0; i < test_iter; ++i) {
    clock_t begin = clock();
    predictor->Run();
    clock_t end = clock();
    double t = (end - begin) * 1000;
    t = t / CLOCKS_PER_SEC;
    lps += t;
    if (t < min_time) {
      min_time = t;
    }
    if (t > max_time) {
      max_time = t;
    }
    std::cout << "iter: " << i << ", time: " << t << " ms" << std::endl;
  }
  std::cout << "================== Speed Report ==================="
            << std::endl;
  std::cout << "Model: " << model_dir
            << ", power_mode: " << static_cast<int>(power_mode)
            << ", threads num " << thread_num << ", warmup: " << warmup
            << ", repeats: " << test_iter << ", avg time: " << lps / test_iter
            << " ms"
            << ", min time: " << min_time << " ms"
            << ", max time: " << max_time << " ms." << std::endl;

  // 5. Get output and post process
  std::unique_ptr<const Tensor> output_tensor(
      std::move(predictor->GetOutput(0)));
  auto* outptr = output_tensor->data<float>();
  auto shape_out = output_tensor->shape();
  int output_num = 1;
  // Range-for avoids the signed/unsigned index comparison.
  for (auto dim : shape_out) {
    output_num *= dim;
  }
  std::cout << "output_num: " << output_num << std::endl;
  // Print every 100th output value as a quick sanity sample.
  for (int i = 0; i < output_num; i += 100) {
    std::cout << "i: " << i << ", out: " << outptr[i] << std::endl;
  }
}
// Entry point of the model benchmark demo.
//
// Required positional arguments:
//   model_dir image_path followed by the four input_shape values
//   (RunModel uses the third as height and the fourth as width)
// Optional positional arguments (each one requires all previous ones):
//   power_mode (default 3), threads (default 1),
//   test_iter (default 100), warmup (default 10)
int main(int argc, char** argv) {
  if (argc < 7) {
    // Six arguments are mandatory: the shape is four separate integers.
    std::cerr << "[ERROR] usage: " << argv[0]
              << " model_dir image_path input_shape(4 ints, e.g. 1 3 224 224)"
                 " [power_mode] [threads] [test_iter] [warmup]\n";
    exit(1);
  }
  std::string model_dir = argv[1];
  std::string img_path = argv[2];

  std::vector<int> input_shape;
  input_shape.push_back(atoi(argv[3]));
  input_shape.push_back(atoi(argv[4]));
  input_shape.push_back(atoi(argv[5]));
  input_shape.push_back(atoi(argv[6]));

  // Defaults for the optional trailing arguments.
  int power_mode = 3;
  int threads = 1;
  int test_iter = 100;
  int warmup = 10;
  if (argc > 7) {
    power_mode = atoi(argv[7]);
  }
  if (argc > 8) {
    threads = atoi(argv[8]);
  }
  if (argc > 9) {
    test_iter = atoi(argv[9]);
  }
  if (argc > 10) {
    warmup = atoi(argv[10]);
  }

  RunModel(model_dir,
           img_path,
           input_shape,
           (PowerMode)power_mode,
           threads,
           test_iter,
           warmup);
  return 0;
}
if(LITE_WITH_CV AND (NOT LITE_WITH_OPENCL AND NOT LITE_WITH_FPGA) AND LITE_WITH_ARM) if(LITE_WITH_CV AND (NOT LITE_WITH_OPENCL AND NOT LITE_WITH_FPGA) AND LITE_WITH_ARM)
lite_cc_test(image_convert_test SRCS image_convert_test.cc DEPS paddle_cv_arm paddle_api_light ${lite_cv_deps} ${arm_kernels} ${lite_ops} ${host_kernels}) lite_cc_test(image_convert_test SRCS image_convert_test.cc DEPS paddle_cv_arm)
endif() endif()
...@@ -192,7 +192,6 @@ void nv21_bgra_basic(const uint8_t* in_data, ...@@ -192,7 +192,6 @@ void nv21_bgra_basic(const uint8_t* in_data,
nv2bgra(in_data, out_data, srcw, srch, 0, 1); nv2bgra(in_data, out_data, srcw, srch, 0, 1);
} }
/*
/* /*
采用CV_BGR2GRAY,转换公式Gray = 0.1140*B + 0.5870*G + 0.2989*R 采用CV_BGR2GRAY,转换公式Gray = 0.1140*B + 0.5870*G + 0.2989*R
采用CV_RGB2GRAY,转换公式Gray = 0.1140*R + 0.5870*G + 0.2989*B 采用CV_RGB2GRAY,转换公式Gray = 0.1140*R + 0.5870*G + 0.2989*B
...@@ -217,6 +216,21 @@ void bgr_gray_basic(const uint8_t* in_data, ...@@ -217,6 +216,21 @@ void bgr_gray_basic(const uint8_t* in_data,
} }
} }
} }
// Reference conversion of a 4-byte-per-pixel interleaved image to one gray
// byte per pixel. For each pixel the first three bytes are combined as
// (c0 * 15 + c1 * 75 + c2 * 38) >> 7; the fourth byte is ignored.
//
// in_data:  srch rows of srcw pixels, 4 bytes each.
// out_data: srch rows of srcw bytes.
void bgra_gray_basic(const uint8_t* in_data,
                     uint8_t* out_data,
                     int srcw,
                     int srch) {
  for (int row = 0; row < srch; ++row) {
    const uint8_t* px = in_data + row * 4 * srcw;
    uint8_t* gray_row = out_data + row * srcw;
    for (int col = 0; col < srcw; ++col, px += 4) {
      const int weighted = px[0] * 15 + px[1] * 75 + px[2] * 38;
      gray_row[col] = weighted >> 7;
    }
  }
}
void gray_bgr_basic(const uint8_t* src, uint8_t* dst, int srcw, int srch) { void gray_bgr_basic(const uint8_t* src, uint8_t* dst, int srcw, int srch) {
for (int i = 0; i < srch; i++) { for (int i = 0; i < srch; i++) {
...@@ -228,6 +242,17 @@ void gray_bgr_basic(const uint8_t* src, uint8_t* dst, int srcw, int srch) { ...@@ -228,6 +242,17 @@ void gray_bgr_basic(const uint8_t* src, uint8_t* dst, int srcw, int srch) {
} }
} }
} }
// Reference expansion of a gray image to 4 bytes per pixel: the gray value
// is replicated into the first three bytes and the fourth byte is set to 255.
//
// src: srch * srcw gray bytes.
// dst: srch * srcw * 4 output bytes.
void gray_bgra_basic(const uint8_t* src, uint8_t* dst, int srcw, int srch) {
  for (int row = 0; row < srch; ++row) {
    for (int col = 0; col < srcw; ++col) {
      const uint8_t gray = *src++;
      dst[0] = gray;
      dst[1] = gray;
      dst[2] = gray;
      dst[3] = 255;
      dst += 4;
    }
  }
}
// bgr2bgra, rgb2rgba // bgr2bgra, rgb2rgba
void hwc3_to_hwc4_basic(const uint8_t* src, uint8_t* dst, int srcw, int srch) { void hwc3_to_hwc4_basic(const uint8_t* src, uint8_t* dst, int srcw, int srch) {
for (int i = 0; i < srch; i++) { for (int i = 0; i < srch; i++) {
...@@ -340,6 +365,16 @@ void image_convert_basic(const uint8_t* in_data, ...@@ -340,6 +365,16 @@ void image_convert_basic(const uint8_t* in_data,
(srcFormat == ImageFormat::GRAY && (srcFormat == ImageFormat::GRAY &&
dstFormat == ImageFormat::BGR)) { dstFormat == ImageFormat::BGR)) {
gray_bgr_basic(in_data, out_data, srcw, srch); gray_bgr_basic(in_data, out_data, srcw, srch);
} else if ((srcFormat == ImageFormat::RGBA &&
dstFormat == ImageFormat::GRAY) ||
(srcFormat == ImageFormat::BGRA &&
dstFormat == ImageFormat::GRAY)) {
bgra_gray_basic(in_data, out_data, srcw, srch);
} else if ((srcFormat == ImageFormat::GRAY &&
dstFormat == ImageFormat::RGBA) ||
(srcFormat == ImageFormat::GRAY &&
dstFormat == ImageFormat::BGRA)) {
gray_bgra_basic(in_data, out_data, srcw, srch);
} else if ((srcFormat == ImageFormat::RGBA && } else if ((srcFormat == ImageFormat::RGBA &&
dstFormat == ImageFormat::RGB) || dstFormat == ImageFormat::RGB) ||
(srcFormat == ImageFormat::BGRA && (srcFormat == ImageFormat::BGRA &&
...@@ -525,6 +560,7 @@ void image_resize_basic(const uint8_t* in_data, ...@@ -525,6 +560,7 @@ void image_resize_basic(const uint8_t* in_data,
int y_flag = 0; // only one line int y_flag = 0; // only one line
if (y_in_start < 0) { if (y_in_start < 0) {
y_flag = 1; y_flag = 1;
y_in_end = 0;
} }
float b0 = ibeta[dy * 2]; float b0 = ibeta[dy * 2];
float b1 = ibeta[dy * 2 + 1]; float b1 = ibeta[dy * 2 + 1];
...@@ -750,6 +786,26 @@ void image_flip_basic(const uint8_t* in_data, ...@@ -750,6 +786,26 @@ void image_flip_basic(const uint8_t* in_data,
flipxy_basic(in_data, srch, srcw, out_data, num); flipxy_basic(in_data, srch, srcw, out_data, num);
} }
} }
// Reference conversion of one image channel to floats:
//   output[h * width + w] = (pixel - means[0]) * scales[0]
// where `pixel` is the first byte of each `num`-byte input pixel.
//
// bgr:    input image, `height` rows of `width` pixels, `num` bytes per pixel.
// output: `height * width` floats.
// means / scales: only element 0 of each array is read.
void gray_to_tensor_basic(const uint8_t* bgr,
                          float* output,
                          int width,
                          int height,
                          float* means,
                          float* scales,
                          int num) {
  const float mean = means[0];
  const float scale = scales[0];
  for (int row = 0; row < height; ++row) {
    const uint8_t* in_row = bgr + row * width * num;
    float* out_row = output + row * width;
    for (int col = 0; col < width; ++col) {
      out_row[col] = (in_row[col * num] - mean) * scale;
    }
  }
}
void bgr_to_tensor_chw_basic(const uint8_t* bgr, void bgr_to_tensor_chw_basic(const uint8_t* bgr,
float* output, float* output,
...@@ -828,5 +884,8 @@ void image_to_tensor_basic(const uint8_t* in_data, ...@@ -828,5 +884,8 @@ void image_to_tensor_basic(const uint8_t* in_data,
} else if (layout == LayoutType::kNHWC && (srcFormat == ImageFormat::BGRA || } else if (layout == LayoutType::kNHWC && (srcFormat == ImageFormat::BGRA ||
srcFormat == ImageFormat::RGBA)) { srcFormat == ImageFormat::RGBA)) {
bgr_to_tensor_hwc_basic(in_data, output, srcw, srch, means, scales, 4); bgr_to_tensor_hwc_basic(in_data, output, srcw, srch, means, scales, 4);
} else if (srcFormat == ImageFormat::GRAY &&
(layout == LayoutType::kNHWC || layout == LayoutType::kNCHW)) {
gray_to_tensor_basic(in_data, output, srcw, srch, means, scales, 1);
} }
} }
...@@ -20,6 +20,7 @@ ...@@ -20,6 +20,7 @@
#include "lite/core/profile/timer.h" #include "lite/core/profile/timer.h"
#include "lite/tests/cv/cv_basic.h" #include "lite/tests/cv/cv_basic.h"
#include "lite/utils/cv/paddle_image_preprocess.h" #include "lite/utils/cv/paddle_image_preprocess.h"
#include "time.h" // NOLINT
DEFINE_int32(cluster, 3, "cluster id"); DEFINE_int32(cluster, 3, "cluster id");
DEFINE_int32(threads, 1, "threads num"); DEFINE_int32(threads, 1, "threads num");
...@@ -28,15 +29,15 @@ DEFINE_int32(repeats, 1, "repeats times"); ...@@ -28,15 +29,15 @@ DEFINE_int32(repeats, 1, "repeats times");
DEFINE_bool(basic_test, false, "do all tests"); DEFINE_bool(basic_test, false, "do all tests");
DEFINE_bool(check_result, true, "check the result"); DEFINE_bool(check_result, true, "check the result");
DEFINE_int32(srcFormat, 0, "input image format"); DEFINE_int32(srcFormat, 0, "input image format RGBA");
DEFINE_int32(dstFormat, 1, "output image format"); DEFINE_int32(dstFormat, 2, "output image format RGB");
DEFINE_int32(srch, 1920, "input height"); DEFINE_int32(srch, 1920, "input height");
DEFINE_int32(srcw, 1080, "input width"); DEFINE_int32(srcw, 1080, "input width");
DEFINE_int32(dsth, 960, "output height"); DEFINE_int32(dsth, 960, "output height");
DEFINE_int32(dstw, 540, "output width"); DEFINE_int32(dstw, 540, "output width");
DEFINE_int32(angle, 90, "rotate angel"); DEFINE_int32(angle, 90, "rotate angel");
DEFINE_int32(flip_num, 0, "flip x"); DEFINE_int32(flip_num, 0, "flip x");
DEFINE_int32(layout, 0, "layout nchw"); DEFINE_int32(layout, 1, "layout nchw");
typedef paddle::lite::utils::cv::ImageFormat ImageFormat; typedef paddle::lite::utils::cv::ImageFormat ImageFormat;
typedef paddle::lite::utils::cv::FlipParam FlipParam; typedef paddle::lite::utils::cv::FlipParam FlipParam;
...@@ -99,7 +100,7 @@ void test_img(const std::vector<int>& cluster_id, ...@@ -99,7 +100,7 @@ void test_img(const std::vector<int>& cluster_id,
float rotate, float rotate,
FlipParam flip, FlipParam flip,
LayoutType layout, LayoutType layout,
int test_iter = 1) { int test_iter = 10) {
#ifdef LITE_WITH_ARM #ifdef LITE_WITH_ARM
paddle::lite::DeviceInfo::Init(); paddle::lite::DeviceInfo::Init();
#endif #endif
...@@ -221,7 +222,7 @@ void test_img(const std::vector<int>& cluster_id, ...@@ -221,7 +222,7 @@ void test_img(const std::vector<int>& cluster_id,
float scales[3] = {1 / 127.5f, 1 / 127.5f, 1 / 127.5f}; float scales[3] = {1 / 127.5f, 1 / 127.5f, 1 / 127.5f};
if (FLAGS_check_result) { if (FLAGS_check_result) {
LOG(INFO) << "image convert basic compute"; // LOG(INFO) << "image convert basic compute";
image_convert_basic(src, image_convert_basic(src,
basic_dst, basic_dst,
(ImageFormat)srcFormat, (ImageFormat)srcFormat,
...@@ -230,7 +231,7 @@ void test_img(const std::vector<int>& cluster_id, ...@@ -230,7 +231,7 @@ void test_img(const std::vector<int>& cluster_id,
srch, srch,
out_size); out_size);
LOG(INFO) << "image resize basic compute"; // LOG(INFO) << "image resize basic compute";
image_resize_basic(basic_dst, image_resize_basic(basic_dst,
resize_basic, resize_basic,
(ImageFormat)dstFormat, (ImageFormat)dstFormat,
...@@ -239,7 +240,7 @@ void test_img(const std::vector<int>& cluster_id, ...@@ -239,7 +240,7 @@ void test_img(const std::vector<int>& cluster_id,
dstw, dstw,
dsth); dsth);
LOG(INFO) << "image rotate basic compute"; // LOG(INFO) << "image rotate basic compute";
image_rotate_basic(resize_basic, image_rotate_basic(resize_basic,
tv_out_ratote_basic, tv_out_ratote_basic,
(ImageFormat)dstFormat, (ImageFormat)dstFormat,
...@@ -247,7 +248,7 @@ void test_img(const std::vector<int>& cluster_id, ...@@ -247,7 +248,7 @@ void test_img(const std::vector<int>& cluster_id,
dsth, dsth,
rotate); rotate);
LOG(INFO) << "image flip basic compute"; // LOG(INFO) << "image flip basic compute";
image_flip_basic(resize_basic, image_flip_basic(resize_basic,
tv_out_flip_basic, tv_out_flip_basic,
(ImageFormat)dstFormat, (ImageFormat)dstFormat,
...@@ -255,7 +256,7 @@ void test_img(const std::vector<int>& cluster_id, ...@@ -255,7 +256,7 @@ void test_img(const std::vector<int>& cluster_id,
dsth, dsth,
flip); flip);
LOG(INFO) << "image to tensor basic compute"; // LOG(INFO) << "image to tensor basic compute";
image_to_tensor_basic(resize_basic, image_to_tensor_basic(resize_basic,
&tensor_basic, &tensor_basic,
(ImageFormat)dstFormat, (ImageFormat)dstFormat,
...@@ -267,10 +268,13 @@ void test_img(const std::vector<int>& cluster_id, ...@@ -267,10 +268,13 @@ void test_img(const std::vector<int>& cluster_id,
} }
Timer t1; Timer t1;
Timer t_convert;
Timer t_resize;
Timer t_flip;
Timer t_rotate;
Timer t_tensor;
LOG(INFO) << "saber cv compute"; LOG(INFO) << "saber cv compute";
double to = 0;
double min_time = 100000;
TransParam tparam; TransParam tparam;
tparam.ih = srch; tparam.ih = srch;
tparam.iw = srcw; tparam.iw = srcw;
...@@ -285,15 +289,17 @@ void test_img(const std::vector<int>& cluster_id, ...@@ -285,15 +289,17 @@ void test_img(const std::vector<int>& cluster_id,
ImagePreprocess image_preprocess(srcFormat, dstFormat, tparam); ImagePreprocess image_preprocess(srcFormat, dstFormat, tparam);
for (int i = 0; i < test_iter; ++i) { for (int i = 0; i < test_iter; ++i) {
t1.Reset();
t1.Start(); t1.Start();
LOG(INFO) << "image convert saber compute"; // LOG(INFO) << "image convert saber compute";
t_convert.Start();
// 方法一: image_preprocess.imageCovert(src, lite_dst); // 方法一: image_preprocess.imageCovert(src, lite_dst);
image_preprocess.imageCovert( image_preprocess.imageConvert(
src, lite_dst, (ImageFormat)srcFormat, (ImageFormat)dstFormat); src, lite_dst, (ImageFormat)srcFormat, (ImageFormat)dstFormat);
t_convert.Stop();
LOG(INFO) << "image resize saber compute"; // LOG(INFO) << "image resize saber compute";
t_resize.Start();
// 方法一:image_preprocess.imageResize(lite_dst, resize_tmp); // 方法一:image_preprocess.imageResize(lite_dst, resize_tmp);
image_preprocess.imageResize(lite_dst, image_preprocess.imageResize(lite_dst,
resize_tmp, resize_tmp,
...@@ -302,8 +308,10 @@ void test_img(const std::vector<int>& cluster_id, ...@@ -302,8 +308,10 @@ void test_img(const std::vector<int>& cluster_id,
srch, srch,
dstw, dstw,
dsth); dsth);
t_resize.Stop();
LOG(INFO) << "image rotate saber compute"; // LOG(INFO) << "image rotate saber compute";
t_rotate.Start();
// 方法一: image_preprocess.imageRotate(resize_tmp, tv_out_ratote); // 方法一: image_preprocess.imageRotate(resize_tmp, tv_out_ratote);
image_preprocess.imageRotate(resize_tmp, image_preprocess.imageRotate(resize_tmp,
tv_out_ratote, tv_out_ratote,
...@@ -311,13 +319,17 @@ void test_img(const std::vector<int>& cluster_id, ...@@ -311,13 +319,17 @@ void test_img(const std::vector<int>& cluster_id,
dstw, dstw,
dsth, dsth,
rotate); rotate);
t_rotate.Stop();
LOG(INFO) << "image flip saber compute"; // LOG(INFO) << "image flip saber compute";
t_flip.Start();
// 方法一: image_preprocess.imageFlip(resize_tmp, tv_out_flip); // 方法一: image_preprocess.imageFlip(resize_tmp, tv_out_flip);
image_preprocess.imageFlip( image_preprocess.imageFlip(
resize_tmp, tv_out_flip, (ImageFormat)dstFormat, dstw, dsth, flip); resize_tmp, tv_out_flip, (ImageFormat)dstFormat, dstw, dsth, flip);
t_flip.Stop();
LOG(INFO) << "image to tensor compute"; // LOG(INFO) << "image to tensor compute";
t_tensor.Start();
// 方法一: image_preprocess.image2Tensor( // 方法一: image_preprocess.image2Tensor(
// resize_tmp, &dst_tensor, layout, means, scales); // resize_tmp, &dst_tensor, layout, means, scales);
image_preprocess.image2Tensor(resize_tmp, image_preprocess.image2Tensor(resize_tmp,
...@@ -328,16 +340,27 @@ void test_img(const std::vector<int>& cluster_id, ...@@ -328,16 +340,27 @@ void test_img(const std::vector<int>& cluster_id,
layout, layout,
means, means,
scales); scales);
t_tensor.Stop();
t1.Stop(); t1.Stop();
double tdiff = t1.LapTimes().Avg();
to += tdiff;
if (tdiff < min_time) {
min_time = tdiff;
}
} }
LOG(INFO) << "image trans total time : " << to LOG(INFO) << "image convert avg time : " << t_convert.LapTimes().Avg()
<< ", avg time : " << to / test_iter; << ", min time: " << t_convert.LapTimes().Min()
<< ", max time: " << t_convert.LapTimes().Max();
LOG(INFO) << "image resize avg time : " << t_resize.LapTimes().Avg()
<< ", min time: " << t_resize.LapTimes().Min()
<< ", max time: " << t_resize.LapTimes().Max();
LOG(INFO) << "image rotate avg time : " << t_rotate.LapTimes().Avg()
<< ", min time: " << t_rotate.LapTimes().Min()
<< ", max time: " << t_rotate.LapTimes().Max();
LOG(INFO) << "image flip avg time : " << t_flip.LapTimes().Avg()
<< ", min time: " << t_flip.LapTimes().Min()
<< ", max time: " << t_flip.LapTimes().Max();
LOG(INFO) << "image tensor avg time : " << t_tensor.LapTimes().Avg()
<< ", min time: " << t_tensor.LapTimes().Min()
<< ", max time: " << t_tensor.LapTimes().Max();
LOG(INFO) << "image trans total avg time : " << t1.LapTimes().Avg()
<< ", min time: " << t1.LapTimes().Min()
<< ", max time: " << t1.LapTimes().Max();
double max_ratio = 0; double max_ratio = 0;
double max_diff = 0; double max_diff = 0;
...@@ -536,7 +559,7 @@ void test_img(const std::vector<int>& cluster_id, ...@@ -536,7 +559,7 @@ void test_img(const std::vector<int>& cluster_id,
} }
} }
#if 1 #if 0
TEST(TestImageConvertRand, test_func_image_convert_preprocess) { TEST(TestImageConvertRand, test_func_image_convert_preprocess) {
if (FLAGS_basic_test) { if (FLAGS_basic_test) {
for (auto w : {1, 4, 8, 16, 112, 224, 1092}) { for (auto w : {1, 4, 8, 16, 112, 224, 1092}) {
...@@ -546,19 +569,16 @@ TEST(TestImageConvertRand, test_func_image_convert_preprocess) { ...@@ -546,19 +569,16 @@ TEST(TestImageConvertRand, test_func_image_convert_preprocess) {
for (auto rotate : {180}) { for (auto rotate : {180}) {
for (auto flip : {0}) { for (auto flip : {0}) {
for (auto srcFormat : {0, 1, 2, 3, 4, 11, 12}) { for (auto srcFormat : {0, 1, 2, 3, 4, 11, 12}) {
for (auto dstFormat : {0, 1, 2, 3}) { for (auto dstFormat : {0, 1, 2, 3, 4}) {
for (auto layout : {1}) { for (auto layout : {1}) {
if ((dstFormat == ImageFormat::GRAY && if ((srcFormat == ImageFormat::NV12 ||
(srcFormat == ImageFormat::RGBA ||
srcFormat == ImageFormat::BGRA)) ||
(srcFormat == ImageFormat::GRAY &&
(dstFormat == ImageFormat::RGBA ||
dstFormat == ImageFormat::BGRA)) ||
(srcFormat == ImageFormat::NV12 ||
srcFormat == ImageFormat::NV21) && srcFormat == ImageFormat::NV21) &&
(dstFormat == ImageFormat::GRAY || (dstFormat == ImageFormat::GRAY)) {
dstFormat == ImageFormat::RGBA || continue;
dstFormat == ImageFormat::BGRA)) { }
if ((dstFormat == ImageFormat::NV12 ||
dstFormat == ImageFormat::NV21) &&
(srcFormat == ImageFormat::GRAY)) {
continue; continue;
} }
if (srcFormat == ImageFormat::NV12 || if (srcFormat == ImageFormat::NV12 ||
...@@ -591,7 +611,7 @@ TEST(TestImageConvertRand, test_func_image_convert_preprocess) { ...@@ -591,7 +611,7 @@ TEST(TestImageConvertRand, test_func_image_convert_preprocess) {
} }
} }
#endif #endif
#if 1 #if 0
TEST(TestImageConvertRand, test_func_image_resize_preprocess) { TEST(TestImageConvertRand, test_func_image_resize_preprocess) {
if (FLAGS_basic_test) { if (FLAGS_basic_test) {
for (auto w : {1, 4, 8, 16, 112, 224, 1092}) { for (auto w : {1, 4, 8, 16, 112, 224, 1092}) {
...@@ -601,21 +621,13 @@ TEST(TestImageConvertRand, test_func_image_resize_preprocess) { ...@@ -601,21 +621,13 @@ TEST(TestImageConvertRand, test_func_image_resize_preprocess) {
for (auto rotate : {180}) { for (auto rotate : {180}) {
for (auto flip : {0}) { for (auto flip : {0}) {
for (auto srcFormat : {0, 1, 2, 3, 4, 11, 12}) { for (auto srcFormat : {0, 1, 2, 3, 4, 11, 12}) {
for (auto dstFormat : {0, 1, 2, 3}) { for (auto dstFormat : {0, 1, 2, 3, 4, 11}) {
for (auto layout : {1}) { for (auto layout : {1}) {
if (dstFormat == ImageFormat::NV12 || if (dstFormat == ImageFormat::NV12 ||
dstFormat == ImageFormat::NV21 || dstFormat == ImageFormat::NV21 ||
(dstFormat == ImageFormat::GRAY &&
(srcFormat == ImageFormat::RGBA ||
srcFormat == ImageFormat::BGRA)) ||
(srcFormat == ImageFormat::GRAY &&
(dstFormat == ImageFormat::RGBA ||
dstFormat == ImageFormat::BGRA)) ||
(srcFormat == ImageFormat::NV12 || (srcFormat == ImageFormat::NV12 ||
srcFormat == ImageFormat::NV21) && srcFormat == ImageFormat::NV21) &&
(dstFormat == ImageFormat::GRAY || dstFormat == ImageFormat::GRAY) {
dstFormat == ImageFormat::RGBA ||
dstFormat == ImageFormat::BGRA)) {
continue; continue;
} }
if (srcFormat == ImageFormat::NV12 || if (srcFormat == ImageFormat::NV12 ||
...@@ -656,25 +668,10 @@ TEST(TestImageConvertRand, test_func_image_trans_preprocess) { ...@@ -656,25 +668,10 @@ TEST(TestImageConvertRand, test_func_image_trans_preprocess) {
for (auto ww : {32, 112}) { for (auto ww : {32, 112}) {
for (auto hh : {112}) { for (auto hh : {112}) {
for (auto rotate : {90, 180, 270}) { for (auto rotate : {90, 180, 270}) {
for (auto flip : {0, 1, 2}) { for (auto flip : {-1, 0, 1}) {
for (auto srcFormat : {11}) { for (auto srcFormat : {0}) {
for (auto dstFormat : {3}) { for (auto dstFormat : {0, 1, 2, 3, 4}) {
for (auto layout : {1, 3}) { for (auto layout : {1, 3}) {
if (dstFormat == ImageFormat::NV12 ||
dstFormat == ImageFormat::NV21 ||
(dstFormat == ImageFormat::GRAY &&
(srcFormat == ImageFormat::RGBA ||
srcFormat == ImageFormat::BGRA)) ||
(srcFormat == ImageFormat::GRAY &&
(dstFormat == ImageFormat::RGBA ||
dstFormat == ImageFormat::BGRA)) ||
(srcFormat == ImageFormat::NV12 ||
srcFormat == ImageFormat::NV21) &&
(dstFormat == ImageFormat::GRAY ||
dstFormat == ImageFormat::RGBA ||
dstFormat == ImageFormat::BGRA)) {
continue;
}
if (srcFormat == ImageFormat::NV12 || if (srcFormat == ImageFormat::NV12 ||
srcFormat == ImageFormat::NV21) { srcFormat == ImageFormat::NV21) {
if (w % 2) { // is not ou shu, two line y == one line if (w % 2) { // is not ou shu, two line y == one line
...@@ -717,7 +714,8 @@ TEST(TestImageConvertCustom, test_func_image_preprocess_custom) { ...@@ -717,7 +714,8 @@ TEST(TestImageConvertCustom, test_func_image_preprocess_custom) {
(ImageFormat)FLAGS_dstFormat, (ImageFormat)FLAGS_dstFormat,
FLAGS_angle, FLAGS_angle,
(FlipParam)FLAGS_flip_num, (FlipParam)FLAGS_flip_num,
(LayoutType)FLAGS_layout); (LayoutType)FLAGS_layout,
20);
} }
#endif #endif
#endif #endif
if(LITE_WITH_CV AND (NOT LITE_WITH_OPENCL AND NOT LITE_WITH_FPGA) AND LITE_WITH_ARM) if(LITE_WITH_CV AND (NOT LITE_WITH_OPENCL AND NOT LITE_WITH_FPGA) AND LITE_WITH_ARM)
set(lite_cv_deps)
lite_cc_library(paddle_cv_arm SRCS lite_cc_library(paddle_cv_arm SRCS
image_convert.cc image_convert.cc
paddle_image_preprocess.cc paddle_image_preprocess.cc
...@@ -7,5 +6,5 @@ if(LITE_WITH_CV AND (NOT LITE_WITH_OPENCL AND NOT LITE_WITH_FPGA) AND LITE_WITH_ ...@@ -7,5 +6,5 @@ if(LITE_WITH_CV AND (NOT LITE_WITH_OPENCL AND NOT LITE_WITH_FPGA) AND LITE_WITH_
image_flip.cc image_flip.cc
image_rotate.cc image_rotate.cc
image_resize.cc image_resize.cc
DEPS ${lite_cv_deps} paddle_api place) DEPS paddle_api place)
endif() endif()
...@@ -18,6 +18,13 @@ namespace paddle { ...@@ -18,6 +18,13 @@ namespace paddle {
namespace lite { namespace lite {
namespace utils { namespace utils {
namespace cv { namespace cv {
// Converts a single-channel 8-bit image into a float buffer, computing
// (pixel - means[0]) * scales[0] per pixel. Only element 0 of `means` and
// `scales` is read. `src` holds height * width bytes and `output` receives
// height * width floats.
void gray_to_tensor(const uint8_t* src,
float* output,
int width,
int height,
float* means,
float* scales);
void bgr_to_tensor_chw(const uint8_t* src, void bgr_to_tensor_chw(const uint8_t* src,
float* output, float* output,
int width, int width,
...@@ -52,7 +59,7 @@ void bgra_to_tensor_hwc(const uint8_t* src, ...@@ -52,7 +59,7 @@ void bgra_to_tensor_hwc(const uint8_t* src,
* NCHW * NCHW
* param src: input image data * param src: input image data
* param dstTensor: output tensor data * param dstTensor: output tensor data
* param srcFormat: input image format, support BGR(GRB) and BGRA(RGBA) * param srcFormat: input image format, support GRAY, BGR(GRB) and BGRA(RGBA)
* param srcw: input image width * param srcw: input image width
* param srch: input image height * param srch: input image height
* param layout: output tensor layout,support NHWC and NCHW * param layout: output tensor layout,support NHWC and NCHW
...@@ -79,6 +86,9 @@ void Image2Tensor::choose(const uint8_t* src, ...@@ -79,6 +86,9 @@ void Image2Tensor::choose(const uint8_t* src,
} else if (layout == LayoutType::kNHWC && } else if (layout == LayoutType::kNHWC &&
(srcFormat == BGRA || srcFormat == RGBA)) { (srcFormat == BGRA || srcFormat == RGBA)) {
impl_ = bgra_to_tensor_hwc; impl_ = bgra_to_tensor_hwc;
} else if ((layout == LayoutType::kNHWC || layout == LayoutType::kNCHW) &&
(srcFormat == GRAY)) {
impl_ = gray_to_tensor;
} else { } else {
printf("this layout: %d or image format: %d not support \n", printf("this layout: %d or image format: %d not support \n",
static_cast<int>(layout), static_cast<int>(layout),
...@@ -87,6 +97,147 @@ void Image2Tensor::choose(const uint8_t* src, ...@@ -87,6 +97,147 @@ void Image2Tensor::choose(const uint8_t* src,
} }
impl_(src, output, srcw, srch, means, scales); impl_(src, output, srcw, srch, means, scales);
} }
// Converts a single-channel 8-bit image into floats:
//   output[i * width + j] = (src[i * width + j] - means[0]) * scales[0]
//
// src:    height * width gray bytes.
// output: height * width floats.
// means / scales: only element 0 of each array is read.
//
// Each row is processed in blocks of 16 pixels by hand-written assembly
// (aarch64 or armv7), followed by a scalar loop for the width % 16 remainder.
void gray_to_tensor(const uint8_t* src,
                    float* output,
                    int width,
                    int height,
                    float* means,
                    float* scales) {
  float mean_val = means[0];
  float scale_val = scales[0];

  // Number of full 16-pixel blocks per row. This must be width / 16 to pair
  // with `remain`; the previous `width >> 16` was 0 for any realistic width,
  // so only the first width % 16 pixels of each row were ever converted.
  int dim16 = width >> 4;
  int remain = width % 16;

  float32x4_t vmean = vdupq_n_f32(mean_val);
  float32x4_t vscale = vdupq_n_f32(scale_val);
#pragma omp parallel for
  for (int i = 0; i < height; i += 1) {
    const uint8_t* din_ptr = src + i * width;
    float* ptr_h = output + i * width;
    int cnt = dim16;
    if (cnt > 0) {
#ifdef __aarch64__
      asm volatile(
          "prfm pldl1keep, [%[inptr0]] \n"
          "prfm pldl1keep, [%[inptr0], #64] \n"
          "prfm pldl1keep, [%[inptr0], #128] \n"
          "prfm pldl1keep, [%[inptr0], #192] \n"
          "1: \n"
          "ld1 {v0.8b}, [%[inptr0]], #8 \n"  // v0 = pixels 0..7
          "ld1 {v1.8b}, [%[inptr0]], #8 \n"  // v1 = pixels 8..15
          // 8->16
          "ushll v3.8h, v0.8b, #0 \n"
          // widen the second half (v1); v0 was previously widened twice
          "ushll v4.8h, v1.8b, #0 \n"
          // 16->32
          "ushll v6.4s, v3.4h, #0 \n"
          "ushll2 v7.4s, v3.8h, #0 \n"
          "ushll v8.4s, v4.4h, #0 \n"
          "ushll2 v9.4s, v4.8h, #0 \n"
          // int32->fp32
          "ucvtf v12.4s, v6.4s \n"
          "ucvtf v13.4s, v7.4s \n"
          "ucvtf v14.4s, v8.4s \n"
          "ucvtf v15.4s, v9.4s \n"
          // sub -mean
          "fsub v12.4s, v12.4s, %w[vmean].4s \n"
          "fsub v13.4s, v13.4s, %w[vmean].4s \n"
          "fsub v14.4s, v14.4s, %w[vmean].4s \n"
          "fsub v15.4s, v15.4s, %w[vmean].4s \n"
          // mul * scale
          "fmul v6.4s, v12.4s, %w[vscale].4s \n"
          "fmul v7.4s, v13.4s, %w[vscale].4s \n"
          "fmul v8.4s, v14.4s, %w[vscale].4s \n"
          "fmul v9.4s, v15.4s, %w[vscale].4s \n"
          // store
          "st1 {v6.4s}, [%[outr0]], #16 \n"
          "subs %w[cnt], %w[cnt], #1 \n"
          "st1 {v7.4s}, [%[outr0]], #16 \n"
          "st1 {v8.4s}, [%[outr0]], #16 \n"
          "st1 {v9.4s}, [%[outr0]], #16 \n"
          "bne 1b \n"
          : [inptr0] "+r"(din_ptr), [outr0] "+r"(ptr_h), [cnt] "+r"(cnt)
          : [vmean] "w"(vmean), [vscale] "w"(vscale)
          : "cc",
            "memory",
            "v0",
            "v1",
            "v2",
            "v3",
            "v4",
            "v5",
            "v6",
            "v7",
            "v8",
            "v9",
            "v10",
            "v11",
            "v12",
            "v13",
            "v14",
            "v15");
#else
      asm volatile(
          "pld [%[inptr0]] @ preload a, 64byte\n"
          "pld [%[inptr0], #64] @ preload a, 64byte\n"
          "pld [%[inptr0], #128] @ preload a, 64byte\n"
          "pld [%[inptr0], #192] @ preload a, 64byte\n"
          "1: \n"
          "vld1.8 {d12, d13}, [%[inptr0]]! \n"
          // 8->16
          "vmovl.u8 q8, d12 \n"
          "vmovl.u8 q9, d13 \n"
          // 16->32
          "vmovl.u16 q11, d16 \n"
          "vmovl.u16 q12, d17 \n"
          "vmovl.u16 q13, d18 \n"
          "vmovl.u16 q14, d19 \n"
          // int32->fp32
          "vcvt.f32.u32 q7, q11 \n"
          "vcvt.f32.u32 q8, q12 \n"
          "vcvt.f32.u32 q9, q13 \n"
          "vcvt.f32.u32 q10, q14 \n"
          // sub -mean
          "vsub.f32 q7, q7, %q[vmean] \n"
          "vsub.f32 q8, q8, %q[vmean] \n"
          "vsub.f32 q9, q9, %q[vmean] \n"
          "vsub.f32 q10, q10, %q[vmean] \n"
          // mul *scale
          "vmul.f32 q11, q7, %q[vscale] \n"
          "vmul.f32 q12, q8, %q[vscale] \n"
          "vmul.f32 q13, q9, %q[vscale] \n"
          "vmul.f32 q14, q10, %q[vscale] \n"
          // store
          "vst1.32 {d22 - d23}, [%[outr0]]! \n"
          "subs %[cnt], #1 \n"
          "vst1.32 {d24 - d25}, [%[outr0]]! \n"
          "vst1.32 {d26 - d27}, [%[outr0]]! \n"
          "vst1.32 {d28 - d29}, [%[outr0]]! \n"
          "bne 1b"
          : [inptr0] "+r"(din_ptr), [outr0] "+r"(ptr_h), [cnt] "+r"(cnt)
          : [vmean] "w"(vmean), [vscale] "w"(vscale)
          : "cc",
            "memory",
            "q6",
            "q7",
            "q8",
            "q9",
            "q10",
            "q11",
            "q12",
            "q13",
            "q14");
#endif
    }
    // Scalar tail: the asm advanced din_ptr / ptr_h past the full blocks.
    for (int j = 0; j < remain; j++) {
      *ptr_h++ = (*din_ptr - mean_val) * scale_val;
      din_ptr++;
    }
  }
}
void bgr_to_tensor_chw(const uint8_t* src, void bgr_to_tensor_chw(const uint8_t* src,
float* output, float* output,
int width, int width,
...@@ -390,6 +541,7 @@ void bgra_to_tensor_chw(const uint8_t* src, ...@@ -390,6 +541,7 @@ void bgra_to_tensor_chw(const uint8_t* src,
} }
} }
} }
void bgr_to_tensor_hwc(const uint8_t* src, void bgr_to_tensor_hwc(const uint8_t* src,
float* output, float* output,
int width, int width,
......
...@@ -30,10 +30,14 @@ void nv21_to_bgr(const uint8_t* src, uint8_t* dst, int srcw, int srch); ...@@ -30,10 +30,14 @@ void nv21_to_bgr(const uint8_t* src, uint8_t* dst, int srcw, int srch);
void nv21_to_bgra(const uint8_t* src, uint8_t* dst, int srcw, int srch); void nv21_to_bgra(const uint8_t* src, uint8_t* dst, int srcw, int srch);
void nv12_to_bgr(const uint8_t* src, uint8_t* dst, int srcw, int srch); void nv12_to_bgr(const uint8_t* src, uint8_t* dst, int srcw, int srch);
void nv12_to_bgra(const uint8_t* src, uint8_t* dst, int srcw, int srch); void nv12_to_bgra(const uint8_t* src, uint8_t* dst, int srcw, int srch);
// bgra rgba to gray
void hwc4_to_hwc1(const uint8_t* src, uint8_t* dst, int srcw, int srch);
// bgr rgb to gray // bgr rgb to gray
void hwc3_to_hwc1(const uint8_t* src, uint8_t* dst, int srcw, int srch); void hwc3_to_hwc1(const uint8_t* src, uint8_t* dst, int srcw, int srch);
// gray to bgr rgb // gray to bgr rgb
void hwc1_to_hwc3(const uint8_t* src, uint8_t* dst, int srcw, int srch); void hwc1_to_hwc3(const uint8_t* src, uint8_t* dst, int srcw, int srch);
// gray to bgra rgba
void hwc1_to_hwc4(const uint8_t* src, uint8_t* dst, int srcw, int srch);
// bgr to bgra or rgb to rgba // bgr to bgra or rgb to rgba
void hwc3_to_hwc4(const uint8_t* src, uint8_t* dst, int srcw, int srch); void hwc3_to_hwc4(const uint8_t* src, uint8_t* dst, int srcw, int srch);
// bgra to bgr or rgba to rgb // bgra to bgr or rgba to rgb
...@@ -112,6 +116,12 @@ void ImageConvert::choose(const uint8_t* src, ...@@ -112,6 +116,12 @@ void ImageConvert::choose(const uint8_t* src,
} else if ((srcFormat == RGB && dstFormat == BGRA) || } else if ((srcFormat == RGB && dstFormat == BGRA) ||
(srcFormat == BGR && dstFormat == RGBA)) { (srcFormat == BGR && dstFormat == RGBA)) {
impl_ = hwc3_trans_hwc4; impl_ = hwc3_trans_hwc4;
} else if ((srcFormat == GRAY && dstFormat == RGBA) ||
(srcFormat == GRAY && dstFormat == BGRA)) {
impl_ = hwc1_to_hwc4;
} else if ((srcFormat == RGBA && dstFormat == GRAY) ||
(srcFormat == BGRA && dstFormat == GRAY)) {
impl_ = hwc4_to_hwc1;
} else { } else {
printf("srcFormat: %d, dstFormat: %d does not support! \n", printf("srcFormat: %d, dstFormat: %d does not support! \n",
srcFormat, srcFormat,
...@@ -989,7 +999,7 @@ void hwc3_to_hwc1(const uint8_t* src, uint8_t* dst, int srcw, int srch) { ...@@ -989,7 +999,7 @@ void hwc3_to_hwc1(const uint8_t* src, uint8_t* dst, int srcw, int srch) {
"vshrn.u32 d24, q6, #7 \n" "vshrn.u32 d24, q6, #7 \n"
"vshrn.u32 d25, q7, #7 \n" "vshrn.u32 d25, q7, #7 \n"
"vshrn.u32 d26, q8, #7 \n" "vshrn.u32 d26, q8, #7 \n"
"vshrn.u32 d27, q8, #7 \n" "vshrn.u32 d27, q9, #7 \n"
// 16->8 // 16->8
"vmovn.u16 d4, q10 \n" "vmovn.u16 d4, q10 \n"
"vmovn.u16 d5, q11 \n" "vmovn.u16 d5, q11 \n"
...@@ -1077,6 +1087,280 @@ void hwc3_to_hwc1(const uint8_t* src, uint8_t* dst, int srcw, int srch) { ...@@ -1077,6 +1087,280 @@ void hwc3_to_hwc1(const uint8_t* src, uint8_t* dst, int srcw, int srch) {
} }
} }
/* /*
CV_BGR2GRAY: Gray = 0.1140*B + 0.5870*G + 0.2989*R
CV_RGB2GRAY: Gray = 0.1140*R + 0.5870*G + 0.2989*B
The weights are scaled by 128 and rounded to integers:
b = 0.114 * 128 = 14.592 ~= 15
g = 0.587 * 128 = 75.136 ~= 75
r = 0.2989 * 128 = 38.2592 ~= 38
Gray = (15*B + 75*G + 38*R)/128
bgra2gray, rgba2gray
*/
/*
 * Convert a 4-channel interleaved 8-bit image (BGRA or RGBA) to a
 * single-channel gray image.
 *
 * For every pixel: gray = (15 * c0 + 75 * c1 + 38 * c2) >> 7, where c0..c2
 * are the first three channels in memory order; the fourth channel is
 * ignored.
 *
 * param src:  input image, srch rows of srcw pixels, 4 bytes per pixel
 * param dst:  output image, srch rows of srcw bytes
 * param srcw: image width in pixels
 * param srch: image height in pixels
 *
 * Rows are processed four at a time with NEON assembly (8 pixels per
 * iteration); leftover columns and leftover rows use scalar code / NEON
 * intrinsics.
 */
void hwc4_to_hwc1(const uint8_t* src, uint8_t* dst, int srcw, int srch) {
  // Fixed-point (x128) channel weights; they sum to 128.
  uint8_t b = 15;
  uint8_t g = 75;
  uint8_t r = 38;
  uint8x8_t vb = vdup_n_u8(b);
  uint8x8_t vg = vdup_n_u8(g);
  uint8x8_t vr = vdup_n_u8(r);
#ifndef __aarch64__
  // The ARMv7 assembly loads the weights from memory.
  uint8_t vb_array[8] = {b, b, b, b, b, b, b, b};
  uint8_t vg_array[8] = {g, g, g, g, g, g, g, g};
  uint8_t vr_array[8] = {r, r, r, r, r, r, r, r};
#endif
  int cnt_pro = srcw >> 3;     // number of 8-pixel groups per row
  int remain_pro = srcw % 8;   // pixels left after the 8-pixel groups
  int win = srcw * 4;          // input row stride in bytes
  // Number of rows covered by the 4-row kernel. It is computed explicitly so
  // the leftover-row loop below does not rely on the value of the OpenMP
  // loop variable after the parallel region.
  const int rows4 = srch > 0 ? (srch & ~3) : 0;
#pragma omp parallel for
  for (int i = 0; i < rows4; i += 4) {
    const uint8_t* inptr0 = src + i * win;
    const uint8_t* inptr1 = inptr0 + win;
    const uint8_t* inptr2 = inptr1 + win;
    const uint8_t* inptr3 = inptr2 + win;
    uint8_t* outr0 = dst + i * srcw;
    uint8_t* outr1 = outr0 + srcw;
    uint8_t* outr2 = outr1 + srcw;
    uint8_t* outr3 = outr2 + srcw;
    int cnt = cnt_pro;
    if (cnt > 0) {
#ifdef __aarch64__
      asm volatile(
          "prfm   pldl1keep, [%[inptr0]]                \n"
          "prfm   pldl1keep, [%[inptr0], #128]   \n"
          "prfm   pldl1keep, [%[inptr1]]                \n"
          "prfm   pldl1keep, [%[inptr1], #128]   \n"
          "prfm   pldl1keep, [%[inptr2]]                \n"
          "prfm   pldl1keep, [%[inptr2], #128]   \n"
          "prfm   pldl1keep, [%[inptr3]]                \n"
          "prfm   pldl1keep, [%[inptr3], #128]   \n"
          "1: \n"
          // De-interleave 8 pixels of each row:
          // row0 -> v0..v3, row1 -> v4..v7, row2 -> v8..v11, row3 -> v12..v15
          "ld4 {v0.8b - v3.8b}, [%[inptr0]], #32 \n"
          "ld4 {v4.8b - v7.8b}, [%[inptr1]], #32 \n"
          "ld4 {v8.8b - v11.8b}, [%[inptr2]], #32 \n"
          "ld4 {v12.8b - v15.8b}, [%[inptr3]], #32 \n"
          // channel 0 * vb. Results go to v16..v19 so that the row-3 inputs
          // held in v12..v15 stay intact until they have been consumed.
          "umull v16.8h, v0.8b, %w[vb].8b \n"
          "umull v17.8h, v4.8b, %w[vb].8b \n"
          "umull v18.8h, v8.8b, %w[vb].8b \n"
          "umull v19.8h, v12.8b, %w[vb].8b \n"
          // channel 1 * vg
          "umull v20.8h, v1.8b, %w[vg].8b \n"
          "umull v21.8h, v5.8b, %w[vg].8b \n"
          "umull v22.8h, v9.8b, %w[vg].8b \n"
          "umull v23.8h, v13.8b, %w[vg].8b \n"
          // accumulate channel 2 * vr
          "umlal v16.8h, v2.8b, %w[vr].8b \n"
          "umlal v17.8h, v6.8b, %w[vr].8b \n"
          "umlal v18.8h, v10.8b, %w[vr].8b \n"
          "umlal v19.8h, v14.8b, %w[vr].8b \n"
          // 16->32: widen while adding the two partial sums
          "uaddl v0.4s, v20.4h, v16.4h \n"
          "uaddl2 v1.4s, v20.8h, v16.8h \n"
          "uaddl v2.4s, v21.4h, v17.4h \n"
          "uaddl2 v3.4s, v21.8h, v17.8h \n"
          "uaddl v4.4s, v22.4h, v18.4h \n"
          "uaddl2 v5.4s, v22.8h, v18.8h \n"
          "uaddl v6.4s, v23.4h, v19.4h \n"
          "uaddl2 v7.4s, v23.8h, v19.8h \n"
          // 32->16, sum >> 7
          "shrn v12.4h, v0.4s, #7 \n"
          "shrn2 v12.8h, v1.4s, #7 \n"
          "shrn v13.4h, v2.4s, #7 \n"
          "shrn2 v13.8h, v3.4s, #7 \n"
          "shrn v14.4h, v4.4s, #7 \n"
          "shrn2 v14.8h, v5.4s, #7 \n"
          "shrn v15.4h, v6.4s, #7 \n"
          "shrn2 v15.8h, v7.4s, #7 \n"
          // 16->8
          "xtn v0.8b, v12.8h \n"
          "xtn v1.8b, v13.8h \n"
          "xtn v2.8b, v14.8h \n"
          "xtn v3.8b, v15.8h \n"
          "subs %w[cnt], %w[cnt], #1 \n"
          "st1 {v0.8b}, [%[outr0]], #8 \n"
          "st1 {v1.8b}, [%[outr1]], #8 \n"
          "st1 {v2.8b}, [%[outr2]], #8 \n"
          "st1 {v3.8b}, [%[outr3]], #8 \n"
          "bne 1b \n"
          : [inptr0] "+r"(inptr0),
            [inptr1] "+r"(inptr1),
            [inptr2] "+r"(inptr2),
            [inptr3] "+r"(inptr3),
            [outr0] "+r"(outr0),
            [outr1] "+r"(outr1),
            [outr2] "+r"(outr2),
            [outr3] "+r"(outr3),
            [cnt] "+r"(cnt)
          : [vb] "w"(vb), [vg] "w"(vg), [vr] "w"(vr)
          : "cc",
            "memory",
            "v0",
            "v1",
            "v2",
            "v3",
            "v4",
            "v5",
            "v6",
            "v7",
            "v8",
            "v9",
            "v10",
            "v11",
            "v12",
            "v13",
            "v14",
            "v15",
            "v16",
            "v17",
            "v18",
            "v19",
            "v20",
            "v21",
            "v22",
            "v23");
#else
      asm volatile(
          "pld [%[inptr0]]                         @ preload a, 64byte\n"
          "pld [%[inptr0], #128]            @ preload a, 64byte\n"
          "pld [%[inptr1]]            @ preload a, 64byte\n"
          "pld [%[inptr1], #128]            @ preload a, 64byte\n"
          "pld [%[inptr2]]            @ preload a, 64byte\n"
          "pld [%[inptr2], #128]            @ preload a, 64byte\n"
          "pld [%[inptr3]]            @ preload a, 64byte\n"
          "pld [%[inptr3], #128]            @ preload a, 64byte\n"
          // d0 = vb, d1 = vg, d2 = vr
          "vld1.8 d0, [%[vb]] \n"
          "vld1.8 d1, [%[vg]] \n"
          "vld1.8 d2, [%[vr]] \n"
          "1: \n"
          "vld4.8 {d3, d4, d5, d6}, [%[inptr0]]! \n"
          "vld4.8 {d7, d8, d9, d10}, [%[inptr1]]! \n"
          "vld4.8 {d11, d12, d13, d14}, [%[inptr2]]! \n"
          "vld4.8 {d15, d16, d17, d18}, [%[inptr3]]! \n"
          // vb
          "vmull.u8 q10, d3, d0 \n"
          "vmull.u8 q11, d7, d0 \n"
          "vmull.u8 q12, d11, d0 \n"
          "vmull.u8 q13, d15, d0 \n"
          // vg
          "vmull.u8 q14, d4, d1 \n"
          "vmull.u8 q15, d8, d1 \n"
          "vmull.u8 q5, d12, d1 \n"
          "vmull.u8 q7, d16, d1 \n"
          // vr
          "vmlal.u8 q10, d5, d2 \n"
          "vmlal.u8 q11, d9, d2 \n"
          "vmlal.u8 q12, d13, d2 \n"
          "vmlal.u8 q13, d17, d2 \n"
          // 16->32
          "vaddl.u16 q2, d28, d20 \n"
          "vaddl.u16 q3, d29, d21 \n"
          "vaddl.u16 q4, d30, d22 \n"
          "vaddl.u16 q10, d31, d23 \n"
          "vaddl.u16 q6, d10, d24 \n"
          "vaddl.u16 q11, d11, d25 \n"
          "vaddl.u16 q8, d14, d26 \n"
          "vaddl.u16 q9, d15, d27 \n"
          // 32->16 q2 >> 7
          "vshrn.u32 d10, q2, #7 \n"
          "vshrn.u32 d11, q3, #7 \n"
          "vshrn.u32 d14, q4, #7 \n"
          "vshrn.u32 d15, q10, #7 \n"
          "vshrn.u32 d24, q6, #7 \n"
          "vshrn.u32 d25, q11, #7 \n"
          "vshrn.u32 d26, q8, #7 \n"
          "vshrn.u32 d27, q9, #7 \n"
          // 16->8
          "vmovn.u16 d4, q5 \n"
          "vmovn.u16 d5, q7 \n"
          "vmovn.u16 d6, q12 \n"
          "vmovn.u16 d7, q13 \n"
          "subs %[cnt], #1 \n"
          // store
          "vst1.8 d4, [%[outr0]]! \n"
          "vst1.8 d5, [%[outr1]]! \n"
          "vst1.8 d6, [%[outr2]]! \n"
          "vst1.8 d7, [%[outr3]]! \n"
          "bne 1b \n"
          : [inptr0] "+r"(inptr0),
            [inptr1] "+r"(inptr1),
            [inptr2] "+r"(inptr2),
            [inptr3] "+r"(inptr3),
            [outr0] "+r"(outr0),
            [outr1] "+r"(outr1),
            [outr2] "+r"(outr2),
            [outr3] "+r"(outr3),
            [cnt] "+r"(cnt)
          : [vb] "r"(vb_array), [vg] "r"(vg_array), [vr] "r"(vr_array)
          : "cc",
            "memory",
            "q0",
            "q1",
            "q2",
            "q3",
            "q4",
            "q5",
            "q6",
            "q7",
            "q8",
            "q9",
            "q10",
            "q11",
            "q12",
            "q13",
            "q14",
            "q15");
#endif
    }
    // Scalar path for the columns not covered by the 8-pixel groups. The
    // pointers were advanced past the vectorized part by the assembly above.
    for (int j = 0; j < remain_pro; j++) {
      *outr0++ = (inptr0[0] * b + inptr0[1] * g + inptr0[2] * r) >> 7;
      *outr1++ = (inptr1[0] * b + inptr1[1] * g + inptr1[2] * r) >> 7;
      *outr2++ = (inptr2[0] * b + inptr2[1] * g + inptr2[2] * r) >> 7;
      *outr3++ = (inptr3[0] * b + inptr3[1] * g + inptr3[2] * r) >> 7;
      inptr0 += 4;
      inptr1 += 4;
      inptr2 += 4;
      inptr3 += 4;
    }
  }
  // Leftover rows (srch % 4), one row at a time.
  for (int i = rows4; i < srch; i++) {
    const uint8_t* inptr0 = src + i * win;
    uint8_t* outr0 = dst + i * srcw;
    for (int j = 0; j < cnt_pro; j++) {
      // De-interleave 8 pixels: val[0..2] are the three color channels.
      uint8x8x4_t y0 = vld4_u8(inptr0);
      uint16x8_t val0 = vmull_u8(y0.val[0], vb);
      uint16x8_t val0_1 = vmull_u8(y0.val[1], vg);
      val0 = vmlal_u8(val0, y0.val[2], vr);
      uint32x4_t v0_sum0 = vaddl_u16(vget_low_u16(val0_1), vget_low_u16(val0));
      uint32x4_t v0_sum1 =
          vaddl_u16(vget_high_u16(val0_1), vget_high_u16(val0));
      uint16x4_t v0_sum0_16 = vshrn_n_u32(v0_sum0, 7);
      uint16x4_t v0_sum1_16 = vshrn_n_u32(v0_sum1, 7);
      uint16x8_t v0_sum = vcombine_u16(v0_sum0_16, v0_sum1_16);
      uint8x8_t vout0 = vmovn_u16(v0_sum);
      inptr0 += 32;
      vst1_u8(outr0, vout0);
      outr0 += 8;
    }
    // Only the remain_pro pixels after the 8-pixel groups are left; iterating
    // up to srcw here would read and write past the end of the row.
    for (int j = 0; j < remain_pro; j++) {
      *outr0++ = (inptr0[0] * b + inptr0[1] * g + inptr0[2] * r) >> 7;
      inptr0 += 4;
    }
  }
}
/*
采用CV_GRAY2BGR,转换公式B = G = R = Gray 采用CV_GRAY2BGR,转换公式B = G = R = Gray
采用CV_GRAY2RGB,转换公式R = G = B = Gray 采用CV_GRAY2RGB,转换公式R = G = B = Gray
gray2bgr, gray2rgb gray2bgr, gray2rgb
...@@ -1091,6 +1375,22 @@ void hwc1_to_hwc3(const uint8_t* src, uint8_t* dst, int srcw, int srch) { ...@@ -1091,6 +1375,22 @@ void hwc1_to_hwc3(const uint8_t* src, uint8_t* dst, int srcw, int srch) {
} }
} }
} }
/*
 * gray2bgra, gray2rgba
 * CV_GRAY2BGRA: B = G = R = Gray, A = 255
 * CV_GRAY2RGBA: R = G = B = Gray, A = 255
 *
 * param src:  input gray image, srch rows of srcw bytes
 * param dst:  output image, 4 bytes per pixel
 * param srcw: image width in pixels
 * param srch: image height in pixels
 */
void hwc1_to_hwc4(const uint8_t* src, uint8_t* dst, int srcw, int srch) {
  for (int row = 0; row < srch; ++row) {
    for (int col = 0; col < srcw; ++col) {
      const uint8_t gray = *src++;
      // Replicate the gray value into the three color channels.
      dst[0] = gray;
      dst[1] = gray;
      dst[2] = gray;
      // Fully opaque alpha.
      dst[3] = 255;
      dst += 4;
    }
  }
}
// bgr2bgra, rgb2rgba // bgr2bgra, rgb2rgba
void hwc3_to_hwc4(const uint8_t* src, uint8_t* dst, int srcw, int srch) { void hwc3_to_hwc4(const uint8_t* src, uint8_t* dst, int srcw, int srch) {
for (int i = 0; i < srch; i++) { for (int i = 0; i < srch; i++) {
......
...@@ -19,6 +19,23 @@ namespace paddle { ...@@ -19,6 +19,23 @@ namespace paddle {
namespace lite { namespace lite {
namespace utils { namespace utils {
namespace cv { namespace cv {
// Select the flip routine that matches the channel count of srcFormat:
// GRAY -> flip_hwc1, BGR/RGB -> flip_hwc3, BGRA/RGBA -> flip_hwc4.
// Any other format only prints a message and leaves dst untouched.
void ImageFlip::choose(const uint8_t* src,
                       uint8_t* dst,
                       ImageFormat srcFormat,
                       int srcw,
                       int srch,
                       FlipParam flip_param) {
  switch (srcFormat) {
    case GRAY:
      flip_hwc1(src, dst, srcw, srch, flip_param);
      break;
    case BGR:
    case RGB:
      flip_hwc3(src, dst, srcw, srch, flip_param);
      break;
    case BGRA:
    case RGBA:
      flip_hwc4(src, dst, srcw, srch, flip_param);
      break;
    default:
      printf("this srcFormat: %d does not support! \n", srcFormat);
      break;
  }
}
// gray // gray
void flip_hwc1_x(const uint8_t* src, uint8_t* dst, int w_in, int h_in); void flip_hwc1_x(const uint8_t* src, uint8_t* dst, int w_in, int h_in);
void flip_hwc1_y(const uint8_t* src, uint8_t* dst, int w_in, int h_in); void flip_hwc1_y(const uint8_t* src, uint8_t* dst, int w_in, int h_in);
...@@ -43,6 +60,9 @@ void flip_hwc1(const uint8_t* src, ...@@ -43,6 +60,9 @@ void flip_hwc1(const uint8_t* src,
flip_hwc1_y(src, dst, srcw, srch); flip_hwc1_y(src, dst, srcw, srch);
} else if (flip_param == XY) { } else if (flip_param == XY) {
flip_hwc1_xy(src, dst, srcw, srch); flip_hwc1_xy(src, dst, srcw, srch);
} else {
printf("its doesn't support Flip: %d \n", static_cast<int>(flip_param));
return;
} }
} }
...@@ -57,6 +77,9 @@ void flip_hwc3(const uint8_t* src, ...@@ -57,6 +77,9 @@ void flip_hwc3(const uint8_t* src,
flip_hwc3_y(src, dst, srcw, srch); flip_hwc3_y(src, dst, srcw, srch);
} else if (flip_param == XY) { } else if (flip_param == XY) {
flip_hwc3_xy(src, dst, srcw, srch); flip_hwc3_xy(src, dst, srcw, srch);
} else {
printf("its doesn't support Flip: %d \n", static_cast<int>(flip_param));
return;
} }
} }
...@@ -71,6 +94,9 @@ void flip_hwc4(const uint8_t* src, ...@@ -71,6 +94,9 @@ void flip_hwc4(const uint8_t* src,
flip_hwc4_y(src, dst, srcw, srch); flip_hwc4_y(src, dst, srcw, srch);
} else if (flip_param == XY) { } else if (flip_param == XY) {
flip_hwc4_xy(src, dst, srcw, srch); flip_hwc4_xy(src, dst, srcw, srch);
} else {
printf("its doesn't support Flip: %d \n", static_cast<int>(flip_param));
return;
} }
} }
/* /*
......
...@@ -21,6 +21,15 @@ namespace paddle { ...@@ -21,6 +21,15 @@ namespace paddle {
namespace lite { namespace lite {
namespace utils { namespace utils {
namespace cv { namespace cv {
// Entry point for image flipping.
// choose() dispatches on srcFormat to flip_hwc1 (GRAY), flip_hwc3 (BGR/RGB)
// or flip_hwc4 (BGRA/RGBA).
class ImageFlip {
public:
// param src: input image data
// param dst: output image data
// param srcFormat: image format of src, used to pick the flip routine
// param srcw: input image width
// param srch: input image height
// param flip_param: flip direction, forwarded to the selected routine
void choose(const uint8_t* src,
uint8_t* dst,
ImageFormat srcFormat,
int srcw,
int srch,
FlipParam flip_param);
};
void flip_hwc1( void flip_hwc1(
const uint8_t* src, uint8_t* dst, int srcw, int srch, FlipParam flip_param); const uint8_t* src, uint8_t* dst, int srcw, int srch, FlipParam flip_param);
void flip_hwc3( void flip_hwc3(
......
...@@ -38,6 +38,15 @@ namespace paddle { ...@@ -38,6 +38,15 @@ namespace paddle {
namespace lite { namespace lite {
namespace utils { namespace utils {
namespace cv { namespace cv {
// Resize entry point: forwards all arguments unchanged to resize().
// param src: input image data
// param dst: output image data
// param srcFormat: image format of src
// param srcw, srch: input image width and height
// param dstw, dsth: output image width and height
void ImageResize::choose(const uint8_t* src,
uint8_t* dst,
ImageFormat srcFormat,
int srcw,
int srch,
int dstw,
int dsth) {
resize(src, dst, srcFormat, srcw, srch, dstw, dsth);
}
void compute_xy(int srcw, void compute_xy(int srcw,
int srch, int srch,
int dstw, int dstw,
......
...@@ -39,6 +39,16 @@ namespace paddle { ...@@ -39,6 +39,16 @@ namespace paddle {
namespace lite { namespace lite {
namespace utils { namespace utils {
namespace cv { namespace cv {
// Entry point for image resizing; choose() forwards to resize().
class ImageResize {
public:
// param src: input image data
// param dst: output image data
// param srcFormat: image format of src
// param srcw, srch: input image width and height
// param dstw, dsth: output image width and height
void choose(const uint8_t* src,
uint8_t* dst,
ImageFormat srcFormat,
int srcw,
int srch,
int dstw,
int dsth);
};
void resize(const uint8_t* src, void resize(const uint8_t* src,
uint8_t* dst, uint8_t* dst,
ImageFormat srcFormat, ImageFormat srcFormat,
......
...@@ -19,6 +19,26 @@ namespace paddle { ...@@ -19,6 +19,26 @@ namespace paddle {
namespace lite { namespace lite {
namespace utils { namespace utils {
namespace cv { namespace cv {
// Select the rotate routine that matches the channel count of srcFormat:
// GRAY -> rotate_hwc1, BGR/RGB -> rotate_hwc3, BGRA/RGBA -> rotate_hwc4.
// Only rotations of 90, 180 and 270 degrees are supported; for any other
// degree, or for any other format, a message is printed and dst is left
// untouched.
// param src: input image data
// param dst: output image data
// param srcFormat: image format of src
// param srcw, srch: input image width and height
// param degree: rotation angle in degrees (90, 180 or 270)
void ImageRotate::choose(const uint8_t* src,
                         uint8_t* dst,
                         ImageFormat srcFormat,
                         int srcw,
                         int srch,
                         float degree) {
  if (degree != 90 && degree != 180 && degree != 270) {
    printf("this degree: %f not support \n", degree);
    // Stop here instead of falling through to the format dispatch, mirroring
    // the unsupported-format branch below.
    return;
  }
  if (srcFormat == GRAY) {
    rotate_hwc1(src, dst, srcw, srch, degree);
  } else if (srcFormat == BGR || srcFormat == RGB) {
    rotate_hwc3(src, dst, srcw, srch, degree);
  } else if (srcFormat == BGRA || srcFormat == RGBA) {
    rotate_hwc4(src, dst, srcw, srch, degree);
  } else {
    printf("this srcFormat: %d does not support! \n", srcFormat);
    return;
  }
}
// gray // gray
void rotate_hwc1_90( void rotate_hwc1_90(
const uint8_t* src, uint8_t* dst, int w_in, int h_in, int w_out, int h_out); const uint8_t* src, uint8_t* dst, int w_in, int h_in, int w_out, int h_out);
...@@ -50,6 +70,9 @@ void rotate_hwc1( ...@@ -50,6 +70,9 @@ void rotate_hwc1(
rotate_hwc1_180(src, dst, srcw, srch, srcw, srch); rotate_hwc1_180(src, dst, srcw, srch, srcw, srch);
} else if (degree == 270) { } else if (degree == 270) {
rotate_hwc1_270(src, dst, srcw, srch, srch, srcw); rotate_hwc1_270(src, dst, srcw, srch, srch, srcw);
} else {
printf("this degree: %f does not support! \n", degree);
return;
} }
} }
...@@ -61,6 +84,9 @@ void rotate_hwc3( ...@@ -61,6 +84,9 @@ void rotate_hwc3(
rotate_hwc3_180(src, dst, srcw, srch, srcw, srch); rotate_hwc3_180(src, dst, srcw, srch, srcw, srch);
} else if (degree == 270) { } else if (degree == 270) {
rotate_hwc3_270(src, dst, srcw, srch, srch, srcw); rotate_hwc3_270(src, dst, srcw, srch, srch, srcw);
} else {
printf("this degree: %f does not support! \n", degree);
return;
} }
} }
...@@ -72,6 +98,9 @@ void rotate_hwc4( ...@@ -72,6 +98,9 @@ void rotate_hwc4(
rotate_hwc4_180(src, dst, srcw, srch, srcw, srch); rotate_hwc4_180(src, dst, srcw, srch, srcw, srch);
} else if (degree == 270) { } else if (degree == 270) {
rotate_hwc4_270(src, dst, srcw, srch, srch, srcw); rotate_hwc4_270(src, dst, srcw, srch, srch, srcw);
} else {
printf("this degree: %f does not support! \n", degree);
return;
} }
} }
#ifdef __aarch64__ #ifdef __aarch64__
...@@ -578,6 +607,7 @@ void rotate_hwc1_90(const uint8_t* src, ...@@ -578,6 +607,7 @@ void rotate_hwc1_90(const uint8_t* src,
int stride_h = 4 * w_in; int stride_h = 4 * w_in;
int stride_h_w = 4 * w_in - 8; int stride_h_w = 4 * w_in - 8;
int stride_out = 4 * w_out; int stride_out = 4 * w_out;
int ww = w_out - 8;
#pragma omp parallel for #pragma omp parallel for
for (i = 0; i < h_in - 7; i += 8) { for (i = 0; i < h_in - 7; i += 8) {
const uint8_t* inptr0 = src + i * w_in; const uint8_t* inptr0 = src + i * w_in;
...@@ -586,7 +616,7 @@ void rotate_hwc1_90(const uint8_t* src, ...@@ -586,7 +616,7 @@ void rotate_hwc1_90(const uint8_t* src,
const uint8_t* inptr3 = inptr2 + w_in; const uint8_t* inptr3 = inptr2 + w_in;
int j = 0; int j = 0;
for (; j < w_in - 7; j += 8) { for (; j < w_in - 7; j += 8) {
uint8_t* outptr0 = dst + j * w_out + i; uint8_t* outptr0 = dst + j * w_out + (ww - i);
uint8_t* outptr1 = outptr0 + w_out; uint8_t* outptr1 = outptr0 + w_out;
uint8_t* outptr2 = outptr1 + w_out; uint8_t* outptr2 = outptr1 + w_out;
uint8_t* outptr3 = outptr2 + w_out; uint8_t* outptr3 = outptr2 + w_out;
...@@ -648,7 +678,7 @@ void rotate_hwc1_90(const uint8_t* src, ...@@ -648,7 +678,7 @@ void rotate_hwc1_90(const uint8_t* src,
const uint8_t* inptr6 = inptr5 + w_in; const uint8_t* inptr6 = inptr5 + w_in;
const uint8_t* inptr7 = inptr6 + w_in; const uint8_t* inptr7 = inptr6 + w_in;
for (; j < w_in; j++) { for (; j < w_in; j++) {
uint8_t* outptr = dst + j * w_out + i; uint8_t* outptr = dst + j * w_out + ww - i;
*outptr++ = *inptr0++; *outptr++ = *inptr0++;
*outptr++ = *inptr1++; *outptr++ = *inptr1++;
*outptr++ = *inptr2++; *outptr++ = *inptr2++;
...@@ -659,10 +689,11 @@ void rotate_hwc1_90(const uint8_t* src, ...@@ -659,10 +689,11 @@ void rotate_hwc1_90(const uint8_t* src,
*outptr++ = *inptr7++; *outptr++ = *inptr7++;
} }
} }
ww = w_out - 1;
for (; i < h_in; i++) { for (; i < h_in; i++) {
const uint8_t* inptr0 = src + i * w_in; const uint8_t* inptr0 = src + i * w_in;
for (int j = 0; j < w_in; j++) { for (int j = 0; j < w_in; j++) {
uint8_t* outptr0 = dst + j * w_out + i; uint8_t* outptr0 = dst + j * w_out + ww - i;
*outptr0 = *inptr0++; *outptr0 = *inptr0++;
} }
} }
...@@ -693,9 +724,9 @@ void rotate_hwc1_180(const uint8_t* src, ...@@ -693,9 +724,9 @@ void rotate_hwc1_180(const uint8_t* src,
const uint8_t* inptr3 = inptr2 + w_in; const uint8_t* inptr3 = inptr2 + w_in;
uint8_t* outptr0 = dst + (h_in - i) * w_out - stride_w; // last uint8_t* outptr0 = dst + (h_in - i) * w_out - stride_w; // last
uint8_t* outptr1 = outptr0 + w_out; uint8_t* outptr1 = outptr0 - w_out;
uint8_t* outptr2 = outptr1 + w_out; uint8_t* outptr2 = outptr1 - w_out;
uint8_t* outptr3 = outptr2 + w_out; uint8_t* outptr3 = outptr2 - w_out;
if (i + 3 >= h_in) { if (i + 3 >= h_in) {
uint8_t* ptr = zerobuff + w_in - stride_w; uint8_t* ptr = zerobuff + w_in - stride_w;
......
...@@ -16,10 +16,20 @@ ...@@ -16,10 +16,20 @@
#include <stdint.h> #include <stdint.h>
#include <vector> #include <vector>
#include "lite/utils/cv/paddle_image_preprocess.h"
namespace paddle { namespace paddle {
namespace lite { namespace lite {
namespace utils { namespace utils {
namespace cv { namespace cv {
// Entry point for image rotation.
// choose() dispatches on srcFormat to rotate_hwc1 (GRAY), rotate_hwc3
// (BGR/RGB) or rotate_hwc4 (BGRA/RGBA).
class ImageRotate {
public:
// param src: input image data
// param dst: output image data
// param srcFormat: image format of src, used to pick the rotate routine
// param srcw: input image width
// param srch: input image height
// param degree: rotation angle in degrees; the implementation checks for
// 90, 180 and 270
void choose(const uint8_t* src,
uint8_t* dst,
ImageFormat srcFormat,
int srcw,
int srch,
float degree);
};
void rotate_hwc1( void rotate_hwc1(
const uint8_t* src, uint8_t* dst, int srcw, int srch, float degree); const uint8_t* src, uint8_t* dst, int srcw, int srch, float degree);
void rotate_hwc3( void rotate_hwc3(
......
...@@ -25,7 +25,6 @@ namespace paddle { ...@@ -25,7 +25,6 @@ namespace paddle {
namespace lite { namespace lite {
namespace utils { namespace utils {
namespace cv { namespace cv {
#define PI 3.14159265f #define PI 3.14159265f
#define Degrees2Radians(degrees) ((degrees) * (SK_ScalarPI / 180)) #define Degrees2Radians(degrees) ((degrees) * (SK_ScalarPI / 180))
#define Radians2Degrees(radians) ((radians) * (180 / SK_ScalarPI)) #define Radians2Degrees(radians) ((radians) * (180 / SK_ScalarPI))
...@@ -38,7 +37,7 @@ ImagePreprocess::ImagePreprocess(ImageFormat srcFormat, ...@@ -38,7 +37,7 @@ ImagePreprocess::ImagePreprocess(ImageFormat srcFormat,
this->dstFormat_ = dstFormat; this->dstFormat_ = dstFormat;
this->transParam_ = param; this->transParam_ = param;
} }
void ImagePreprocess::imageCovert(const uint8_t* src, uint8_t* dst) { void ImagePreprocess::imageConvert(const uint8_t* src, uint8_t* dst) {
ImageConvert img_convert; ImageConvert img_convert;
img_convert.choose(src, img_convert.choose(src,
dst, dst,
...@@ -48,7 +47,7 @@ void ImagePreprocess::imageCovert(const uint8_t* src, uint8_t* dst) { ...@@ -48,7 +47,7 @@ void ImagePreprocess::imageCovert(const uint8_t* src, uint8_t* dst) {
this->transParam_.ih); this->transParam_.ih);
} }
void ImagePreprocess::imageCovert(const uint8_t* src, void ImagePreprocess::imageConvert(const uint8_t* src,
uint8_t* dst, uint8_t* dst,
ImageFormat srcFormat, ImageFormat srcFormat,
ImageFormat dstFormat) { ImageFormat dstFormat) {
...@@ -68,7 +67,8 @@ void ImagePreprocess::imageResize(const uint8_t* src, ...@@ -68,7 +67,8 @@ void ImagePreprocess::imageResize(const uint8_t* src,
int srch, int srch,
int dstw, int dstw,
int dsth) { int dsth) {
resize(src, dst, srcFormat, srcw, srch, dstw, dsth); ImageResize img_resize;
img_resize.choose(src, dst, srcFormat, srcw, srch, dstw, dsth);
} }
void ImagePreprocess::imageResize(const uint8_t* src, uint8_t* dst) { void ImagePreprocess::imageResize(const uint8_t* src, uint8_t* dst) {
...@@ -77,7 +77,8 @@ void ImagePreprocess::imageResize(const uint8_t* src, uint8_t* dst) { ...@@ -77,7 +77,8 @@ void ImagePreprocess::imageResize(const uint8_t* src, uint8_t* dst) {
int dstw = this->transParam_.ow; int dstw = this->transParam_.ow;
int dsth = this->transParam_.oh; int dsth = this->transParam_.oh;
auto srcFormat = this->dstFormat_; auto srcFormat = this->dstFormat_;
resize(src, dst, srcFormat, srcw, srch, dstw, dsth); ImageResize img_resize;
img_resize.choose(src, dst, srcFormat, srcw, srch, dstw, dsth);
} }
void ImagePreprocess::imageRotate(const uint8_t* src, void ImagePreprocess::imageRotate(const uint8_t* src,
...@@ -86,19 +87,8 @@ void ImagePreprocess::imageRotate(const uint8_t* src, ...@@ -86,19 +87,8 @@ void ImagePreprocess::imageRotate(const uint8_t* src,
int srcw, int srcw,
int srch, int srch,
float degree) { float degree) {
if (degree != 90 && degree != 180 && degree != 270) { ImageRotate img_rotate;
printf("this degree: %f not support \n", degree); img_rotate.choose(src, dst, srcFormat, srcw, srch, degree);
}
if (srcFormat == GRAY) {
rotate_hwc1(src, dst, srcw, srch, degree);
} else if (srcFormat == BGR || srcFormat == RGB) {
rotate_hwc3(src, dst, srcw, srch, degree);
} else if (srcFormat == BGRA || srcFormat == RGBA) {
rotate_hwc4(src, dst, srcw, srch, degree);
} else {
printf("this srcFormat: %d does not support! \n", srcFormat);
return;
}
} }
void ImagePreprocess::imageRotate(const uint8_t* src, uint8_t* dst) { void ImagePreprocess::imageRotate(const uint8_t* src, uint8_t* dst) {
...@@ -106,10 +96,8 @@ void ImagePreprocess::imageRotate(const uint8_t* src, uint8_t* dst) { ...@@ -106,10 +96,8 @@ void ImagePreprocess::imageRotate(const uint8_t* src, uint8_t* dst) {
auto srch = this->transParam_.oh; auto srch = this->transParam_.oh;
auto srcFormat = this->dstFormat_; auto srcFormat = this->dstFormat_;
auto degree = this->transParam_.rotate_param; auto degree = this->transParam_.rotate_param;
if (degree != 90 && degree != 180 && degree != 270) { ImageRotate img_rotate;
printf("this degree: %f not support \n", degree); img_rotate.choose(src, dst, srcFormat, srcw, srch, degree);
}
ImagePreprocess::imageRotate(src, dst, srcFormat, srcw, srch, degree);
} }
void ImagePreprocess::imageFlip(const uint8_t* src, void ImagePreprocess::imageFlip(const uint8_t* src,
...@@ -118,16 +106,8 @@ void ImagePreprocess::imageFlip(const uint8_t* src, ...@@ -118,16 +106,8 @@ void ImagePreprocess::imageFlip(const uint8_t* src,
int srcw, int srcw,
int srch, int srch,
FlipParam flip_param) { FlipParam flip_param) {
if (srcFormat == GRAY) { ImageFlip img_flip;
flip_hwc1(src, dst, srcw, srch, flip_param); img_flip.choose(src, dst, srcFormat, srcw, srch, flip_param);
} else if (srcFormat == BGR || srcFormat == RGB) {
flip_hwc3(src, dst, srcw, srch, flip_param);
} else if (srcFormat == BGRA || srcFormat == RGBA) {
flip_hwc4(src, dst, srcw, srch, flip_param);
} else {
printf("this srcFormat: %d does not support! \n", srcFormat);
return;
}
} }
void ImagePreprocess::imageFlip(const uint8_t* src, uint8_t* dst) { void ImagePreprocess::imageFlip(const uint8_t* src, uint8_t* dst) {
...@@ -135,7 +115,8 @@ void ImagePreprocess::imageFlip(const uint8_t* src, uint8_t* dst) { ...@@ -135,7 +115,8 @@ void ImagePreprocess::imageFlip(const uint8_t* src, uint8_t* dst) {
auto srch = this->transParam_.oh; auto srch = this->transParam_.oh;
auto srcFormat = this->dstFormat_; auto srcFormat = this->dstFormat_;
auto flip_param = this->transParam_.flip_param; auto flip_param = this->transParam_.flip_param;
ImagePreprocess::imageFlip(src, dst, srcFormat, srcw, srch, flip_param); ImageFlip img_flip;
img_flip.choose(src, dst, srcFormat, srcw, srch, flip_param);
} }
void ImagePreprocess::image2Tensor(const uint8_t* src, void ImagePreprocess::image2Tensor(const uint8_t* src,
......
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
#include <vector> #include <vector>
#include "lite/api/paddle_api.h" #include "lite/api/paddle_api.h"
#include "lite/api/paddle_place.h" #include "lite/api/paddle_place.h"
namespace paddle { namespace paddle {
namespace lite { namespace lite {
namespace utils { namespace utils {
...@@ -37,9 +38,9 @@ enum ImageFormat { ...@@ -37,9 +38,9 @@ enum ImageFormat {
}; };
// flip enum // flip enum
enum FlipParam { enum FlipParam {
XY = -1, // flip along the XY axis
X = 0, // flip along the X axis X = 0, // flip along the X axis
Y, // flip along the Y axis Y // flip along the Y axis
XY // flip along the XY axis
}; };
// transform param // transform param
typedef struct { typedef struct {
...@@ -69,11 +70,12 @@ class ImagePreprocess { ...@@ -69,11 +70,12 @@ class ImagePreprocess {
* BGR(RGB)and BGRA(RGBA) transform, * BGR(RGB)and BGRA(RGBA) transform,
* BGR(RGB)and RGB(BGR) transform, * BGR(RGB)and RGB(BGR) transform,
* BGR(RGB)and RGBA(BGRA) transform, * BGR(RGB)and RGBA(BGRA) transform,
* BGR(RGB)and GRAY transform, * BGR(RGB) and GRAY transform,
* BGRA(RGBA) and GRAY transform,
* param src: input image data * param src: input image data
* param dst: output image data * param dst: output image data
*/ */
void imageCovert(const uint8_t* src, uint8_t* dst); void imageConvert(const uint8_t* src, uint8_t* dst);
/* /*
* image color convert * image color convert
* support NV12/NV21_to_BGR(RGB), NV12/NV21_to_BGRA(RGBA), * support NV12/NV21_to_BGR(RGB), NV12/NV21_to_BGRA(RGBA),
...@@ -81,6 +83,7 @@ class ImagePreprocess { ...@@ -81,6 +83,7 @@ class ImagePreprocess {
* BGR(RGB)and RGB(BGR) transform, * BGR(RGB)and RGB(BGR) transform,
* BGR(RGB)and RGBA(BGRA) transform, * BGR(RGB)and RGBA(BGRA) transform,
* BGR(RGB)and GRAY transform, * BGR(RGB)and GRAY transform,
* BGRA(RGBA) and GRAY transform,
* param src: input image data * param src: input image data
* param dst: output image data * param dst: output image data
* param srcFormat: input image image format support: GRAY, NV12(NV21), * param srcFormat: input image image format support: GRAY, NV12(NV21),
...@@ -88,7 +91,7 @@ class ImagePreprocess { ...@@ -88,7 +91,7 @@ class ImagePreprocess {
* param dstFormat: output image image format, support GRAY, BGR(RGB) and * param dstFormat: output image image format, support GRAY, BGR(RGB) and
* BGRA(RGBA) * BGRA(RGBA)
*/ */
void imageCovert(const uint8_t* src, void imageConvert(const uint8_t* src,
uint8_t* dst, uint8_t* dst,
ImageFormat srcFormat, ImageFormat srcFormat,
ImageFormat dstFormat); ImageFormat dstFormat);
...@@ -171,7 +174,8 @@ class ImagePreprocess { ...@@ -171,7 +174,8 @@ class ImagePreprocess {
FlipParam flip_param); FlipParam flip_param);
/* /*
* change image data to tensor data * change image data to tensor data
* support image format is BGR(RGB) and BGRA(RGBA), Data layout is NHWC and * support image format is GRAY, BGR(RGB) and BGRA(RGBA), Data layout is NHWC
* and
* NCHW * NCHW
* param src: input image data * param src: input image data
* param dstTensor: output tensor data * param dstTensor: output tensor data
...@@ -186,7 +190,8 @@ class ImagePreprocess { ...@@ -186,7 +190,8 @@ class ImagePreprocess {
float* scales); float* scales);
/* /*
* change image data to tensor data * change image data to tensor data
* support image format is BGR(RGB) and BGRA(RGBA), Data layout is NHWC and * support image format is GRAY, BGR(RGB) and BGRA(RGBA), Data layout is NHWC
* and
* NCHW * NCHW
* param src: input image data * param src: input image data
* param dstTensor: output tensor data * param dstTensor: output tensor data
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册