From 7ebb6f2945bcd82fd215e295313586e42cfbb4cf Mon Sep 17 00:00:00 2001 From: Guanghua Yu <742925032@qq.com> Date: Sat, 8 May 2021 17:02:03 +0800 Subject: [PATCH] add static lite demo (#2899) --- deploy/lite/README.md | 17 +- static/deploy/lite/Makefile | 61 ++++ static/deploy/lite/README.md | 264 +++++++++++++++ static/deploy/lite/coco_label_list.txt | 80 +++++ static/deploy/lite/config_ppyolo_tiny.txt | 12 + static/deploy/lite/run_detection.cc | 390 ++++++++++++++++++++++ 6 files changed, 816 insertions(+), 8 deletions(-) create mode 100644 static/deploy/lite/Makefile create mode 100644 static/deploy/lite/README.md create mode 100644 static/deploy/lite/coco_label_list.txt create mode 100644 static/deploy/lite/config_ppyolo_tiny.txt create mode 100644 static/deploy/lite/run_detection.cc diff --git a/deploy/lite/README.md b/deploy/lite/README.md index ed76b1994..d32fd5434 100644 --- a/deploy/lite/README.md +++ b/deploy/lite/README.md @@ -195,18 +195,19 @@ cp ../../../cxx/lib/libpaddle_light_api_shared.so ./debug/ 执行完成后,detection文件夹下将有如下文件格式: ``` -demo/cxx/clas/ +demo/cxx/detection/ |-- debug/ -| |--ppyolo_tiny.nb 优化后的检测器模型文件 -| |--000000014439.jpg 待测试图像 -| |--coco_label_list.txt 类别映射文件 +| |--ppyolo_tiny.nb 优化后的检测器模型文件 +| |--000000014439.jpg 待测试图像 +| |--coco_label_list.txt 类别映射文件 | |--libpaddle_light_api_shared.so C++预测库文件 -| |--config_ppyolo_tiny.txt 分类预测超参数配置 -|-- image_classfication.cpp 图像分类代码文件 -|-- Makefile 编译文件 +| |--config_ppyolo_tiny.txt 检测模型预测超参数配置 +|-- run_detection.cc 目标检测代码文件 +|-- Makefile 编译文件 ``` -#### 注意: +**注意:** + * 上述文件中,`coco_label_list.txt` 是COCO数据集的类别映射文件,如果使用自定义的类别,需要更换该类别映射文件。 * `config_ppyolo_tiny.txt` 包含了检测器的超参数,如下: diff --git a/static/deploy/lite/Makefile b/static/deploy/lite/Makefile new file mode 100644 index 000000000..09c581c34 --- /dev/null +++ b/static/deploy/lite/Makefile @@ -0,0 +1,61 @@ +ARM_ABI = arm8 +export ARM_ABI + +include ../Makefile.def + +LITE_ROOT=../../../ + +THIRD_PARTY_DIR=${LITE_ROOT}/third_party + +OPENCV_VERSION=opencv4.1.0 + +OPENCV_LIBS = ${THIRD_PARTY_DIR}/${OPENCV_VERSION}/arm64-v8a/libs/libopencv_imgcodecs.a \ + ${THIRD_PARTY_DIR}/${OPENCV_VERSION}/arm64-v8a/libs/libopencv_imgproc.a \ + ${THIRD_PARTY_DIR}/${OPENCV_VERSION}/arm64-v8a/libs/libopencv_core.a \ + ${THIRD_PARTY_DIR}/${OPENCV_VERSION}/arm64-v8a/3rdparty/libs/libtegra_hal.a \ + ${THIRD_PARTY_DIR}/${OPENCV_VERSION}/arm64-v8a/3rdparty/libs/liblibjpeg-turbo.a \ + ${THIRD_PARTY_DIR}/${OPENCV_VERSION}/arm64-v8a/3rdparty/libs/liblibwebp.a \ + ${THIRD_PARTY_DIR}/${OPENCV_VERSION}/arm64-v8a/3rdparty/libs/liblibpng.a \ + ${THIRD_PARTY_DIR}/${OPENCV_VERSION}/arm64-v8a/3rdparty/libs/liblibjasper.a \ + ${THIRD_PARTY_DIR}/${OPENCV_VERSION}/arm64-v8a/3rdparty/libs/liblibtiff.a \ + ${THIRD_PARTY_DIR}/${OPENCV_VERSION}/arm64-v8a/3rdparty/libs/libIlmImf.a \ + ${THIRD_PARTY_DIR}/${OPENCV_VERSION}/arm64-v8a/3rdparty/libs/libtbb.a \ + ${THIRD_PARTY_DIR}/${OPENCV_VERSION}/arm64-v8a/3rdparty/libs/libcpufeatures.a + +OPENCV_INCLUDE = -I../../../third_party/${OPENCV_VERSION}/arm64-v8a/include + +CXX_INCLUDES = $(INCLUDES) ${OPENCV_INCLUDE} -I$(LITE_ROOT)/cxx/include + +CXX_LIBS = ${OPENCV_LIBS} -L$(LITE_ROOT)/cxx/lib/ -lpaddle_light_api_shared $(SYSTEM_LIBS) + +############################################################### +# How to use one of static libaray: # +# `libpaddle_api_full_bundled.a` # +# `libpaddle_api_light_bundled.a` # +############################################################### +# Note: default use lite's shared library. 
# +############################################################### +# 1. Comment above line using `libpaddle_light_api_shared.so` +# 2. Undo comment below line using `libpaddle_api_light_bundled.a` + +#CXX_LIBS = $(LITE_ROOT)/cxx/lib/libpaddle_api_light_bundled.a $(SYSTEM_LIBS) + +detect_system: fetch_opencv detect_system.o + $(CC) $(SYSROOT_LINK) $(CXXFLAGS_LINK) detect_system.o -o detect_system $(CXX_LIBS) $(LDFLAGS) + +detect_system.o: run_detection.cc + $(CC) $(SYSROOT_COMPLILE) $(CXX_DEFINES) $(CXX_INCLUDES) $(CXX_FLAGS) -o detect_system.o -c run_detection.cc + +fetch_opencv: + @ test -d ${THIRD_PARTY_DIR} || mkdir ${THIRD_PARTY_DIR} + @ test -e ${THIRD_PARTY_DIR}/${OPENCV_VERSION}.tar.gz || \ + (echo "fetch opencv libs" && \ + wget -P ${THIRD_PARTY_DIR} https://paddle-inference-dist.bj.bcebos.com/${OPENCV_VERSION}.tar.gz) + @ test -d ${THIRD_PARTY_DIR}/${OPENCV_VERSION} || \ + tar -zxvf ${THIRD_PARTY_DIR}/${OPENCV_VERSION}.tar.gz -C ${THIRD_PARTY_DIR} + + +.PHONY: clean +clean: + rm -f detect_system.o + rm -f detect_system diff --git a/static/deploy/lite/README.md b/static/deploy/lite/README.md new file mode 100644 index 000000000..2a6b58814 --- /dev/null +++ b/static/deploy/lite/README.md @@ -0,0 +1,264 @@ +# Paddle-Lite端侧部署 + +本教程将介绍基于[Paddle Lite](https://github.com/PaddlePaddle/Paddle-Lite) 在移动端部署PaddleDetection的**静态图**模型的详细步骤。 + +Paddle Lite是飞桨轻量化推理引擎,为手机、IOT端提供高效推理能力,并广泛整合跨平台硬件,为端侧部署及应用落地问题提供轻量化的部署方案。 + +## 1. 准备环境 + +### 运行准备 +- 电脑(编译Paddle Lite) +- 安卓手机(armv7或armv8) + +### 1.1 准备交叉编译环境 +交叉编译环境用于编译 Paddle Lite 和 PaddleDetection 的C++ demo。 +支持多种开发环境,不同开发环境的编译流程请参考对应文档。 + +1. [Docker](https://paddle-lite.readthedocs.io/zh/latest/source_compile/compile_env.html#docker) +2. [Linux](https://paddle-lite.readthedocs.io/zh/latest/source_compile/compile_env.html#linux) +3. [MAC OS](https://paddle-lite.readthedocs.io/zh/latest/source_compile/compile_env.html#mac-os) + +### 1.2 准备预测库 + +预测库有两种获取方式: +1. [**建议**]直接下载,预测库下载链接如下: + |平台|预测库下载链接| + |-|-| + |Android|[arm7](https://github.com/PaddlePaddle/Paddle-Lite/releases/download/v2.8/inference_lite_lib.android.armv7.gcc.c++_static.with_extra.with_cv.tar.gz) / [arm8](https://github.com/PaddlePaddle/Paddle-Lite/releases/download/v2.8/inference_lite_lib.android.armv8.gcc.c++_static.with_extra.with_cv.tar.gz)| + |iOS|[arm7](https://github.com/PaddlePaddle/Paddle-Lite/releases/download/v2.8/inference_lite_lib.ios.armv7.with_cv.with_extra.tiny_publish.tar.gz) / [arm8](https://github.com/PaddlePaddle/Paddle-Lite/releases/download/v2.8/inference_lite_lib.ios.armv8.with_cv.with_extra.tiny_publish.tar.gz)| + + 注:1. 如果是从 Paddle-Lite [官方文档](https://paddle-lite.readthedocs.io/zh/latest/quick_start/release_lib.html#android-toolchain-gcc)下载的预测库,注意选择`with_extra=ON,with_cv=ON`的下载链接。 + +2. 
编译Paddle-Lite得到预测库,Paddle-Lite的编译方式如下: +```shell +git clone https://github.com/PaddlePaddle/Paddle-Lite.git +cd Paddle-Lite +# 如果使用编译方式,建议使用develop分支编译预测库 +git checkout develop +./lite/tools/build_android.sh --arch=armv8 --with_cv=ON --with_extra=ON +``` + +**注意**:编译Paddle-Lite获得预测库时,需要打开`--with_cv=ON --with_extra=ON`两个选项,`--arch`表示`arm`版本,这里指定为armv8,更多编译命令介绍请参考[链接](https://paddle-lite.readthedocs.io/zh/latest/source_compile/compile_andriod.html#id2)。 + +直接下载预测库并解压后,可以得到`inference_lite_lib.android.armv8.gcc.c++_static.with_extra.with_cv/`文件夹,通过编译Paddle-Lite得到的预测库位于`Paddle-Lite/build.lite.android.armv8.gcc/inference_lite_lib.android.armv8/`文件夹下。 +预测库的文件目录如下: + +``` +inference_lite_lib.android.armv8/ +|-- cxx C++ 预测库和头文件 +| |-- include C++ 头文件 +| | |-- paddle_api.h +| | |-- paddle_image_preprocess.h +| | |-- paddle_lite_factory_helper.h +| | |-- paddle_place.h +| | |-- paddle_use_kernels.h +| | |-- paddle_use_ops.h +| | `-- paddle_use_passes.h +| `-- lib C++预测库 +| |-- libpaddle_api_light_bundled.a C++静态库 +| `-- libpaddle_light_api_shared.so C++动态库 +|-- java Java预测库 +| |-- jar +| | `-- PaddlePredictor.jar +| |-- so +| | `-- libpaddle_lite_jni.so +| `-- src +|-- demo C++和Java示例代码 +| |-- cxx C++ 预测库demo +| `-- java Java 预测库demo +``` + +## 2 开始运行 + +### 2.1 模型优化 + +Paddle-Lite 提供了多种策略来自动优化原始的模型,其中包括量化、子图融合、混合调度、Kernel优选等方法,使用Paddle-Lite的`opt`工具可以自动对inference模型进行优化,目前支持两种优化方式,优化后的模型更轻量,模型运行速度更快。 + +**注意**:如果已经准备好了 `.nb` 结尾的模型文件,可以跳过此步骤。 + +#### 2.1.1 安装paddle_lite_opt工具 +安装paddle_lite_opt工具有如下两种方法: +1. [**建议**]pip安装paddlelite并进行转换 + ```shell + pip install paddlelite + ``` + +2. 源码编译Paddle-Lite生成opt工具 + + 模型优化需要Paddle-Lite的`opt`可执行文件,可以通过编译Paddle-Lite源码获得,编译步骤如下: + ```shell + # 如果准备环境时已经clone了Paddle-Lite,则不用重新clone Paddle-Lite + git clone https://github.com/PaddlePaddle/Paddle-Lite.git + cd Paddle-Lite + git checkout develop + # 启动编译 + ./lite/tools/build.sh build_optimize_tool + ``` + + 编译完成后,`opt`文件位于`build.opt/lite/api/`下,可通过如下方式查看`opt`的运行选项和使用方式; + ```shell + cd build.opt/lite/api/ + ./opt + ``` + + `opt`的使用方式与参数与上面的`paddle_lite_opt`完全一致。 + +之后使用`paddle_lite_opt`工具可以进行inference模型的转换。`paddle_lite_opt`的部分参数如下: + +|选项|说明| +|-|-| +|--model_file|待优化的PaddlePaddle模型(combined形式)的网络结构文件路径| +|--param_file|待优化的PaddlePaddle模型(combined形式)的权重文件路径| +|--optimize_out_type|输出模型类型,目前支持两种类型:protobuf和naive_buffer,其中naive_buffer是一种更轻量级的序列化/反序列化实现,默认为naive_buffer| +|--optimize_out|优化模型的输出路径| +|--valid_targets|指定模型可执行的backend,默认为arm。目前可支持x86、arm、opencl、npu、xpu,可以同时指定多个backend(以空格分隔),Model Optimize Tool将会自动选择最佳方式。如果需要支持华为NPU(Kirin 810/990 Soc搭载的达芬奇架构NPU),应当设置为npu, arm| + +更详细的`paddle_lite_opt`工具使用说明请参考[使用opt转化模型文档](https://paddle-lite.readthedocs.io/zh/latest/user_guides/opt/opt_bin.html) + +`--model_file`表示inference模型的model文件地址,`--param_file`表示inference模型的param文件地址;`optimize_out`用于指定输出文件的名称(不需要添加`.nb`的后缀)。直接在命令行中运行`paddle_lite_opt`,也可以查看所有参数及其说明。 + + +#### 2.1.3 转换示例 + +下面以PaddleDetection中的 `PP-YOLO-tiny` 模型为例,介绍使用`paddle_lite_opt`完成预训练模型到inference模型,再到Paddle-Lite优化模型的转换。 + +```shell +# 进入PaddleDetection根目录 +cd PaddleDetection_root_path +# 进入静态图模型文件夹 +cd static + +# 将预训练模型导出为inference模型 +python tools/export_model.py -c configs/ppyolo/ppyolo_tiny.yml -o weights=https://paddlemodels.bj.bcebos.com/object_detection/ppyolo_tiny.pdparams + +# 将inference模型转化为Paddle-Lite优化模型 +paddle_lite_opt --model_file=output/ppyolo_tiny/__model__ --param_file=output/ppyolo_tiny/__params__ --optimize_out=ppyolo_tiny +``` + +最终在当前文件夹下生成`ppyolo_tiny.nb`的文件。 + +**注意**:`--optimize_out` 参数为优化后模型的保存路径,无需加后缀`.nb`;`--model_file` 
参数为模型结构信息文件的路径,`--param_file` 参数为模型权重信息文件的路径,请注意文件名。 + +### 2.2 与手机联调 + +首先需要进行一些准备工作。 +1. 准备一台arm8的安卓手机,如果编译的预测库和opt文件是armv7,则需要arm7的手机,并修改Makefile中`ARM_ABI = arm7`。 +2. 电脑上安装ADB工具,用于调试。 ADB安装方式如下: + + 2.1. MAC电脑安装ADB: + + ```shell + brew cask install android-platform-tools + ``` + 2.2. Linux安装ADB + ```shell + sudo apt update + sudo apt install -y wget adb + ``` + 2.3. Window安装ADB + + win上安装需要去谷歌的安卓平台下载ADB软件包进行安装:[链接](https://developer.android.com/studio) + +3. 手机连接电脑后,开启手机`USB调试`选项,选择`文件传输`模式,在电脑终端中输入: + +```shell +adb devices +``` +如果有device输出,则表示安装成功,如下所示: +``` +List of devices attached +744be294 device +``` + +4. 准备优化后的模型、预测库文件、测试图像和类别映射文件。 + +```shell +cd PaddleDetection_root_path +cd static/deploy/lite/ + +# 将预测库文件、测试图像和使用的类别字典文件放置在预测库中的demo/cxx/detection文件夹下 +inference_lite_path=/{lite prediction library path}/inference_lite_lib.android.armv8.gcc.c++_static.with_extra.with_cv/ +mkdir -p $inference_lite_path/demo/cxx/detection/debug/ +cp ../../ppyolo_tiny.nb $inference_lite_path/demo/cxx/detection/debug/ +cp ./coco_label_list.txt $inference_lite_path/demo/cxx/detection/debug/ +cp Makefile run_detection.cc $inference_lite_path/demo/cxx/detection/ +cp ./config_ppyolo_tiny.txt $inference_lite_path/demo/cxx/detection/debug/ +cp ../../demo/000000014439.jpg $inference_lite_path/demo/cxx/detection/debug/ + + +# 进入lite demo的工作目录 +cd /{lite prediction library path}/inference_lite_lib.android.armv8/ +cd demo/cxx/detection/ + +# 将C++预测动态库so文件复制到debug文件夹中 +cp ../../../cxx/lib/libpaddle_light_api_shared.so ./debug/ +``` + +执行完成后,detection文件夹下将有如下文件格式: + +``` +demo/cxx/detection/ +|-- debug/ +| |--ppyolo_tiny.nb 优化后的检测器模型文件 +| |--000000014439.jpg 待测试图像 +| |--coco_label_list.txt 类别映射文件 +| |--libpaddle_light_api_shared.so C++预测库文件 +| |--config_ppyolo_tiny.txt 检测模型预测超参数配置 +|-- run_detection.cc 目标检测代码文件 +|-- Makefile 编译文件 +``` + +**注意:** + +* 上述文件中,`coco_label_list.txt` 是COCO数据集的类别映射文件,如果使用自定义的类别,需要更换该类别映射文件。 + +* `config_ppyolo_tiny.txt` 包含了检测器的超参数,如下: + +```shell +model_file ./ppyolo_tiny.nb # 模型文件地址 +label_path ./coco_label_list.txt # 类别映射文本文件 +num_threads 1 # 线程数 +enable_benchmark 1 # 是否运行benchmark +Resize 320,320 # resize图像尺寸 +keep_ratio False # 是否keep ratio +mean 0.485,0.456,0.406 # 预处理均值 +std 0.229,0.224,0.225 # 预处理方差 +precision fp32 # 模型精度 +``` + +5. 启动调试,上述步骤完成后就可以使用ADB将文件夹 `debug/` push到手机上运行,步骤如下: + +```shell +# 执行编译,得到可执行文件detect_system +make + +# 将编译得到的可执行文件移动到debug文件夹中 +mv detect_system ./debug/ + +# 将上述debug文件夹push到手机上 +adb push debug /data/local/tmp/ + +adb shell +cd /data/local/tmp/debug +export LD_LIBRARY_PATH=/data/local/tmp/debug:$LD_LIBRARY_PATH + +# detect_system可执行文件的使用方式为: +# ./detect_system 配置文件路径 测试图像路径 +./detect_system ./config_ppyolo_tiny.txt ./000000014439.jpg +``` + +如果对代码做了修改,则需要重新编译并push到手机上。 + +运行效果如下: + +
+(运行效果示例图)
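+
+另外,`detect_system` 会把可视化结果写回测试图像所在目录,文件名为原图名加 `_result.jpg` 后缀。如果想在电脑上查看结果,可以参考下面的示例命令,把手机上的 debug 目录整体取回(本地目录名仅为示例,可自行修改):
+
+```shell
+# 在电脑端执行,将手机上的运行结果(含可视化图片)拉取到本地
+adb pull /data/local/tmp/debug ./lite_demo_output
+```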
+ + +## FAQ +Q1:如果想更换模型怎么办,需要重新按照流程走一遍吗? +A1:如果已经走通了上述步骤,更换模型只需要替换 `.nb` 模型文件即可,同时要注意修改下配置文件中的 `.nb` 文件路径以及类别映射文件(如有必要)。 + +Q2:换一个图测试怎么做? +A2:替换 debug 下的测试图像为你想要测试的图像,使用 ADB 再次 push 到手机上即可。 diff --git a/static/deploy/lite/coco_label_list.txt b/static/deploy/lite/coco_label_list.txt new file mode 100644 index 000000000..1f42c8eb4 --- /dev/null +++ b/static/deploy/lite/coco_label_list.txt @@ -0,0 +1,80 @@ +person +bicycle +car +motorcycle +airplane +bus +train +truck +boat +traffic light +fire hydrant +stop sign +parking meter +bench +bird +cat +dog +horse +sheep +cow +elephant +bear +zebra +giraffe +backpack +umbrella +handbag +tie +suitcase +frisbee +skis +snowboard +sports ball +kite +baseball bat +baseball glove +skateboard +surfboard +tennis racket +bottle +wine glass +cup +fork +knife +spoon +bowl +banana +apple +sandwich +orange +broccoli +carrot +hot dog +pizza +donut +cake +chair +couch +potted plant +bed +dining table +toilet +tv +laptop +mouse +remote +keyboard +cell phone +microwave +oven +toaster +sink +refrigerator +book +clock +vase +scissors +teddy bear +hair drier +toothbrush \ No newline at end of file diff --git a/static/deploy/lite/config_ppyolo_tiny.txt b/static/deploy/lite/config_ppyolo_tiny.txt new file mode 100644 index 000000000..1ca0b89da --- /dev/null +++ b/static/deploy/lite/config_ppyolo_tiny.txt @@ -0,0 +1,12 @@ +model_file ./ppyolo_tiny.nb +label_path ./coco_label_list.txt +num_threads 1 +precision fp32 +enable_benchmark 1 +arch YOLO +image_shape 3,320,320 +Resize 320,320 +keep_ratio False +mean 0.485,0.456,0.406 +std 0.229,0.224,0.225 +PadStride 0 diff --git a/static/deploy/lite/run_detection.cc b/static/deploy/lite/run_detection.cc new file mode 100644 index 000000000..15e0d227d --- /dev/null +++ b/static/deploy/lite/run_detection.cc @@ -0,0 +1,390 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
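+
+// Overall flow of this demo: read the text config and label list, resize and
+// normalize the input image into an NCHW float tensor, run the Paddle-Lite
+// predictor, then filter the output boxes by score and save the visualized
+// result image next to the input image.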
+
+#include <arm_neon.h>
+
+#include <algorithm>
+#include <chrono>
+#include <cstring>
+#include <fstream>
+#include <iostream>
+#include <iterator>
+#include <map>
+#include <memory>
+#include <numeric>
+#include <string>
+#include <vector>
+
+#include "opencv2/core.hpp"
+#include "opencv2/imgcodecs.hpp"
+#include "opencv2/imgproc.hpp"
+#include "paddle_api.h" // NOLINT
+
+using namespace paddle::lite_api; // NOLINT
+using namespace std;
+
+// One detected object: clipped bounding box, class index and confidence.
+struct Object {
+  cv::Rect rec;
+  int class_id;
+  float prob;
+};
+
+// Object for storing all preprocessed data
+struct ImageBlob {
+  // image width and height
+  std::vector<int> im_shape_;
+  // Buffer for image data after preprocessing
+  const float* im_data_;
+  std::vector<float> mean_;
+  std::vector<float> scale_;
+};
+
+void PrintBenchmarkLog(std::vector<double> det_time,
+                       std::map<std::string, std::string> config,
+                       int img_num) {
+  std::cout << "----------------- Config info ------------------" << std::endl;
+  std::cout << "runtime_device: armv8" << std::endl;
+  std::cout << "precision: " << config.at("precision") << std::endl;
+  std::cout << "num_threads: " << config.at("num_threads") << std::endl;
+  std::cout << "---------------- Data info ---------------------" << std::endl;
+  std::cout << "batch_size: " << 1 << std::endl;
+  std::cout << "---------------- Model info --------------------" << std::endl;
+  std::cout << "Model_name: " << config.at("model_file") << std::endl;
+  std::cout << "---------------- Perf info ---------------------" << std::endl;
+  std::cout << "Total number of predicted data: " << img_num
+            << " and total time spent(ms): "
+            << std::accumulate(det_time.begin(), det_time.end(), 0.0)
+            << std::endl;
+  std::cout << "preprocess_time(ms): " << det_time[0] / img_num
+            << ", inference_time(ms): " << det_time[1] / img_num
+            << ", postprocess_time(ms): " << det_time[2] << std::endl;
+}
+
+std::vector<std::string> LoadLabels(const std::string &path) {
+  std::ifstream file;
+  std::vector<std::string> labels;
+  file.open(path);
+  std::string line;
+  // Each line of the label file is one class name; keep the full line so
+  // multi-word names such as "traffic light" are not truncated.
+  while (std::getline(file, line)) {
+    labels.push_back(line);
+  }
+  file.clear();
+  file.close();
+  return labels;
+}
+
+std::vector<std::string> ReadDict(std::string path) {
+  std::ifstream in(path);
+  std::string line;
+  std::vector<std::string> m_vec;
+  if (in) {
+    while (getline(in, line)) {
+      m_vec.push_back(line);
+    }
+  } else {
+    std::cout << "no such file: " << path << std::endl;
+  }
+  return m_vec;
+}
+
+std::vector<std::string> split(const std::string &str,
+                               const std::string &delim) {
+  std::vector<std::string> res;
+  if ("" == str)
+    return res;
+  char *strs = new char[str.length() + 1];
+  std::strcpy(strs, str.c_str());
+
+  char *d = new char[delim.length() + 1];
+  std::strcpy(d, delim.c_str());
+
+  char *p = std::strtok(strs, d);
+  while (p) {
+    string s = p;
+    res.push_back(s);
+    p = std::strtok(NULL, d);
+  }
+  delete[] strs;
+  delete[] d;
+
+  return res;
+}
+
+// Each line of the config file is "key value"; build a key -> value map.
+std::map<std::string, std::string> LoadConfigTxt(std::string config_path) {
+  auto config = ReadDict(config_path);
+
+  std::map<std::string, std::string> dict;
+  for (size_t i = 0; i < config.size(); i++) {
+    std::vector<std::string> res = split(config[i], " ");
+    dict[res[0]] = res[1];
+  }
+  return dict;
+}
+
+void PrintConfig(const std::map<std::string, std::string> &config) {
+  std::cout << "=======PaddleDetection lite demo config======" << std::endl;
+  for (auto iter = config.begin(); iter != config.end(); iter++) {
+    std::cout << iter->first << " : " << iter->second << std::endl;
+  }
+  std::cout << "===End of PaddleDetection lite demo config===" << std::endl;
+}
+
+// fill tensor with mean and scale and trans layout: nhwc -> nchw, neon speed up
+void neon_mean_scale(const float* din,
+                     float* dout,
+                     int size,
+                     const std::vector<float> mean,
+                     const std::vector<float> scale) {
+  if (mean.size() != 3 || scale.size() != 3) {
+    std::cerr << "[ERROR] mean or scale size must equal to 3\n";
+    exit(1);
+  }
+  float32x4_t vmean0 = vdupq_n_f32(mean[0]);
+  float32x4_t vmean1 = vdupq_n_f32(mean[1]);
+  float32x4_t vmean2 = vdupq_n_f32(mean[2]);
+  float32x4_t vscale0 = vdupq_n_f32(1.f / scale[0]);
+  float32x4_t vscale1 = vdupq_n_f32(1.f / scale[1]);
+  float32x4_t vscale2 = vdupq_n_f32(1.f / scale[2]);
+  float* dout_c0 = dout;
+  float* dout_c1 = dout + size;
+  float* dout_c2 = dout + size * 2;
+  int i = 0;
+  // Process 4 pixels per iteration with NEON: subtract mean, divide by scale
+  // and de-interleave RGB into the three channel planes.
+  for (; i < size - 3; i += 4) {
+    float32x4x3_t vin3 = vld3q_f32(din);
+    float32x4_t vsub0 = vsubq_f32(vin3.val[0], vmean0);
+    float32x4_t vsub1 = vsubq_f32(vin3.val[1], vmean1);
+    float32x4_t vsub2 = vsubq_f32(vin3.val[2], vmean2);
+    float32x4_t vs0 = vmulq_f32(vsub0, vscale0);
+    float32x4_t vs1 = vmulq_f32(vsub1, vscale1);
+    float32x4_t vs2 = vmulq_f32(vsub2, vscale2);
+    vst1q_f32(dout_c0, vs0);
+    vst1q_f32(dout_c1, vs1);
+    vst1q_f32(dout_c2, vs2);
+
+    din += 12;
+    dout_c0 += 4;
+    dout_c1 += 4;
+    dout_c2 += 4;
+  }
+  // Scalar tail for the remaining pixels, written to the matching channel
+  // planes and normalized the same way as the NEON path.
+  for (; i < size; i++) {
+    *(dout_c0++) = (*(din++) - mean[0]) / scale[0];
+    *(dout_c1++) = (*(din++) - mean[1]) / scale[1];
+    *(dout_c2++) = (*(din++) - mean[2]) / scale[2];
+  }
+}
+
+std::vector<Object> visualize_result(
+    const float* data,
+    int count,
+    float thresh,
+    cv::Mat& image,
+    const std::vector<std::string> &class_names) {
+  if (data == nullptr) {
+    std::cerr << "[ERROR] data can not be nullptr\n";
+    exit(1);
+  }
+  std::vector<Object> rect_out;
+  for (int iw = 0; iw < count; iw++) {
+    if (data[1] > thresh) {
+      Object obj;
+      int x = static_cast<int>(data[2]);
+      int y = static_cast<int>(data[3]);
+      int w = static_cast<int>(data[4] - data[2] + 1);
+      int h = static_cast<int>(data[5] - data[3] + 1);
+      cv::Rect rec_clip =
+          cv::Rect(x, y, w, h) & cv::Rect(0, 0, image.cols, image.rows);
+      obj.class_id = static_cast<int>(data[0]);
+      obj.prob = data[1];
+      obj.rec = rec_clip;
+      if (w > 0 && h > 0 && obj.prob <= 1) {
+        rect_out.push_back(obj);
+        cv::rectangle(image, rec_clip, cv::Scalar(0, 0, 255), 1, cv::LINE_AA);
+        std::string str_prob = std::to_string(obj.prob);
+        std::string text = std::string(class_names[obj.class_id]) + ": " +
+                           str_prob.substr(0, str_prob.find(".") + 4);
+        int font_face = cv::FONT_HERSHEY_COMPLEX_SMALL;
+        double font_scale = 1.f;
+        int thickness = 1;
+        cv::Size text_size =
+            cv::getTextSize(text, font_face, font_scale, thickness, nullptr);
+        float new_font_scale = w * 0.5 * font_scale / text_size.width;
+        text_size = cv::getTextSize(
+            text, font_face, new_font_scale, thickness, nullptr);
+        cv::Point origin;
+        origin.x = x + 3;
+        origin.y = y + text_size.height + 3;
+        cv::putText(image,
+                    text,
+                    origin,
+                    font_face,
+                    new_font_scale,
+                    cv::Scalar(0, 255, 255),
+                    thickness,
+                    cv::LINE_AA);
+
+        std::cout << "detection, image size: " << image.cols << ", "
+                  << image.rows
+                  << ", detect object: " << class_names[obj.class_id]
+                  << ", score: " << obj.prob << ", location: x=" << x
+                  << ", y=" << y << ", width=" << w << ", height=" << h
+                  << std::endl;
+      }
+    }
+    data += 6;
+  }
+  return rect_out;
+}
+
+// Load Model and create model predictor
+std::shared_ptr<PaddlePredictor> LoadModel(std::string model_file,
+                                           int num_threads) {
+  MobileConfig config;
+  config.set_threads(num_threads);
+  config.set_model_from_file(model_file);
+
+  std::shared_ptr<PaddlePredictor> predictor =
+      CreatePaddlePredictor<MobileConfig>(config);
+  return predictor;
+}
+
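+// Parse the target input shape, mean and std from the config file and pack
+// them into an ImageBlob used by the preprocessing step.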
+ImageBlob prepare_imgdata(const cv::Mat& img,
+                          std::map<std::string, std::string> config) {
+  ImageBlob img_data;
+  std::vector<int> target_size_;
+  std::vector<std::string> size_str = split(config.at("Resize"), ",");
+  transform(size_str.begin(), size_str.end(), back_inserter(target_size_),
+            [](std::string const& s){return stoi(s);});
+  img_data.im_shape_ = {
+      static_cast<int>(target_size_[0]),
+      static_cast<int>(target_size_[1])
+  };
+
+  std::vector<float> mean_;
+  std::vector<float> scale_;
+  std::vector<std::string> mean_str = split(config.at("mean"), ",");
+  std::vector<std::string> std_str = split(config.at("std"), ",");
+  transform(mean_str.begin(), mean_str.end(), back_inserter(mean_),
+            [](std::string const& s){return stof(s);});
+  transform(std_str.begin(), std_str.end(), back_inserter(scale_),
+            [](std::string const& s){return stof(s);});
+  img_data.mean_ = mean_;
+  img_data.scale_ = scale_;
+  return img_data;
+}
+
+// Convert BGR to RGB, resize to the network input size, scale to [0, 1] and
+// normalize into the NCHW buffer pointed to by `data`.
+void preprocess(const cv::Mat& img, const ImageBlob img_data, float* data) {
+  cv::Mat rgb_img;
+  cv::cvtColor(img, rgb_img, cv::COLOR_BGR2RGB);
+  cv::resize(
+      rgb_img, rgb_img, cv::Size(img_data.im_shape_[0], img_data.im_shape_[1]),
+      0.f, 0.f, cv::INTER_CUBIC);
+  cv::Mat imgf;
+  rgb_img.convertTo(imgf, CV_32FC3, 1 / 255.f);
+  const float* dimg = reinterpret_cast<const float*>(imgf.data);
+  neon_mean_scale(
+      dimg, data, int(img_data.im_shape_[0] * img_data.im_shape_[1]),
+      img_data.mean_, img_data.scale_);
+}
+
+void RunModel(std::map<std::string, std::string> config,
+              std::string img_path,
+              const int repeats,
+              std::vector<double>* times) {
+  std::string model_file = config.at("model_file");
+  std::string label_path = config.at("label_path");
+  // Load Labels
+  std::vector<std::string> class_names = LoadLabels(label_path);
+
+  auto predictor = LoadModel(model_file, stoi(config.at("num_threads")));
+  cv::Mat img = imread(img_path, cv::IMREAD_COLOR);
+  auto img_data = prepare_imgdata(img, config);
+
+  auto preprocess_start = std::chrono::steady_clock::now();
+  // 1. Prepare input data from image
+  // input 0: the preprocessed image tensor, shape {1, 3, H, W}
+  std::unique_ptr<Tensor> input_tensor0(std::move(predictor->GetInput(0)));
+  input_tensor0->Resize({1, 3, img_data.im_shape_[0], img_data.im_shape_[1]});
+  auto* data0 = input_tensor0->mutable_data<float>();
+  preprocess(img, img_data, data0);
+
+  // input 1: im_size of the exported static-graph model (int32), shape {1, 2}
+  std::unique_ptr<Tensor> input_tensor1(std::move(predictor->GetInput(1)));
+  input_tensor1->Resize({1, 2});
+  auto* data1 = input_tensor1->mutable_data<int>();
+  data1[0] = img_data.im_shape_[0];
+  data1[1] = img_data.im_shape_[1];
+
+  auto preprocess_end = std::chrono::steady_clock::now();
+
+  // 2. Run predictor
+  // warm up
+  for (int i = 0; i < repeats / 2; i++) {
+    predictor->Run();
+  }
+
+  auto inference_start = std::chrono::steady_clock::now();
+  for (int i = 0; i < repeats; i++) {
+    predictor->Run();
+  }
+  auto inference_end = std::chrono::steady_clock::now();
+
+  // 3. Get output and post process
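+  // Each detection in the output tensor takes 6 floats:
+  // [class_id, score, x_min, y_min, x_max, y_max].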
+  auto postprocess_start = std::chrono::steady_clock::now();
+  std::unique_ptr<const Tensor> output_tensor(
+      std::move(predictor->GetOutput(0)));
+  const float* outptr = output_tensor->data<float>();
+  auto shape_out = output_tensor->shape();
+  int64_t cnt = 1;
+  for (auto& i : shape_out) {
+    cnt *= i;
+  }
+  auto rec_out = visualize_result(
+      outptr, static_cast<int>(cnt / 6), 0.5f, img, class_names);
+  // Save the visualized image as "<input name>_result.jpg"; use the last '.'
+  // so relative paths such as "./xxx.jpg" keep their file stem.
+  std::string result_name =
+      img_path.substr(0, img_path.rfind(".")) + "_result.jpg";
+  cv::imwrite(result_name, img);
+  auto postprocess_end = std::chrono::steady_clock::now();
+
+  std::chrono::duration<float> prep_diff = preprocess_end - preprocess_start;
+  times->push_back(double(prep_diff.count() * 1000));
+  std::chrono::duration<float> infer_diff = inference_end - inference_start;
+  times->push_back(double(infer_diff.count() / repeats * 1000));
+  std::chrono::duration<float> post_diff = postprocess_end - postprocess_start;
+  times->push_back(double(post_diff.count() * 1000));
+}
+
+int main(int argc, char** argv) {
+  if (argc < 3) {
+    std::cerr << "[ERROR] usage: " << argv[0] << " config_path image_path\n";
+    exit(1);
+  }
+  std::string config_path = argv[1];
+  std::string img_path = argv[2];
+
+  // load config
+  auto config = LoadConfigTxt(config_path);
+  PrintConfig(config);
+
+  bool enable_benchmark = bool(stoi(config.at("enable_benchmark")));
+  int repeats = enable_benchmark ? 50 : 1;
+  std::vector<double> det_times;
+  RunModel(config, img_path, repeats, &det_times);
+  PrintBenchmarkLog(det_times, config, 1);
+  return 0;
+}
-- 
GitLab