From 7a4275b69ac07b05adbef8d1688fbc9d18ceaefb Mon Sep 17 00:00:00 2001 From: wangguanzhong Date: Thu, 26 Nov 2020 13:57:44 +0800 Subject: [PATCH] [Dygraph]add export_model and deploy (#1762) * add export_model and deploy * fix travis-ci * update CMakeList & linux doc * update by comments --- .travis.yml | 2 +- configs/_base_/readers/yolov3_reader.yml | 2 +- deploy/README.md | 15 +- deploy/cpp/CMakeLists.txt | 15 +- deploy/cpp/README.md | 6 +- deploy/cpp/docs/linux_build.md | 56 ++- deploy/cpp/docs/windows_vs2019_build.md | 77 ++- deploy/cpp/include/config_parser.h | 8 + deploy/cpp/include/object_detector.h | 28 +- deploy/cpp/include/preprocess_op.h | 79 +-- deploy/cpp/scripts/bootstrap.sh | 10 - deploy/cpp/scripts/build.sh | 60 ++- deploy/cpp/src/main.cc | 148 ++++-- deploy/cpp/src/object_detector.cc | 123 +++-- deploy/cpp/src/preprocess_op.cc | 74 +-- deploy/python/README.md | 10 +- deploy/python/infer.py | 602 +++++++++-------------- deploy/python/preprocess.py | 216 ++++++++ deploy/python/visualize.py | 95 +++- ppdet/modeling/architecture/meta_arch.py | 15 +- ppdet/modeling/architecture/yolo.py | 15 +- ppdet/modeling/layers.py | 12 +- ppdet/modeling/ops.py | 137 +++--- ppdet/modeling/post_process.py | 2 +- tools/export_model.py | 130 +++++ tools/export_utils.py | 120 +++++ 26 files changed, 1353 insertions(+), 704 deletions(-) delete mode 100644 deploy/cpp/scripts/bootstrap.sh create mode 100644 deploy/python/preprocess.py create mode 100644 tools/export_model.py create mode 100644 tools/export_utils.py diff --git a/.travis.yml b/.travis.yml index 70561d65e..ac5006851 100644 --- a/.travis.yml +++ b/.travis.yml @@ -20,7 +20,7 @@ addons: before_install: - sudo pip install -U virtualenv pre-commit pip - docker pull paddlepaddle/paddle:latest - - git pull https://github.com/PaddlePaddle/PaddleDetection dygraph -r + - git pull https://github.com/PaddlePaddle/PaddleDetection dygraph script: - exit_code=0 diff --git a/configs/_base_/readers/yolov3_reader.yml b/configs/_base_/readers/yolov3_reader.yml index 32994714e..b71855087 100644 --- a/configs/_base_/readers/yolov3_reader.yml +++ b/configs/_base_/readers/yolov3_reader.yml @@ -42,7 +42,7 @@ TestReader: fields: ['image', 'im_shape', 'scale_factor', 'im_id'] sample_transforms: - DecodeOp: {} - - ResizeOp: {target_size: [608, 608], interp: 2} + - ResizeOp: {target_size: [608, 608], keep_ratio: False, interp: 2} - NormalizeImageOp: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True} - PermuteOp: {} batch_size: 1 diff --git a/deploy/README.md b/deploy/README.md index cdf95c42d..2f8818816 100644 --- a/deploy/README.md +++ b/deploy/README.md @@ -1,11 +1,14 @@ # PaddleDetection 预测部署 -`PaddleDetection`目前支持使用`Python`和`C++`部署在`Windows` 和`Linux` 上运行。 +`PaddleDetection`目前支持: +- 使用`Python`和`C++`部署在`Windows` 和`Linux` 上运行 +- [在线服务化部署](./serving/README.md) +- [移动端部署](https://github.com/PaddlePaddle/Paddle-Lite-Demo) ## 模型导出 训练得到一个满足要求的模型后,如果想要将该模型接入到C++服务器端预测库或移动端预测库,需要通过`tools/export_model.py`导出该模型。 -- [导出教程](../docs/advanced_tutorials/deploy/EXPORT_MODEL.md) +- [导出教程](https://github.com/PaddlePaddle/PaddleDetection/blob/master/docs/advanced_tutorials/deploy/EXPORT_MODEL.md) 模型导出后, 目录结构如下(以`yolov3_darknet`为例): ``` @@ -18,6 +21,8 @@ yolov3_darknet # 模型目录 预测时,该目录所在的路径会作为程序的输入参数。 ## 预测部署 -- [1. Python预测(支持 Linux 和 Windows)](./python/) -- [2. C++预测(支持 Linux 和 Windows)](./cpp/) -- [3. 移动端部署参考Paddle-Lite文档](https://paddle-lite.readthedocs.io/zh/latest/) +- [1. 
Python预测(支持 Linux 和 Windows)](https://github.com/PaddlePaddle/PaddleDetection/blob/master/deploy/python) +- [2. C++预测(支持 Linux 和 Windows)](https://github.com/PaddlePaddle/PaddleDetection/blob/master/deploy/cpp) +- [3. 在线服务化部署](./serving/README.md) +- [4. 移动端部署](https://github.com/PaddlePaddle/Paddle-Lite-Demo) +- [5. Jetson设备部署](./cpp/docs/Jetson_build.md) diff --git a/deploy/cpp/CMakeLists.txt b/deploy/cpp/CMakeLists.txt index a5e892a80..96457158f 100644 --- a/deploy/cpp/CMakeLists.txt +++ b/deploy/cpp/CMakeLists.txt @@ -10,7 +10,8 @@ SET(PADDLE_DIR "" CACHE PATH "Location of libraries") SET(OPENCV_DIR "" CACHE PATH "Location of libraries") SET(CUDA_LIB "" CACHE PATH "Location of libraries") SET(CUDNN_LIB "" CACHE PATH "Location of libraries") -SET(TENSORRT_DIR "" CACHE PATH "Compile demo with TensorRT") +SET(TENSORRT_INC_DIR "" CACHE PATH "Compile demo with TensorRT") +SET(TENSORRT_LIB_DIR "" CACHE PATH "Compile demo with TensorRT") include(cmake/yaml-cpp.cmake) @@ -112,8 +113,8 @@ endif() if (NOT WIN32) if (WITH_TENSORRT AND WITH_GPU) - include_directories("${TENSORRT_DIR}/include") - link_directories("${TENSORRT_DIR}/lib") + include_directories("${TENSORRT_INC_DIR}/") + link_directories("${TENSORRT_LIB_DIR}/") endif() endif(NOT WIN32) @@ -170,7 +171,7 @@ endif() if (NOT WIN32) set(DEPS ${DEPS} - ${MATH_LIB} ${MKLDNN_LIB} + ${MATH_LIB} ${MKLDNN_LIB} glog gflags protobuf z xxhash yaml-cpp ) if(EXISTS "${PADDLE_DIR}/third_party/install/snappystream/lib") @@ -195,8 +196,8 @@ endif(NOT WIN32) if(WITH_GPU) if(NOT WIN32) if (WITH_TENSORRT) - set(DEPS ${DEPS} ${TENSORRT_DIR}/lib/libnvinfer${CMAKE_SHARED_LIBRARY_SUFFIX}) - set(DEPS ${DEPS} ${TENSORRT_DIR}/lib/libnvinfer_plugin${CMAKE_SHARED_LIBRARY_SUFFIX}) + set(DEPS ${DEPS} ${TENSORRT_LIB_DIR}/libnvinfer${CMAKE_SHARED_LIBRARY_SUFFIX}) + set(DEPS ${DEPS} ${TENSORRT_LIB_DIR}/libnvinfer_plugin${CMAKE_SHARED_LIBRARY_SUFFIX}) endif() set(DEPS ${DEPS} ${CUDA_LIB}/libcudart${CMAKE_SHARED_LIBRARY_SUFFIX}) set(DEPS ${DEPS} ${CUDNN_LIB}/libcudnn${CMAKE_SHARED_LIBRARY_SUFFIX}) @@ -212,7 +213,7 @@ if (NOT WIN32) set(DEPS ${DEPS} ${EXTERNAL_LIB}) endif() -set(DEPS ${DEPS} ${OpenCV_LIBS}) +set(DEPS ${DEPS} ${OpenCV_LIBS}) add_executable(main src/main.cc src/preprocess_op.cc src/object_detector.cc) ADD_DEPENDENCIES(main ext-yaml-cpp) target_link_libraries(main ${DEPS}) diff --git a/deploy/cpp/README.md b/deploy/cpp/README.md index d7e55d1a7..c7c5e50ff 100644 --- a/deploy/cpp/README.md +++ b/deploy/cpp/README.md @@ -52,7 +52,7 @@ deploy/cpp ## 3.编译部署 ### 3.1 导出模型 -请确认您已经基于`PaddleDetection`的[export_model.py](../../tools/export_model.py)导出您的模型,并妥善保存到合适的位置。导出模型细节请参考 [导出模型教程](../../docs/advanced_tutorials/deploy/EXPORT_MODEL.md)。 +请确认您已经基于`PaddleDetection`的[export_model.py](https://github.com/PaddlePaddle/PaddleDetection/blob/master/tools/export_model.py)导出您的模型,并妥善保存到合适的位置。导出模型细节请参考 [导出模型教程](https://github.com/PaddlePaddle/PaddleDetection/blob/master/docs/advanced_tutorials/deploy/EXPORT_MODEL.md)。 模型导出后, 目录结构如下(以`yolov3_darknet`为例): ``` @@ -67,5 +67,5 @@ yolov3_darknet # 模型目录 ### 3.2 编译 仅支持在`Windows`和`Linux`平台编译和使用 -- [Linux 编译指南](./docs/linux_build.md) -- [Windows编译指南(使用Visual Studio 2019)](./docs/windows_vs2019_build.md) +- [Linux 编译指南](https://github.com/PaddlePaddle/PaddleDetection/blob/master/deploy/cpp/docs/linux_build.md) +- [Windows编译指南(使用Visual Studio 2019)](https://github.com/PaddlePaddle/PaddleDetection/blob/master/deploy/cpp/docs/windows_vs2019_build.md) diff --git a/deploy/cpp/docs/linux_build.md b/deploy/cpp/docs/linux_build.md index 
70788adfd..7ea27d1e2 100644
--- a/deploy/cpp/docs/linux_build.md
+++ b/deploy/cpp/docs/linux_build.md
@@ -1,7 +1,7 @@
# Linux平台编译指南

## 说明
-本文档在 `Linux`平台使用`GCC 4.8.5` 和 `GCC 4.9.4`测试过,如果需要使用更高G++版本编译使用,则需要重新编译Paddle预测库,请参考: [从源码编译Paddle预测库](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/advanced_guide/inference_deployment/inference/build_and_install_lib_cn.html)。
+本文档在 `Linux`平台使用`GCC 4.8.5` 和 `GCC 4.9.4`测试过,如果需要使用更高G++版本编译使用,则需要重新编译Paddle预测库,请参考: [从源码编译Paddle预测库](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/advanced_guide/inference_deployment/inference/build_and_install_lib_cn.html)。本文档使用的预置的opencv库是在ubuntu 16.04上用gcc4.8编译的,如果需要在ubuntu 16.04以外的系统环境编译,那么需自行编译opencv库。

## 前置条件
* G++ 4.8.2 ~ 4.9.4
@@ -40,38 +40,43 @@ fluid_inference
编译`cmake`的命令在`scripts/build.sh`中,请根据实际情况修改主要参数,其主要内容说明如下:
```
-
# 是否使用GPU(即是否使用 CUDA)
WITH_GPU=OFF
+
# 使用MKL or openblas
WITH_MKL=ON
+
# 是否集成 TensorRT(仅WITH_GPU=ON 有效)
WITH_TENSORRT=OFF
+
+# TensorRT 的include路径
+TENSORRT_INC_DIR=/path/to/TensorRT/include
+
# TensorRT 的lib路径
-TENSORRT_DIR=/path/to/TensorRT/
+TENSORRT_LIB_DIR=/path/to/TensorRT/lib
+
# Paddle 预测库路径
-PADDLE_DIR=/path/to/fluid_inference/
+PADDLE_DIR=/path/to/fluid_inference
+
# Paddle 的预测库是否使用静态库来编译
# 使用TensorRT时,Paddle的预测库通常为动态库
WITH_STATIC_LIB=OFF
+
# CUDA 的 lib 路径
-CUDA_LIB=/path/to/cuda/lib/
+CUDA_LIB=/path/to/cuda/lib
+
# CUDNN 的 lib 路径
-CUDNN_LIB=/path/to/cudnn/lib/
+CUDNN_LIB=/path/to/cudnn/lib

-# OPENCV 路径, 如果使用自带预编译版本可不修改
-sh $(pwd)/scripts/bootstrap.sh # 下载预编译版本的opencv
-OPENCV_DIR=$(pwd)/deps/opencv3gcc4.8/
+# 请检查以上各个路径是否正确

# 以下无需改动
-rm -rf build
-mkdir -p build
-cd build
cmake .. \
    -DWITH_GPU=${WITH_GPU} \
    -DWITH_MKL=${WITH_MKL} \
    -DWITH_TENSORRT=${WITH_TENSORRT} \
-    -DTENSORRT_DIR=${TENSORRT_DIR} \
+    -DTENSORRT_LIB_DIR=${TENSORRT_LIB_DIR} \
+    -DTENSORRT_INC_DIR=${TENSORRT_INC_DIR} \
    -DPADDLE_DIR=${PADDLE_DIR} \
    -DWITH_STATIC_LIB=${WITH_STATIC_LIB} \
    -DCUDA_LIB=${CUDA_LIB} \
@@ -86,18 +91,23 @@ make
sh ./scripts/build.sh
```
+**注意**: OPENCV依赖OPENBLAS,Ubuntu用户需确认系统是否已存在`libopenblas.so`。如未安装,可执行`apt-get install libopenblas-dev`进行安装。

### Step5: 预测及可视化
编译成功后,预测入口程序为`build/main`其主要命令参数说明如下:

| 参数 | 说明 |
| ---- | ---- |
-| model_dir | 导出的预测模型所在路径 |
-| image_path | 要预测的图片文件路径 |
-| video_path | 要预测的视频文件路径 |
-| use_gpu | 是否使用 GPU 预测, 支持值为0或1(默认值为0)|
-| --run_mode |使用GPU时,默认为fluid, 可选(fluid/trt_fp32/trt_fp16)|
+| --model_dir | 导出的预测模型所在路径 |
+| --image_path | 要预测的图片文件路径 |
+| --video_path | 要预测的视频文件路径 |
+| --camera_id | 用来预测的摄像头ID,默认为-1(表示不使用摄像头预测)|
+| --use_gpu | 是否使用 GPU 预测, 支持值为0或1(默认值为0)|
+| --gpu_id | 指定进行推理的GPU device id(默认值为0)|
+| --run_mode | 使用GPU时,默认为fluid, 可选(fluid/trt_fp32/trt_fp16)|
+| --run_benchmark | 是否重复预测来进行benchmark测速 |
+| --output_dir | 输出图片所在的文件夹, 默认为output |

-**注意**:如果同时设置了`video_path`和`image_path`,程序仅预测`video_path`。
+**注意**: 如果同时设置了`video_path`和`image_path`,程序仅预测`video_path`。

`样例一`:

```shell
#不使用`GPU`测试图片 `/root/projects/images/test.jpeg`
./build/main --model_dir=/root/projects/models/yolov3_darknet --image_path=/root/projects/images/test.jpeg
```

-图片文件`可视化预测结果`会保存在当前目录下`output.jpeg`文件中。
+图片文件`可视化预测结果`会保存在当前目录下`output.jpg`文件中。

`样例二`:
```shell
-#使用 `GPU`预测视频`/root/projects/videos/test.avi`
-./build/main --model_dir=/root/projects/models/yolov3_darknet --video_path=/root/projects/images/test.avi --use_gpu=1
+#使用 `GPU`预测视频`/root/projects/videos/test.mp4`
+./build/main --model_dir=/root/projects/models/yolov3_darknet --video_path=/root/projects/images/test.mp4 --use_gpu=1
```
-视频文件`可视化预测结果`会保存在当前目录下`output.avi`文件中。
+视频文件目前支持`.mp4`格式的预测,`可视化预测结果`会保存在当前目录下`output.mp4`文件中。
diff 
--git a/deploy/cpp/docs/windows_vs2019_build.md b/deploy/cpp/docs/windows_vs2019_build.md index 7f23a983c..a4f4b1973 100644 --- a/deploy/cpp/docs/windows_vs2019_build.md +++ b/deploy/cpp/docs/windows_vs2019_build.md @@ -4,9 +4,9 @@ Windows 平台下,我们使用`Visual Studio 2019 Community` 进行了测试 ## 前置条件 -* Visual Studio 2019 +* Visual Studio 2019 (根据Paddle预测库所使用的VS版本选择,请参考 [Visual Studio 不同版本二进制兼容性](https://docs.microsoft.com/zh-cn/cpp/porting/binary-compat-2015-2017?view=vs-2019) ) * CUDA 9.0 / CUDA 10.0,cudnn 7+ (仅在使用GPU版本的预测库时需要) -* CMake 3.0+ +* CMake 3.0+ [CMake下载](https://cmake.org/download/) 请确保系统已经安装好上述基本软件,我们使用的是`VS2019`的社区版。 @@ -40,12 +40,14 @@ fluid_inference 1. 在OpenCV官网下载适用于Windows平台的3.4.6版本, [下载地址](https://sourceforge.net/projects/opencvlibrary/files/3.4.6/opencv-3.4.6-vc14_vc15.exe/download) 2. 运行下载的可执行文件,将OpenCV解压至指定目录,如`D:\projects\opencv` -3. 配置环境变量,如下流程所示 +3. 配置环境变量,如下流程所示(如果使用全局绝对路径,可以不用设置环境变量) - 我的电脑->属性->高级系统设置->环境变量 - 在系统变量中找到Path(如没有,自行创建),并双击编辑 - 新建,将opencv路径填入并保存,如`D:\projects\opencv\build\x64\vc14\bin` -### Step4: 使用Visual Studio 2019直接编译CMake +### Step4: 编译 + +#### 通过图形化操作编译CMake 1. 打开Visual Studio 2019 Community,点击`继续但无需代码` ![step2](https://paddleseg.bj.bcebos.com/inference/vs2019_step1.png) @@ -60,14 +62,14 @@ fluid_inference ![step3](https://paddleseg.bj.bcebos.com/inference/vs2019_step4.png) -4. 点击`浏览`,分别设置编译选项指定`CUDA`、`OpenCV`、`Paddle预测库`的路径 +4. 点击`浏览`,分别设置编译选项指定`CUDA`、`CUDNN_LIB`、`OpenCV`、`Paddle预测库`的路径 三个编译参数的含义说明如下(带*表示仅在使用**GPU版本**预测库时指定, 其中CUDA库版本尽量对齐,**使用9.0、10.0版本,不使用9.2、10.1等版本CUDA库**): | 参数名 | 含义 | | ---- | ---- | | *CUDA_LIB | CUDA的库路径 | -| CUDNN_LIB | CUDNN的库路径 | +| *CUDNN_LIB | CUDNN的库路径 | | OPENCV_DIR | OpenCV的安装路径, | | PADDLE_DIR | Paddle预测库的路径 | @@ -81,6 +83,26 @@ fluid_inference ![step6](https://paddleseg.bj.bcebos.com/inference/vs2019_step6.png) +#### 通过命令行操作编译CMake + +1. 进入到`cpp`文件夹 +``` +cd D:\projects\PaddleDetection\deploy\cpp +``` + +2. 使用CMake生成项目文件 +``` +cmake . -G "Visual Studio 16 2019" -A x64 -T host=x64 -DWITH_GPU=ON -DWITH_MKL=ON -DCMAKE_BUILD_TYPE=Release -DCUDA_LIB=path_to_cuda_lib -DCUDNN_LIB=path_to_cudnn_lib -DPADDLE_DIR=path_to_paddle_lib -DOPENCV_DIR=path_to_opencv +``` + +例如: +``` +cmake . -G "Visual Studio 16 2019" -A x64 -T host=x64 -DWITH_GPU=ON -DWITH_MKL=ON -DCMAKE_BUILD_TYPE=Release -DCUDA_LIB=D:\projects\packages\cuda10_0\lib\x64 -DCUDNN_LIB=D:\projects\packages\cuda10_0\lib\x64 -DPADDLE_DIR=D:\projects\packages\fluid_inference -DOPENCV_DIR=D:\projects\packages\opencv3_4_6 +``` + +3. 
编译 +用`Visual Studio 16 2019`打开`cpp`文件夹下的`PaddleObjectDetector.sln`,点击`生成`->`全部生成` + ### Step5: 预测及可视化 上述`Visual Studio 2019`编译产出的可执行文件在`out\build\x64-Release`目录下,打开`cmd`,并切换到该目录: @@ -92,12 +114,19 @@ cd D:\projects\PaddleDetection\deploy\cpp\out\build\x64-Release | 参数 | 说明 | | ---- | ---- | -| model_dir | 导出的预测模型所在路径 | -| image_path | 要预测的图片文件路径 | -| video_path | 要预测的视频文件路径 | -| use_gpu | 是否使用 GPU 预测, 支持值为0或1(默认值为0)| +| --model_dir | 导出的预测模型所在路径 | +| --image_path | 要预测的图片文件路径 | +| --video_path | 要预测的视频文件路径 | +| --camera_id | Option | 用来预测的摄像头ID,默认为-1(表示不使用摄像头预测)| +| --use_gpu | 是否使用 GPU 预测, 支持值为0或1(默认值为0)| +| --gpu_id | 指定进行推理的GPU device id(默认值为0)| +| --run_mode | 使用GPU时,默认为fluid, 可选(fluid/trt_fp32/trt_fp16)| +| --run_benchmark | 是否重复预测来进行benchmark测速 | +| --output_dir | 输出图片所在的文件夹, 默认为output | -**注意**:如果同时设置了`video_path`和`image_path`,程序仅预测`video_path`。 +**注意**: +(1)如果同时设置了`video_path`和`image_path`,程序仅预测`video_path`。 +(2)如果提示找不到`opencv_world346.dll`,把`D:\projects\packages\opencv3_4_6\build\x64\vc14\bin`文件夹下的`opencv_world346.dll`拷贝到`main.exe`文件夹下即可。 `样例一`: @@ -106,13 +135,31 @@ cd D:\projects\PaddleDetection\deploy\cpp\out\build\x64-Release .\main --model_dir=D:\\models\\yolov3_darknet --image_path=D:\\images\\test.jpeg ``` -图片文件`可视化预测结果`会保存在当前目录下`output.jpeg`文件中。 +图片文件`可视化预测结果`会保存在当前目录下`output.jpg`文件中。 `样例二`: ```shell -#使用`GPU`测试视频 `D:\\videos\\test.avi` -.\main --model_dir=D:\\models\\yolov3_darknet --video_path=D:\\videos\\test.jpeg --use_gpu=1 +#使用`GPU`测试视频 `D:\\videos\\test.mp4` +.\main --model_dir=D:\\models\\yolov3_darknet --video_path=D:\\videos\\test.mp4 --use_gpu=1 ``` -视频文件`可视化预测结果`会保存在当前目录下`output.avi`文件中。 +视频文件目前支持`.mp4`格式的预测,`可视化预测结果`会保存在当前目录下`output.mp4`文件中。 + + +## 性能测试 +测试环境为:系统: Windows 10专业版系统,CPU: I9-9820X, GPU: GTX 2080 Ti,Paddle预测库: 1.8.4,CUDA: 10.0, CUDNN: 7.4. + +去掉前100轮warmup时间,测试100轮的平均时间,单位ms/image,只计算模型运行时间,不包括数据的处理和拷贝。 + + +|模型 | AnalysisPredictor(ms) | 输入| +|---|----|---| +| YOLOv3-MobileNetv1 | 41.51 | 608*608 +| faster_rcnn_r50_1x | 194.47 | 1333*1333 +| faster_rcnn_r50_vd_fpn_2x | 43.35 | 1344*1344 +| mask_rcnn_r50_fpn_1x | 96.96 | 1344*1344 +| mask_rcnn_r50_vd_fpn_2x | 97.66 | 1344*1344 +| ppyolo_r18vd | 5.54 | 320*320 +| ppyolo_2x | 56.93 | 608*608 +| ttfnet_darknet | 36.17 | 512*512 diff --git a/deploy/cpp/include/config_parser.h b/deploy/cpp/include/config_parser.h index f2102dcc9..05c5941a8 100644 --- a/deploy/cpp/include/config_parser.h +++ b/deploy/cpp/include/config_parser.h @@ -98,6 +98,13 @@ class ConfigPaser { return false; } + if (config["image_shape"].IsDefined()) { + image_shape_ = config["image_shape"].as>(); + } else { + std::cerr << "Please set image_shape." 
<< std::endl; + return false; + } + return true; } std::string mode_; @@ -107,6 +114,7 @@ class ConfigPaser { bool with_background_; YAML::Node preprocess_info_; std::vector label_list_; + std::vector image_shape_; }; } // namespace PaddleDetection diff --git a/deploy/cpp/include/object_detector.h b/deploy/cpp/include/object_detector.h index 328dd458e..a92be7ffc 100644 --- a/deploy/cpp/include/object_detector.h +++ b/deploy/cpp/include/object_detector.h @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -28,6 +29,7 @@ #include "include/preprocess_op.h" #include "include/config_parser.h" +using namespace paddle_infer; namespace PaddleDetection { // Object Detection Result @@ -54,12 +56,15 @@ cv::Mat VisualizeResult(const cv::Mat& img, class ObjectDetector { public: - explicit ObjectDetector(const std::string& model_dir, bool use_gpu = false, - const std::string& run_mode = "fluid") { + explicit ObjectDetector(const std::string& model_dir, + bool use_gpu=false, + const std::string& run_mode="fluid", + const int gpu_id=0) { config_.load_config(model_dir); threshold_ = config_.draw_threshold_; - preprocessor_.Init(config_.preprocess_info_, config_.arch_); - LoadModel(model_dir, use_gpu, config_.min_subgraph_size_, 1, run_mode); + image_shape_ = config_.image_shape_; + preprocessor_.Init(config_.preprocess_info_, image_shape_); + LoadModel(model_dir, use_gpu, config_.min_subgraph_size_, 1, run_mode, gpu_id); } // Load Paddle inference model @@ -68,12 +73,16 @@ class ObjectDetector { bool use_gpu, const int min_subgraph_size, const int batch_size = 1, - const std::string& run_mode = "fluid"); + const std::string& run_mode = "fluid", + const int gpu_id=0); // Run predictor - void Predict( - const cv::Mat& img, - std::vector* result); + void Predict(const cv::Mat& im, + const double threshold = 0.5, + const int warmup = 0, + const int repeats = 1, + const bool run_benchmark = false, + std::vector* result = nullptr); // Get Model Label list const std::vector& GetLabelList() const { @@ -88,12 +97,13 @@ class ObjectDetector { const cv::Mat& raw_mat, std::vector* result); - std::unique_ptr predictor_; + std::shared_ptr predictor_; Preprocessor preprocessor_; ImageBlob inputs_; std::vector output_data_; float threshold_; ConfigPaser config_; + std::vector image_shape_; }; } // namespace PaddleDetection diff --git a/deploy/cpp/include/preprocess_op.h b/deploy/cpp/include/preprocess_op.h index 1e6c8844e..71a9ace0b 100644 --- a/deploy/cpp/include/preprocess_op.h +++ b/deploy/cpp/include/preprocess_op.h @@ -14,6 +14,7 @@ #pragma once +#include #include #include @@ -31,29 +32,36 @@ namespace PaddleDetection { // Object for storing all preprocessed data class ImageBlob { public: - // Original image width and height - std::vector ori_im_size_; + // image width and height + std::vector im_shape_; // Buffer for image data after preprocessing std::vector im_data_; - // Original image width, height, shrink in float format - std::vector ori_im_size_f_; + // input image width, height + std::vector input_shape_; // Evaluation image width and height - std::vector eval_im_size_f_; + //std::vector eval_im_size_f_; + // Scale factor for image size to origin image size + std::vector scale_factor_; }; // Abstraction of preprocessing opration class class PreprocessOp { public: - virtual void Init(const YAML::Node& item, const std::string& arch) = 0; + virtual void Init(const YAML::Node& item, const std::vector image_shape) = 0; virtual void Run(cv::Mat* im, ImageBlob* data) = 0; }; +class InitInfo 
: public PreprocessOp{ + public: + virtual void Init(const YAML::Node& item, const std::vector image_shape) {} + virtual void Run(cv::Mat* im, ImageBlob* data); +}; + class Normalize : public PreprocessOp { public: - virtual void Init(const YAML::Node& item, const std::string& arch) { + virtual void Init(const YAML::Node& item, const std::vector image_shape) { mean_ = item["mean"].as>(); scale_ = item["std"].as>(); - is_channel_first_ = item["is_channel_first"].as(); is_scale_ = item["is_scale"].as(); } @@ -61,37 +69,29 @@ class Normalize : public PreprocessOp { private: // CHW or HWC - bool is_channel_first_; - bool is_scale_; std::vector mean_; std::vector scale_; + bool is_scale_; }; class Permute : public PreprocessOp { public: - virtual void Init(const YAML::Node& item, const std::string& arch) { - to_bgr_ = item["to_bgr"].as(); - is_channel_first_ = item["channel_first"].as(); - } - + virtual void Init(const YAML::Node& item, const std::vector image_shape) {} virtual void Run(cv::Mat* im, ImageBlob* data); - private: - // RGB to BGR - bool to_bgr_; - // CHW or HWC - bool is_channel_first_; }; class Resize : public PreprocessOp { public: - virtual void Init(const YAML::Node& item, const std::string& arch) { - arch_ = arch; + virtual void Init(const YAML::Node& item, const std::vector image_shape) { interp_ = item["interp"].as(); - max_size_ = item["max_size"].as(); - target_size_ = item["target_size"].as(); - image_shape_ = item["image_shape"].as>(); - } + //max_size_ = item["target_size"].as(); + keep_ratio_ = item["keep_ratio"].as(); + target_size_ = item["target_size"].as>(); + if (item["keep_ratio"]) { + input_shape_ = image_shape; + } + } // Compute best resize scale for x-dimension, y-dimension std::pair GenerateScale(const cv::Mat& im); @@ -99,17 +99,16 @@ class Resize : public PreprocessOp { virtual void Run(cv::Mat* im, ImageBlob* data); private: - std::string arch_; int interp_; - int max_size_; - int target_size_; - std::vector image_shape_; + bool keep_ratio_; + std::vector target_size_; + std::vector input_shape_; }; // Models with FPN need input shape % stride == 0 class PadStride : public PreprocessOp { public: - virtual void Init(const YAML::Node& item, const std::string& arch) { + virtual void Init(const YAML::Node& item, const std::vector image_shape) { stride_ = item["stride"].as(); } @@ -121,23 +120,25 @@ class PadStride : public PreprocessOp { class Preprocessor { public: - void Init(const YAML::Node& config_node, const std::string& arch) { - arch_ = arch; + void Init(const YAML::Node& config_node, const std::vector image_shape) { + // initialize image info at first + ops_["InitInfo"] = std::make_shared(); for (const auto& item : config_node) { auto op_name = item["type"].as(); + ops_[op_name] = CreateOp(op_name); - ops_[op_name]->Init(item, arch); + ops_[op_name]->Init(item, image_shape); } } std::shared_ptr CreateOp(const std::string& name) { - if (name == "Resize") { + if (name == "ResizeOp") { return std::make_shared(); - } else if (name == "Permute") { + } else if (name == "PermuteOp") { return std::make_shared(); - } else if (name == "Normalize") { + } else if (name == "NormalizeImageOp") { return std::make_shared(); - } else if (name == "PadStride") { + } else if (name == "PadBatchOp") { return std::make_shared(); } return nullptr; @@ -149,8 +150,8 @@ class Preprocessor { static const std::vector RUN_ORDER; private: - std::string arch_; std::unordered_map> ops_; }; } // namespace PaddleDetection + diff --git a/deploy/cpp/scripts/bootstrap.sh 
b/deploy/cpp/scripts/bootstrap.sh
deleted file mode 100644
index f9fc1d1ed..000000000
--- a/deploy/cpp/scripts/bootstrap.sh
+++ /dev/null
@@ -1,10 +0,0 @@
-# download pre-compiled opencv lib
-OPENCV_URL=https://paddleseg.bj.bcebos.com/deploy/docker/opencv3gcc4.8.tar.bz2
-if [ ! -d "./deps/opencv3gcc4.8" ]; then
- mkdir -p deps
- cd deps
- wget -c ${OPENCV_URL}
- tar xvfj opencv3gcc4.8.tar.bz2
- rm -rf opencv3gcc4.8.tar.bz2
- cd ..
-fi
diff --git a/deploy/cpp/scripts/build.sh b/deploy/cpp/scripts/build.sh
index 0cfd8ceb5..ffc9a4ff2 100644
--- a/deploy/cpp/scripts/build.sh
+++ b/deploy/cpp/scripts/build.sh
@@ -1,24 +1,63 @@
# 是否使用GPU(即是否使用 CUDA)
WITH_GPU=OFF
-# 使用MKL or openblas
+
+# 是否使用MKL or openblas,TX2需要设置为OFF
WITH_MKL=ON
+
# 是否集成 TensorRT(仅WITH_GPU=ON 有效)
WITH_TENSORRT=OFF
-# TensorRT 的路径
-TENSORRT_DIR=/path/to/TensorRT/
+
+# TensorRT 的include路径
+TENSORRT_INC_DIR=/path/to/tensorrt/include
+
+# TensorRT 的lib路径
+TENSORRT_LIB_DIR=/path/to/tensorrt/lib
+
# Paddle 预测库路径
PADDLE_DIR=/path/to/fluid_inference/
+
# Paddle 的预测库是否使用静态库来编译
# 使用TensorRT时,Paddle的预测库通常为动态库
WITH_STATIC_LIB=OFF
+
# CUDA 的 lib 路径
-CUDA_LIB=/path/to/cuda/lib/
+CUDA_LIB=/path/to/cuda/lib
+
# CUDNN 的 lib 路径
-CUDNN_LIB=/path/to/cudnn/lib/
+CUDNN_LIB=/path/to/cudnn/lib
+

-# OPENCV 路径, 如果使用自带预编译版本可不修改
-sh $(pwd)/scripts/bootstrap.sh # 下载预编译版本的opencv
-OPENCV_DIR=$(pwd)/deps/opencv3gcc4.8/
+MACHINE_TYPE=`uname -m`
+echo "MACHINE_TYPE: "${MACHINE_TYPE}
+
+
+if [ "$MACHINE_TYPE" = "x86_64" ]
+then
+  echo "set OPENCV_DIR for x86_64"
+  # linux系统通过以下命令下载预编译的opencv
+  mkdir -p $(pwd)/deps && cd $(pwd)/deps
+  wget -c https://bj.bcebos.com/paddleseg/deploy/opencv3.4.6gcc4.8ffmpeg.tar.gz2
+  tar xvfj opencv3.4.6gcc4.8ffmpeg.tar.gz2 && cd ..
+
+  # set OPENCV_DIR
+  OPENCV_DIR=$(pwd)/deps/opencv3.4.6gcc4.8ffmpeg/
+
+elif [ "$MACHINE_TYPE" = "aarch64" ]
+then
+  echo "set OPENCV_DIR for aarch64"
+  # TX2平台通过以下命令下载预编译的opencv
+  mkdir -p $(pwd)/deps && cd $(pwd)/deps
+  wget -c https://paddlemodels.bj.bcebos.com/TX2_JetPack4.3_opencv_3.4.10_gcc7.5.0.zip
+  unzip TX2_JetPack4.3_opencv_3.4.10_gcc7.5.0.zip && cd ..
+
+  # set OPENCV_DIR
+  OPENCV_DIR=$(pwd)/deps/TX2_JetPack4.3_opencv_3.4.10_gcc7.5.0/
+
+else
+  echo "Please set OPENCV_DIR manually"
+fi
+
+echo "OPENCV_DIR: "$OPENCV_DIR

# 以下无需改动
rm -rf build
@@ -28,10 +67,13 @@ cmake .. \
    -DWITH_GPU=${WITH_GPU} \
    -DWITH_MKL=${WITH_MKL} \
    -DWITH_TENSORRT=${WITH_TENSORRT} \
-    -DTENSORRT_DIR=${TENSORRT_DIR} \
+    -DTENSORRT_LIB_DIR=${TENSORRT_LIB_DIR} \
+    -DTENSORRT_INC_DIR=${TENSORRT_INC_DIR} \
    -DPADDLE_DIR=${PADDLE_DIR} \
    -DWITH_STATIC_LIB=${WITH_STATIC_LIB} \
    -DCUDA_LIB=${CUDA_LIB} \
    -DCUDNN_LIB=${CUDNN_LIB} \
    -DOPENCV_DIR=${OPENCV_DIR}
+
make
+echo "make finished!" 
diff --git a/deploy/cpp/src/main.cc b/deploy/cpp/src/main.cc index 63cd99f01..92ecb3206 100644 --- a/deploy/cpp/src/main.cc +++ b/deploy/cpp/src/main.cc @@ -17,6 +17,16 @@ #include #include #include +#include +#include + +#ifdef _WIN32 +#include +#include +#elif LINUX +#include +#include +#endif #include "include/object_detector.h" @@ -25,13 +35,64 @@ DEFINE_string(model_dir, "", "Path of inference model"); DEFINE_string(image_path, "", "Path of input image"); DEFINE_string(video_path, "", "Path of input video"); DEFINE_bool(use_gpu, false, "Infering with GPU or CPU"); -DEFINE_string(run_mode, "fluid", "mode of running(fluid/trt_fp32/trt_fp16)"); +DEFINE_bool(use_camera, false, "Use camera or not"); +DEFINE_string(run_mode, "fluid", "Mode of running(fluid/trt_fp32/trt_fp16)"); +DEFINE_int32(gpu_id, 0, "Device id of GPU to execute"); +DEFINE_int32(camera_id, -1, "Device id of camera to predict"); +DEFINE_bool(run_benchmark, false, "Whether to predict a image_file repeatedly for benchmark"); +DEFINE_double(threshold, 0.5, "Threshold of score."); +DEFINE_string(output_dir, "output", "Directory of output visualization files."); + +static std::string DirName(const std::string &filepath) { + auto pos = filepath.rfind(OS_PATH_SEP); + if (pos == std::string::npos) { + return ""; + } + return filepath.substr(0, pos); +} + +static bool PathExists(const std::string& path){ +#ifdef _WIN32 + struct _stat buffer; + return (_stat(path.c_str(), &buffer) == 0); +#else + struct stat buffer; + return (stat(path.c_str(), &buffer) == 0); +#endif // !_WIN32 +} + +static void MkDir(const std::string& path) { + if (PathExists(path)) return; + int ret = 0; +#ifdef _WIN32 + ret = _mkdir(path.c_str()); +#else + ret = mkdir(path.c_str(), 0755); +#endif // !_WIN32 + if (ret != 0) { + std::string path_error(path); + path_error += " mkdir failed!"; + throw std::runtime_error(path_error); + } +} + +static void MkDirs(const std::string& path) { + if (path.empty()) return; + if (PathExists(path)) return; + + MkDirs(DirName(path)); + MkDir(path); +} void PredictVideo(const std::string& video_path, PaddleDetection::ObjectDetector* det) { // Open video cv::VideoCapture capture; - capture.open(video_path.c_str()); + if (FLAGS_camera_id != -1){ + capture.open(FLAGS_camera_id); + }else{ + capture.open(video_path.c_str()); + } if (!capture.isOpened()) { printf("can not open video : %s\n", video_path.c_str()); return; @@ -44,9 +105,9 @@ void PredictVideo(const std::string& video_path, // Create VideoWriter for output cv::VideoWriter video_out; - std::string video_out_path = "output.avi"; + std::string video_out_path = "output.mp4"; video_out.open(video_out_path.c_str(), - CV_FOURCC('M', 'J', 'P', 'G'), + 0x00000021, video_fps, cv::Size(video_width, video_height), true); @@ -60,45 +121,71 @@ void PredictVideo(const std::string& video_path, auto colormap = PaddleDetection::GenerateColorMap(labels.size()); // Capture all frames and do inference cv::Mat frame; + int frame_id = 0; while (capture.read(frame)) { if (frame.empty()) { break; } - det->Predict(frame, &result); + det->Predict(frame, 0.5, 0, 1, false, &result); cv::Mat out_im = PaddleDetection::VisualizeResult( frame, result, labels, colormap); + for (const auto& item : result) { + printf("In frame id %d, we detect: class=%d confidence=%.2f rect=[%d %d %d %d]\n", + frame_id, + item.class_id, + item.confidence, + item.rect[0], + item.rect[1], + item.rect[2], + item.rect[3]); + } video_out.write(out_im); + frame_id += 1; } capture.release(); video_out.release(); } void 
PredictImage(const std::string& image_path, - PaddleDetection::ObjectDetector* det) { + const double threshold, + const bool run_benchmark, + PaddleDetection::ObjectDetector* det, + const std::string& output_dir = "output") { // Open input image as an opencv cv::Mat object cv::Mat im = cv::imread(image_path, 1); // Store all detected result std::vector result; - det->Predict(im, &result); - for (const auto& item : result) { - printf("class=%d confidence=%.2f rect=[%d %d %d %d]\n", - item.class_id, - item.confidence, - item.rect[0], - item.rect[1], - item.rect[2], - item.rect[3]); + if (run_benchmark) + { + det->Predict(im, threshold, 100, 100, run_benchmark, &result); + }else + { + det->Predict(im, 0.5, 0, 1, run_benchmark, &result); + for (const auto& item : result) { + printf("class=%d confidence=%.4f rect=[%d %d %d %d]\n", + item.class_id, + item.confidence, + item.rect[0], + item.rect[1], + item.rect[2], + item.rect[3]); + } + // Visualization result + auto labels = det->GetLabelList(); + auto colormap = PaddleDetection::GenerateColorMap(labels.size()); + cv::Mat vis_img = PaddleDetection::VisualizeResult( + im, result, labels, colormap); + std::vector compression_params; + compression_params.push_back(CV_IMWRITE_JPEG_QUALITY); + compression_params.push_back(95); + std::string output_path(output_dir); + if (output_dir.rfind(OS_PATH_SEP) != output_dir.size() - 1) { + output_path += OS_PATH_SEP; + } + output_path += "output.jpg"; + cv::imwrite(output_path, vis_img, compression_params); + printf("Visualized output saved as %s\n", output_path.c_str()); } - // Visualization result - auto labels = det->GetLabelList(); - auto colormap = PaddleDetection::GenerateColorMap(labels.size()); - cv::Mat vis_img = PaddleDetection::VisualizeResult( - im, result, labels, colormap); - std::vector compression_params; - compression_params.push_back(CV_IMWRITE_JPEG_QUALITY); - compression_params.push_back(95); - cv::imwrite("output.jpeg", vis_img, compression_params); - printf("Visualized output saved as output.jpeg\n"); } int main(int argc, char** argv) { @@ -115,15 +202,18 @@ int main(int argc, char** argv) { std::cout << "run_mode should be 'fluid', 'trt_fp32' or 'trt_fp16'."; return -1; } - // Load model and create a object detector + PaddleDetection::ObjectDetector det(FLAGS_model_dir, FLAGS_use_gpu, - FLAGS_run_mode); + FLAGS_run_mode, FLAGS_gpu_id); // Do inference on input video or image - if (!FLAGS_video_path.empty()) { + if (!FLAGS_video_path.empty() || FLAGS_use_camera) { PredictVideo(FLAGS_video_path, &det); } else if (!FLAGS_image_path.empty()) { - PredictImage(FLAGS_image_path, &det); + if (!PathExists(FLAGS_output_dir)) { + MkDirs(FLAGS_output_dir); + } + PredictImage(FLAGS_image_path, FLAGS_threshold, FLAGS_run_benchmark, &det, FLAGS_output_dir); } return 0; } diff --git a/deploy/cpp/src/object_detector.cc b/deploy/cpp/src/object_detector.cc index c6522f4f1..b196ed71d 100644 --- a/deploy/cpp/src/object_detector.cc +++ b/deploy/cpp/src/object_detector.cc @@ -11,8 +11,13 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
+#include +// for setprecision +#include +#include "include/object_detector.h" -# include "include/object_detector.h" + +using namespace paddle_infer; namespace PaddleDetection { @@ -21,22 +26,24 @@ void ObjectDetector::LoadModel(const std::string& model_dir, bool use_gpu, const int min_subgraph_size, const int batch_size, - const std::string& run_mode) { - paddle::AnalysisConfig config; - std::string prog_file = model_dir + OS_PATH_SEP + "__model__"; - std::string params_file = model_dir + OS_PATH_SEP + "__params__"; + const std::string& run_mode, + const int gpu_id) { + paddle_infer::Config config; + std::string prog_file = model_dir + OS_PATH_SEP + "model.pdmodel"; + std::string params_file = model_dir + OS_PATH_SEP + "model.pdiparams"; config.SetModel(prog_file, params_file); if (use_gpu) { - config.EnableUseGpu(100, 0); + config.EnableUseGpu(200, gpu_id); + config.SwitchIrOptim(true); if (run_mode != "fluid") { - auto precision = paddle::AnalysisConfig::Precision::kFloat32; + auto precision = paddle_infer::Config::Precision::kFloat32; if (run_mode == "trt_fp16") { - precision = paddle::AnalysisConfig::Precision::kHalf; + precision = paddle_infer::Config::Precision::kHalf; } else if (run_mode == "trt_int8") { printf("TensorRT int8 mode is not supported now, " "please use 'trt_fp32' or 'trt_fp16' instead"); } else { - if (run_mode != "trt_32") { + if (run_mode != "trt_fp32") { printf("run_mode should be 'fluid', 'trt_fp32' or 'trt_fp16'"); } } @@ -47,15 +54,15 @@ void ObjectDetector::LoadModel(const std::string& model_dir, precision, false, false); - } + } } else { config.DisableGpu(); } config.SwitchUseFeedFetchOps(false); - config.SwitchSpecifyInputNames(true); + config.DisableGlogInfo(); // Memory optimization config.EnableMemoryOptim(); - predictor_ = std::move(CreatePaddlePredictor(config)); + predictor_ = std::move(CreatePredictor(config)); } // Visualiztion MaskDetector results @@ -70,13 +77,15 @@ cv::Mat VisualizeResult(const cv::Mat& img, cv::Rect roi = cv::Rect(results[i].rect[0], results[i].rect[2], w, h); // Configure color and text size - std::string text = lable_list[results[i].class_id]; + std::ostringstream oss; + oss << std::setiosflags(std::ios::fixed) << std::setprecision(4); + oss << lable_list[results[i].class_id] << " "; + oss << results[i].confidence; + std::string text = oss.str(); int c1 = colormap[3 * results[i].class_id + 0]; int c2 = colormap[3 * results[i].class_id + 1]; int c3 = colormap[3 * results[i].class_id + 2]; cv::Scalar roi_color = cv::Scalar(c1, c2, c3); - text += " "; - text += std::to_string(static_cast(results[i].confidence * 100)) + "%"; int font_face = cv::FONT_HERSHEY_COMPLEX_SMALL; double font_scale = 0.5f; float thickness = 0.5; @@ -139,7 +148,7 @@ void ObjectDetector::Postprocess( int ymax = (output_data_[5 + j * 6] * rh); int wd = xmax - xmin; int hd = ymax - ymin; - if (score > threshold_) { + if (score > threshold_ && class_id > -1) { ObjectResult result_item; result_item.rect = {xmin, xmax, ymin, ymax}; result_item.class_id = class_id; @@ -150,44 +159,78 @@ void ObjectDetector::Postprocess( } void ObjectDetector::Predict(const cv::Mat& im, - std::vector* result) { + const double threshold, + const int warmup, + const int repeats, + const bool run_benchmark, + std::vector* result) { // Preprocess image Preprocess(im); // Prepare input tensor auto input_names = predictor_->GetInputNames(); for (const auto& tensor_name : input_names) { - auto in_tensor = predictor_->GetInputTensor(tensor_name); + auto in_tensor = 
predictor_->GetInputHandle(tensor_name); if (tensor_name == "image") { - int rh = inputs_.eval_im_size_f_[0]; - int rw = inputs_.eval_im_size_f_[1]; + int rh = inputs_.input_shape_[0]; + int rw = inputs_.input_shape_[1]; in_tensor->Reshape({1, 3, rh, rw}); - in_tensor->copy_from_cpu(inputs_.im_data_.data()); - } else if (tensor_name == "im_size") { - in_tensor->Reshape({1, 2}); - in_tensor->copy_from_cpu(inputs_.ori_im_size_.data()); - } else if (tensor_name == "im_info") { - in_tensor->Reshape({1, 3}); - in_tensor->copy_from_cpu(inputs_.eval_im_size_f_.data()); + in_tensor->CopyFromCpu(inputs_.im_data_.data()); } else if (tensor_name == "im_shape") { - in_tensor->Reshape({1, 3}); - in_tensor->copy_from_cpu(inputs_.ori_im_size_f_.data()); + in_tensor->Reshape({1, 2}); + in_tensor->CopyFromCpu(inputs_.im_shape_.data()); + } else if (tensor_name == "scale_factor") { + in_tensor->Reshape({1, 2}); + in_tensor->CopyFromCpu(inputs_.scale_factor_.data()); } } // Run predictor - predictor_->ZeroCopyRun(); - // Get output tensor - auto output_names = predictor_->GetOutputNames(); - auto out_tensor = predictor_->GetOutputTensor(output_names[0]); - std::vector output_shape = out_tensor->shape(); - // Calculate output length - int output_size = 1; - for (int j = 0; j < output_shape.size(); ++j) { + for (int i = 0; i < warmup; i++) + { + predictor_->Run(); + // Get output tensor + auto output_names = predictor_->GetOutputNames(); + auto out_tensor = predictor_->GetOutputHandle(output_names[0]); + std::vector output_shape = out_tensor->shape(); + // Calculate output length + int output_size = 1; + for (int j = 0; j < output_shape.size(); ++j) { output_size *= output_shape[j]; + } + + if (output_size < 6) { + std::cerr << "[WARNING] No object detected." << std::endl; + } + output_data_.resize(output_size); + out_tensor->CopyToCpu(output_data_.data()); } - output_data_.resize(output_size); - out_tensor->copy_to_cpu(output_data_.data()); + + std::clock_t start = clock(); + for (int i = 0; i < repeats; i++) + { + predictor_->Run(); + // Get output tensor + auto output_names = predictor_->GetOutputNames(); + auto out_tensor = predictor_->GetOutputHandle(output_names[0]); + std::vector output_shape = out_tensor->shape(); + // Calculate output length + int output_size = 1; + for (int j = 0; j < output_shape.size(); ++j) { + output_size *= output_shape[j]; + } + + if (output_size < 6) { + std::cerr << "[WARNING] No object detected." 
<< std::endl; + } + output_data_.resize(output_size); + out_tensor->CopyToCpu(output_data_.data()); + } + std::clock_t end = clock(); + float ms = static_cast(end - start) / CLOCKS_PER_SEC / repeats * 1000.; + printf("Inference: %f ms per batch image\n", ms); // Postprocessing result - Postprocess(im, result); + if(!run_benchmark) { + Postprocess(im, result); + } } std::vector GenerateColorMap(int num_class) { diff --git a/deploy/cpp/src/preprocess_op.cc b/deploy/cpp/src/preprocess_op.cc index b3bc27528..fac0317af 100644 --- a/deploy/cpp/src/preprocess_op.cc +++ b/deploy/cpp/src/preprocess_op.cc @@ -19,6 +19,18 @@ namespace PaddleDetection { +void InitInfo::Run(cv::Mat* im, ImageBlob* data) { + data->im_shape_ = { + static_cast(im->rows), + static_cast(im->cols) + }; + data->scale_factor_ = {1., 1.}; + data->input_shape_ = { + static_cast(im->rows), + static_cast(im->cols) + }; +} + void Normalize::Run(cv::Mat* im, ImageBlob* data) { double e = 1.0; if (is_scale_) { @@ -49,34 +61,34 @@ void Permute::Run(cv::Mat* im, ImageBlob* data) { } void Resize::Run(cv::Mat* im, ImageBlob* data) { - data->ori_im_size_ = { - static_cast(im->rows), - static_cast(im->cols) - }; - data->ori_im_size_f_ = { - static_cast(im->rows), - static_cast(im->cols), - 1.0 - }; auto resize_scale = GenerateScale(*im); cv::resize( *im, *im, cv::Size(), resize_scale.first, resize_scale.second, interp_); - if (max_size_ != 0 && !image_shape_.empty()) { + data->im_shape_ = { + static_cast(im->rows), + static_cast(im->cols), + }; + data->scale_factor_ = { + resize_scale.second, + resize_scale.first, + }; + + if (keep_ratio_) { + int max_size = input_shape_[1]; // Padding the image with 0 border cv::copyMakeBorder( *im, *im, 0, - max_size_ - im->rows, + max_size - im->rows, 0, - max_size_ - im->cols, + max_size - im->cols, cv::BORDER_CONSTANT, cv::Scalar(0)); } - data->eval_im_size_f_ = { - static_cast(im->rows), - static_cast(im->cols), - resize_scale.first + data->input_shape_ = { + static_cast(im->rows), + static_cast(im->cols), }; } @@ -85,23 +97,22 @@ std::pair Resize::GenerateScale(const cv::Mat& im) { int origin_w = im.cols; int origin_h = im.rows; - if (max_size_ != 0 && (arch_ == "RCNN" || arch_ == "RetinaNet")) { + if (keep_ratio_) { int im_size_max = std::max(origin_w, origin_h); int im_size_min = std::min(origin_w, origin_h); - float scale_ratio = - static_cast(target_size_) / static_cast(im_size_min); - if (max_size_ > 0) { - if (round(scale_ratio * im_size_max) > max_size_) { - scale_ratio = - static_cast(max_size_) / static_cast(im_size_max); - } - } + int target_size_max = *std::max_element(target_size_.begin(), target_size_.end()); + int target_size_min = *std::min_element(target_size_.begin(), target_size_.end()); + float scale_min = + static_cast(target_size_min) / static_cast(im_size_min); + float scale_max = + static_cast(target_size_max) / static_cast(im_size_max); + float scale_ratio = std::min(scale_min, scale_max); resize_scale = {scale_ratio, scale_ratio}; } else { resize_scale.first = - static_cast(target_size_) / static_cast(origin_w); + static_cast(target_size_[1]) / static_cast(origin_w); resize_scale.second = - static_cast(target_size_) / static_cast(origin_h); + static_cast(target_size_[0]) / static_cast(origin_h); } return resize_scale; } @@ -124,14 +135,17 @@ void PadStride::Run(cv::Mat* im, ImageBlob* data) { nw - rw, cv::BORDER_CONSTANT, cv::Scalar(0)); - (data->eval_im_size_f_)[0] = static_cast(im->rows); - (data->eval_im_size_f_)[1] = static_cast(im->cols); + data->input_shape_ = { + 
static_cast(im->rows), + static_cast(im->cols), + }; + } // Preprocessor op running order const std::vector Preprocessor::RUN_ORDER = { - "Resize", "Normalize", "PadStride", "Permute" + "InitInfo", "ResizeOp", "NormalizeImageOp", "PadStrideOp", "PermuteOp" }; void Preprocessor::Run(cv::Mat* im, ImageBlob* data) { diff --git a/deploy/python/README.md b/deploy/python/README.md index 105f62852..b8b3b87be 100644 --- a/deploy/python/README.md +++ b/deploy/python/README.md @@ -3,7 +3,7 @@ Python预测可以使用`tools/infer.py`,此种方式依赖PaddleDetection源码;也可以使用本篇教程预测方式,先将模型导出,使用一个独立的文件进行预测。 -本篇教程使用AnalysisPredictor对[导出模型](../../docs/advanced_tutorials/deploy/EXPORT_MODEL.md)进行高性能预测。 +本篇教程使用AnalysisPredictor对[导出模型](https://github.com/PaddlePaddle/PaddleDetection/blob/master/docs/advanced_tutorials/deploy/EXPORT_MODEL.md)进行高性能预测。 在PaddlePaddle中预测引擎和训练引擎底层有着不同的优化方法, 下面列出了两种不同的预测方式。Executor同时支持训练和预测,AnalysisPredictor则专门针对推理进行了优化,是基于[C++预测库](https://www.paddlepaddle.org.cn/documentation/docs/zh/advanced_guide/inference_deployment/inference/native_infer.html)的Python接口,该引擎可以对模型进行多项图优化,减少不必要的内存拷贝。如果用户在部署已训练模型的过程中对性能有较高的要求,我们提供了独立于PaddleDetection的预测脚本,方便用户直接集成部署。 @@ -18,7 +18,7 @@ Python预测可以使用`tools/infer.py`,此种方式依赖PaddleDetection源 ## 1. 导出预测模型 -PaddleDetection在训练过程包括网络的前向和优化器相关参数,而在部署过程中,我们只需要前向参数,具体参考:[导出模型](../../docs/advanced_tutorials/deploy/EXPORT_MODEL.md) +PaddleDetection在训练过程包括网络的前向和优化器相关参数,而在部署过程中,我们只需要前向参数,具体参考:[导出模型](https://github.com/PaddlePaddle/PaddleDetection/blob/master/docs/advanced_tutorials/deploy/EXPORT_MODEL.md) 导出后目录下,包括`__model__`,`__params__`和`infer_cfg.yml`三个文件。 @@ -42,12 +42,14 @@ python deploy/python/infer.py --model_dir=/path/to/models --image_file=/path/to/ | 参数 | 是否必须|含义 | |-------|-------|----------| | --model_dir | Yes|上述导出的模型路径 | -| --image_file | Yes |需要预测的图片 | -| --video_file | Yes |需要预测的视频 | +| --image_file | Option |需要预测的图片 | +| --video_file | Option |需要预测的视频 | +| --camera_id | Option | 用来预测的摄像头ID,默认为-1(表示不使用摄像头预测,可设置为:0 - (摄像头数目-1) ),预测过程中在可视化界面按`q`退出输出预测结果到:output/output.mp4| | --use_gpu |No|是否GPU,默认为False| | --run_mode |No|使用GPU时,默认为fluid, 可选(fluid/trt_fp32/trt_fp16)| | --threshold |No|预测得分的阈值,默认为0.5| | --output_dir |No|可视化结果保存的根目录,默认为output/| +| --run_benchmark |No|是否运行benchmark,同时需指定--image_file| 说明: diff --git a/deploy/python/infer.py b/deploy/python/infer.py index 77d10bf4e..745894d38 100644 --- a/deploy/python/infer.py +++ b/deploy/python/infer.py @@ -16,228 +16,143 @@ import os import argparse import time import yaml +import ast +from functools import reduce from PIL import Image import cv2 import numpy as np +import paddle import paddle.fluid as fluid +from preprocess import preprocess, ResizeOp, NormalizeImageOp, PermuteOp, PadStride from visualize import visualize_box_mask +from paddle.inference import Config +from paddle.inference import create_predictor +# Global dictionary +SUPPORT_MODELS = { + 'YOLO', + 'RCNN', +} -def decode_image(im_file, im_info): - """read rgb image - Args: - im_file (str/np.ndarray): path of image/ np.ndarray read by cv2 - im_info (dict): info of image - Returns: - im (np.ndarray): processed image (np.ndarray) - im_info (dict): info of processed image - """ - if isinstance(im_file, str): - with open(im_file, 'rb') as f: - im_read = f.read() - data = np.frombuffer(im_read, dtype='uint8') - im = cv2.imdecode(data, 1) # BGR mode, but need RGB mode - im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) - im_info['origin_shape'] = im.shape[:2] - im_info['resize_shape'] = im.shape[:2] - else: - im = im_file - im_info['origin_shape'] = im.shape[:2] - 
im_info['resize_shape'] = im.shape[:2] - return im, im_info - -class Resize(object): - """resize image by target_size and max_size +class Detector(object): + """ Args: - arch (str): model type - target_size (int): the target size of image - max_size (int): the max size of image - use_cv2 (bool): whether us cv2 - image_shape (list): input shape of model - interp (int): method of resize + config (object): config of model, defined by `Config(model_dir)` + model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml + use_gpu (bool): whether use gpu + run_mode (str): mode of running(fluid/trt_fp32/trt_fp16) + threshold (float): threshold to reserve the result for output. """ def __init__(self, - arch, - target_size, - max_size, - use_cv2=True, - image_shape=None, - interp=cv2.INTER_LINEAR): - self.target_size = target_size - self.max_size = max_size - self.image_shape = image_shape, - self.arch = arch - self.use_cv2 = use_cv2 - self.interp = interp - self.scale_set = {'RCNN', 'RetinaNet'} - - def __call__(self, im, im_info): - """ - Args: - im (np.ndarray): image (np.ndarray) - im_info (dict): info of image - Returns: - im (np.ndarray): processed image (np.ndarray) - im_info (dict): info of processed image - """ - im_channel = im.shape[2] - im_scale_x, im_scale_y = self.generate_scale(im) - if self.use_cv2: - im = cv2.resize( - im, - None, - None, - fx=im_scale_x, - fy=im_scale_y, - interpolation=self.interp) - else: - resize_w = int(im_scale_x * float(im.shape[1])) - resize_h = int(im_scale_y * float(im.shape[0])) - if self.max_size != 0: - raise TypeError( - 'If you set max_size to cap the maximum size of image,' - 'please set use_cv2 to True to resize the image.') - im = im.astype('uint8') - im = Image.fromarray(im) - im = im.resize((int(resize_w), int(resize_h)), self.interp) - im = np.array(im) - - # padding im when image_shape fixed by infer_cfg.yml - if self.max_size != 0 and self.image_shape is not None: - padding_im = np.zeros( - (self.max_size, self.max_size, im_channel), dtype=np.float32) - im_h, im_w = im.shape[:2] - padding_im[:im_h, :im_w, :] = im - im = padding_im - - if self.arch in self.scale_set: - im_info['scale'] = im_scale_x - im_info['resize_shape'] = im.shape[:2] - return im, im_info - - def generate_scale(self, im): - """ - Args: - im (np.ndarray): image (np.ndarray) - Returns: - im_scale_x: the resize ratio of X - im_scale_y: the resize ratio of Y - """ - origin_shape = im.shape[:2] - im_c = im.shape[2] - if self.max_size != 0 and self.arch in self.scale_set: - im_size_min = np.min(origin_shape[0:2]) - im_size_max = np.max(origin_shape[0:2]) - im_scale = float(self.target_size) / float(im_size_min) - if np.round(im_scale * im_size_max) > self.max_size: - im_scale = float(self.max_size) / float(im_size_max) - im_scale_x = im_scale - im_scale_y = im_scale - else: - im_scale_x = float(self.target_size) / float(origin_shape[1]) - im_scale_y = float(self.target_size) / float(origin_shape[0]) - return im_scale_x, im_scale_y - - -class Normalize(object): - """normalize image - Args: - mean (list): im - mean - std (list): im / std - is_scale (bool): whether need im / 255 - is_channel_first (bool): if True: image shape is CHW, else: HWC - """ + pred_config, + model_dir, + use_gpu=False, + run_mode='fluid', + threshold=0.5): + self.pred_config = pred_config + self.predictor = load_predictor( + model_dir, + run_mode=run_mode, + min_subgraph_size=self.pred_config.min_subgraph_size, + use_gpu=use_gpu) - def __init__(self, mean, std, is_scale=True, 
is_channel_first=False): - self.mean = mean - self.std = std - self.is_scale = is_scale - self.is_channel_first = is_channel_first + def preprocess(self, im): + preprocess_ops = [] + for op_info in self.pred_config.preprocess_infos: + new_op_info = op_info.copy() + op_type = new_op_info.pop('type') + preprocess_ops.append(eval(op_type)(**new_op_info)) + im, im_info = preprocess(im, preprocess_ops, + self.pred_config.input_shape) + inputs = create_inputs(im, im_info) + return inputs + + def postprocess(self, np_boxes, np_masks, inputs, threshold=0.5): + # postprocess output of predictor + results = {} + if self.pred_config.arch in ['SSD', 'Face']: + h, w = inputs['im_shape'] + scale_y, scale_x = inputs['scale_factor'] + w, h = float(h) / scale_y, float(w) / scale_x + np_boxes[:, 2] *= h + np_boxes[:, 3] *= w + np_boxes[:, 4] *= h + np_boxes[:, 5] *= w + expect_boxes = (np_boxes[:, 1] > threshold) & (np_boxes[:, 0] > -1) + np_boxes = np_boxes[expect_boxes, :] + for box in np_boxes: + print('class_id:{:d}, confidence:{:.4f},' + 'left_top:[{:.2f},{:.2f}],' + ' right_bottom:[{:.2f},{:.2f}]'.format( + int(box[0]), box[1], box[2], box[3], box[4], box[5])) + results['boxes'] = np_boxes + if np_masks is not None: + np_masks = np_masks[expect_boxes, :, :, :] + results['masks'] = np_masks + return results - def __call__(self, im, im_info): - """ + def predict(self, + image, + threshold=0.5, + warmup=0, + repeats=1, + run_benchmark=False): + ''' Args: - im (np.ndarray): image (np.ndarray) - im_info (dict): info of image + image (str/np.ndarray): path of image/ np.ndarray read by cv2 + threshold (float): threshold of predicted box' score Returns: - im (np.ndarray): processed image (np.ndarray) - im_info (dict): info of processed image - """ - im = im.astype(np.float32, copy=False) - if self.is_channel_first: - mean = np.array(self.mean)[:, np.newaxis, np.newaxis] - std = np.array(self.std)[:, np.newaxis, np.newaxis] - else: - mean = np.array(self.mean)[np.newaxis, np.newaxis, :] - std = np.array(self.std)[np.newaxis, np.newaxis, :] - if self.is_scale: - im = im / 255.0 - im -= mean - im /= std - return im, im_info - - -class Permute(object): - """permute image - Args: - to_bgr (bool): whether convert RGB to BGR - channel_first (bool): whether convert HWC to CHW - """ - - def __init__(self, to_bgr=False, channel_first=True): - self.to_bgr = to_bgr - self.channel_first = channel_first + results (dict): include 'boxes': np.ndarray: shape:[N,6], N: number of box, + matix element:[class, score, x_min, y_min, x_max, y_max] + MaskRCNN's results include 'masks': np.ndarray: + shape:[N, class_num, mask_resolution, mask_resolution] + ''' + inputs = self.preprocess(image) + np_boxes, np_masks = None, None + input_names = self.predictor.get_input_names() + for i in range(len(input_names)): + input_tensor = self.predictor.get_input_handle(input_names[i]) + input_tensor.copy_from_cpu(inputs[input_names[i]]) - def __call__(self, im, im_info): - """ - Args: - im (np.ndarray): image (np.ndarray) - im_info (dict): info of image - Returns: - im (np.ndarray): processed image (np.ndarray) - im_info (dict): info of processed image - """ - if self.channel_first: - im = im.transpose((2, 0, 1)).copy() - if self.to_bgr: - im = im[[2, 1, 0], :, :] - return im, im_info + for i in range(warmup): + self.predictor.run() + output_names = self.predictor.get_output_names() + boxes_tensor = self.predictor.get_output_handle(output_names[0]) + np_boxes = boxes_tensor.copy_to_cpu() + if self.pred_config.mask_resolution is not None: + 
masks_tensor = self.predictor.get_output_handle(output_names[1]) + np_masks = masks_tensor.copy_to_cpu() + t1 = time.time() + for i in range(repeats): + self.predictor.run() + output_names = self.predictor.get_output_names() + boxes_tensor = self.predictor.get_output_handle(output_names[0]) + np_boxes = boxes_tensor.copy_to_cpu() + if self.pred_config.mask_resolution is not None: + masks_tensor = self.predictor.get_output_handle(output_names[1]) + np_masks = masks_tensor.copy_to_cpu() + t2 = time.time() + ms = (t2 - t1) * 1000.0 / repeats + print("Inference: {} ms per batch image".format(ms)) + + # do not perform postprocess in benchmark mode + results = [] + if not run_benchmark: + if reduce(lambda x, y: x * y, np_boxes.shape) < 6: + print('[WARNNING] No object detected.') + results = {'boxes': np.array([])} + else: + results = self.postprocess( + np_boxes, np_masks, inputs, threshold=threshold) -class PadStride(object): - """ padding image for model with FPN - Args: - stride (bool): model with FPN need image shape % stride == 0 - """ + return results - def __init__(self, stride=0): - self.coarsest_stride = stride - def __call__(self, im, im_info): - """ - Args: - im (np.ndarray): image (np.ndarray) - im_info (dict): info of image - Returns: - im (np.ndarray): processed image (np.ndarray) - im_info (dict): info of processed image - """ - coarsest_stride = self.coarsest_stride - if coarsest_stride == 0: - return im - im_c, im_h, im_w = im.shape - pad_h = int(np.ceil(float(im_h) / coarsest_stride) * coarsest_stride) - pad_w = int(np.ceil(float(im_w) / coarsest_stride) * coarsest_stride) - padding_im = np.zeros((im_c, pad_h, pad_w), dtype=np.float32) - padding_im[:, :im_h, :im_w] = im - im_info['resize_shape'] = padding_im.shape[1:] - return padding_im, im_info - - -def create_inputs(im, im_info, model_arch='YOLO'): +def create_inputs(im, im_info): """generate input for different model type Args: im (np.ndarray): image (np.ndarray) @@ -247,30 +162,19 @@ def create_inputs(im, im_info, model_arch='YOLO'): inputs (dict): input of model """ inputs = {} - inputs['image'] = im - origin_shape = list(im_info['origin_shape']) - resize_shape = list(im_info['resize_shape']) - scale = im_info['scale'] - if 'YOLO' in model_arch: - im_size = np.array([origin_shape]).astype('int32') - inputs['im_size'] = im_size - elif 'RetinaNet' in model_arch: - im_info = np.array([resize_shape + [scale]]).astype('float32') - inputs['im_info'] = im_info - elif 'RCNN' in model_arch: - im_info = np.array([resize_shape + [scale]]).astype('float32') - im_shape = np.array([origin_shape + [1.]]).astype('float32') - inputs['im_info'] = im_info - inputs['im_shape'] = im_shape + inputs['image'] = np.array((im, )).astype('float32') + inputs['im_shape'] = np.array((im_info['im_shape'], )).astype('float32') + inputs['scale_factor'] = np.array( + (im_info['scale_factor'], )).astype('float32') + return inputs -class Config(): +class PredictConfig(): """set config of preprocess, postprocess and visualize Args: model_dir (str): root path of model.yml """ - support_models = ['YOLO', 'SSD', 'RetinaNet', 'RCNN', 'Face'] def __init__(self, model_dir): # parsing Yaml config for Preprocess @@ -280,24 +184,32 @@ class Config(): self.check_model(yml_conf) self.arch = yml_conf['arch'] self.preprocess_infos = yml_conf['Preprocess'] - self.use_python_inference = yml_conf['use_python_inference'] self.min_subgraph_size = yml_conf['min_subgraph_size'] self.labels = yml_conf['label_list'] self.mask_resolution = None if 'mask_resolution' in yml_conf: 
self.mask_resolution = yml_conf['mask_resolution'] + self.input_shape = yml_conf['image_shape'] + self.print_config() def check_model(self, yml_conf): """ Raises: ValueError: loaded model not in supported model type """ - for support_model in self.support_models: + for support_model in SUPPORT_MODELS: if support_model in yml_conf['arch']: return True - raise ValueError( - "Unsupported arch: {}, expect SSD, YOLO, RetinaNet, RCNN and Face". - format(yml_conf['arch'])) + raise ValueError("Unsupported arch: {}, expect {}".format(yml_conf[ + 'arch'], SUPPORT_MODELS)) + + def print_config(self): + print('----------- Model Configuration -----------') + print('%s: %s' % ('Model Arch', self.arch)) + print('%s: ' % ('Transform Order')) + for op_info in self.preprocess_infos: + print('--%s: %s' % ('transform op', op_info['type'])) + print('--------------------------------------------') def load_predictor(model_dir, @@ -321,16 +233,17 @@ def load_predictor(model_dir, if run_mode == 'trt_int8': raise ValueError("TensorRT int8 mode is not supported now, " "please use trt_fp32 or trt_fp16 instead.") + config = Config( + os.path.join(model_dir, 'model.pdmodel'), + os.path.join(model_dir, 'model.pdiparams')) precision_map = { - 'trt_fp32': fluid.core.AnalysisConfig.Precision.Float32, - 'trt_fp16': fluid.core.AnalysisConfig.Precision.Half + 'trt_int8': Config.Precision.Int8, + 'trt_fp32': Config.Precision.Float32, + 'trt_fp16': Config.Precision.Half } - config = fluid.core.AnalysisConfig( - os.path.join(model_dir, '__model__'), - os.path.join(model_dir, '__params__')) if use_gpu: # initial GPU memory(M), device ID - config.enable_use_gpu(100, 0) + config.enable_use_gpu(200, 0) # optimize graph and fuse op config.switch_ir_optim(True) else: @@ -351,32 +264,23 @@ def load_predictor(model_dir, config.enable_memory_optim() # disable feed, fetch OP, needed by zero_copy_run config.switch_use_feed_fetch_ops(False) - predictor = fluid.core.create_paddle_predictor(config) + predictor = create_predictor(config) return predictor -def load_executor(model_dir, use_gpu=False): - if use_gpu: - place = fluid.CUDAPlace(0) - else: - place = fluid.CPUPlace() - exe = fluid.Executor(place) - program, feed_names, fetch_targets = fluid.io.load_inference_model( - dirname=model_dir, - executor=exe, - model_filename='__model__', - params_filename='__params__') - return exe, program, fetch_targets - - def visualize(image_file, results, labels, mask_resolution=14, - output_dir='output/'): + output_dir='output/', + threshold=0.5): # visualize the predict result im = visualize_box_mask( - image_file, results, labels, mask_resolution=mask_resolution) + image_file, + results, + labels, + mask_resolution=mask_resolution, + threshold=threshold) img_name = os.path.split(image_file)[-1] if not os.path.exists(output_dir): os.makedirs(output_dir) @@ -385,142 +289,45 @@ def visualize(image_file, print("save result to: " + out_path) -class Detector(): - """ - Args: - model_dir (str): root path of __model__, __params__ and infer_cfg.yml - use_gpu (bool): whether use gpu - """ - - def __init__(self, - model_dir, - use_gpu=False, - run_mode='fluid', - threshold=0.5): - self.config = Config(model_dir) - if self.config.use_python_inference: - self.executor, self.program, self.fecth_targets = load_executor( - model_dir, use_gpu=use_gpu) - else: - self.predictor = load_predictor( - model_dir, - run_mode=run_mode, - min_subgraph_size=self.config.min_subgraph_size, - use_gpu=use_gpu) - self.preprocess_ops = [] - for op_info in 
self.config.preprocess_infos: - op_type = op_info.pop('type') - if op_type == 'Resize': - op_info['arch'] = self.config.arch - self.preprocess_ops.append(eval(op_type)(**op_info)) +def print_arguments(args): + print('----------- Running Arguments -----------') + for arg, value in sorted(vars(args).items()): + print('%s: %s' % (arg, value)) + print('------------------------------------------') - def preprocess(self, im): - # process image by preprocess_ops - im_info = { - 'scale': 1., - 'origin_shape': None, - 'resize_shape': None, - } - im, im_info = decode_image(im, im_info) - for operator in self.preprocess_ops: - im, im_info = operator(im, im_info) - im = np.array((im, )).astype('float32') - inputs = create_inputs(im, im_info, self.config.arch) - return inputs, im_info - - def postprocess(self, np_boxes, np_masks, im_info, threshold=0.5): - # postprocess output of predictor - results = {} - if self.config.arch in ['SSD', 'Face']: - w, h = im_info['origin_shape'] - np_boxes[:, 2] *= h - np_boxes[:, 3] *= w - np_boxes[:, 4] *= h - np_boxes[:, 5] *= w - expect_boxes = np_boxes[:, 1] > threshold - np_boxes = np_boxes[expect_boxes, :] - for box in np_boxes: - print('class_id:{:d}, confidence:{:.2f},' - 'left_top:[{:.2f},{:.2f}],' - ' right_bottom:[{:.2f},{:.2f}]'.format( - int(box[0]), box[1], box[2], box[3], box[4], box[5])) - results['boxes'] = np_boxes - if np_masks is not None: - np_masks = np_masks[expect_boxes, :, :, :] - results['masks'] = np_masks - return results - def predict(self, image, threshold=0.5): - ''' - Args: - image (str/np.ndarray): path of image/ np.ndarray read by cv2 - threshold (float): threshold of predicted box' score - Returns: - results (dict): include 'boxes': np.ndarray: shape:[N,6], N: number of box, - matix element:[class, score, x_min, y_min, x_max, y_max] - MaskRCNN's results include 'masks': np.ndarray: - shape:[N, class_num, mask_resolution, mask_resolution] - ''' - inputs, im_info = self.preprocess(image) - np_boxes, np_masks = None, None - if self.config.use_python_inference: - t1 = time.time() - outs = self.executor.run(self.program, - feed=inputs, - fetch_list=self.fecth_targets, - return_numpy=False) - t2 = time.time() - ms = (t2 - t1) * 1000.0 - print("Inference: {} ms per batch image".format(ms)) - - np_boxes = np.array(outs[0]) - if self.config.mask_resolution is not None: - np_masks = np.array(outs[1]) - else: - input_names = self.predictor.get_input_names() - for i in range(len(inputs)): - input_tensor = self.predictor.get_input_tensor(input_names[i]) - input_tensor.copy_from_cpu(inputs[input_names[i]]) - t1 = time.time() - self.predictor.zero_copy_run() - t2 = time.time() - - output_names = self.predictor.get_output_names() - boxes_tensor = self.predictor.get_output_tensor(output_names[0]) - np_boxes = boxes_tensor.copy_to_cpu() - if self.config.mask_resolution is not None: - masks_tensor = self.predictor.get_output_tensor(output_names[1]) - np_masks = masks_tensor.copy_to_cpu() - - ms = (t2 - t1) * 1000.0 - print("Inference: {} ms per batch image".format(ms)) - - results = self.postprocess( - np_boxes, np_masks, im_info, threshold=threshold) - return results - - -def predict_image(): - detector = Detector( - FLAGS.model_dir, use_gpu=FLAGS.use_gpu, run_mode=FLAGS.run_mode) - results = detector.predict(FLAGS.image_file, FLAGS.threshold) - visualize( - FLAGS.image_file, - results, - detector.config.labels, - mask_resolution=detector.config.mask_resolution, - output_dir=FLAGS.output_dir) +def predict_image(detector): + if FLAGS.run_benchmark: + 
detector.predict( + FLAGS.image_file, + FLAGS.threshold, + warmup=100, + repeats=100, + run_benchmark=True) + else: + results = detector.predict(FLAGS.image_file, FLAGS.threshold) + visualize( + FLAGS.image_file, + results, + detector.pred_config.labels, + mask_resolution=detector.pred_config.mask_resolution, + output_dir=FLAGS.output_dir, + threshold=FLAGS.threshold) -def predict_video(): - detector = Detector( - FLAGS.model_dir, use_gpu=FLAGS.use_gpu, run_mode=FLAGS.run_mode) - capture = cv2.VideoCapture(FLAGS.video_file) +def predict_video(detector, camera_id): + if camera_id != -1: + capture = cv2.VideoCapture(camera_id) + video_name = 'output.mp4' + else: + capture = cv2.VideoCapture(FLAGS.video_file) + video_name = os.path.split(FLAGS.video_file)[-1] fps = 30 width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH)) height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT)) + # yapf: disable fourcc = cv2.VideoWriter_fourcc(*'mp4v') - video_name = os.path.split(FLAGS.video_file)[-1] + # yapf: enable if not os.path.exists(FLAGS.output_dir): os.makedirs(FLAGS.output_dir) out_path = os.path.join(FLAGS.output_dir, video_name) @@ -536,33 +343,67 @@ def predict_video(): im = visualize_box_mask( frame, results, - detector.config.labels, - mask_resolution=detector.config.mask_resolution) + detector.pred_config.labels, + mask_resolution=detector.pred_config.mask_resolution, + threshold=FLAGS.threshold) im = np.array(im) writer.write(im) + if camera_id != -1: + cv2.imshow('Mask Detection', im) + if cv2.waitKey(1) & 0xFF == ord('q'): + break writer.release() +def main(): + pred_config = PredictConfig(FLAGS.model_dir) + detector = Detector( + pred_config, + FLAGS.model_dir, + use_gpu=FLAGS.use_gpu, + run_mode=FLAGS.run_mode) + # predict from image + if FLAGS.image_file != '': + predict_image(detector) + # predict from video file or camera video stream + if FLAGS.video_file != '' or FLAGS.camera_id != -1: + predict_video(detector, FLAGS.camera_id) + + if __name__ == '__main__': + paddle.enable_static() parser = argparse.ArgumentParser(description=__doc__) parser.add_argument( "--model_dir", type=str, default=None, - help=("Directory include:'__model__', '__params__', " + help=("Directory include:'model.pdiparams', 'model.pdmodel', " "'infer_cfg.yml', created by tools/export_model.py."), required=True) parser.add_argument( "--image_file", type=str, default='', help="Path of image file.") parser.add_argument( "--video_file", type=str, default='', help="Path of video file.") + parser.add_argument( + "--camera_id", + type=int, + default=-1, + help="device id of camera to predict.") parser.add_argument( "--run_mode", type=str, default='fluid', help="mode of running(fluid/trt_fp32/trt_fp16)") parser.add_argument( - "--use_gpu", default=False, help="Whether to predict with GPU.") + "--use_gpu", + type=ast.literal_eval, + default=False, + help="Whether to predict with GPU.") + parser.add_argument( + "--run_benchmark", + type=ast.literal_eval, + default=False, + help="Whether to predict a image_file repeatedly for benchmark") parser.add_argument( "--threshold", type=float, default=0.5, help="Threshold of score.") parser.add_argument( @@ -572,9 +413,8 @@ if __name__ == '__main__': help="Directory of output visualization files.") FLAGS = parser.parse_args() + print_arguments(FLAGS) if FLAGS.image_file != '' and FLAGS.video_file != '': assert "Cannot predict image and video at the same time" - if FLAGS.image_file != '': - predict_image() - if FLAGS.video_file != '': - predict_video() + + main() diff --git 
a/deploy/python/preprocess.py b/deploy/python/preprocess.py new file mode 100644 index 000000000..062e27893 --- /dev/null +++ b/deploy/python/preprocess.py @@ -0,0 +1,216 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from PIL import Image +import cv2 +import numpy as np + + +def decode_image(im_file, im_info): + """read rgb image + Args: + im_file (str|np.ndarray): input can be image path or np.ndarray + im_info (dict): info of image + Returns: + im (np.ndarray): processed image (np.ndarray) + im_info (dict): info of processed image + """ + if isinstance(im_file, str): + with open(im_file, 'rb') as f: + im_read = f.read() + data = np.frombuffer(im_read, dtype='uint8') + im = cv2.imdecode(data, 1) # BGR mode, but need RGB mode + im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) + else: + im = im_file + im_info['im_shape'] = np.array(im.shape[:2], dtype=np.float32) + return im, im_info + + +class ResizeOp(object): + """resize image by target_size and max_size + Args: + target_size (int): the target size of image + keep_ratio (bool): whether keep_ratio or not, default true + interp (int): method of resize + """ + + def __init__( + self, + target_size, + keep_ratio=True, + interp=cv2.INTER_LINEAR, ): + if isinstance(target_size, int): + target_size = [target_size, target_size] + self.target_size = target_size + self.keep_ratio = keep_ratio + self.interp = interp + + def __call__(self, im, im_info): + """ + Args: + im (np.ndarray): image (np.ndarray) + im_info (dict): info of image + Returns: + im (np.ndarray): processed image (np.ndarray) + im_info (dict): info of processed image + """ + im_channel = im.shape[2] + im_scale_y, im_scale_x = self.generate_scale(im) + im = cv2.resize( + im, + None, + None, + fx=im_scale_x, + fy=im_scale_y, + interpolation=self.interp) + im_info['im_shape'] = np.array(im.shape[:2]).astype('float32') + im_info['scale_factor'] = np.array( + [im_scale_y, im_scale_x]).astype('float32') + # padding im when image_shape fixed by infer_cfg.yml + if self.keep_ratio: + max_size = im_info['input_shape'][1] + padding_im = np.zeros( + (max_size, max_size, im_channel), dtype=np.float32) + im_h, im_w = im.shape[:2] + padding_im[:im_h, :im_w, :] = im + im = padding_im + return im, im_info + + def generate_scale(self, im): + """ + Args: + im (np.ndarray): image (np.ndarray) + Returns: + im_scale_x: the resize ratio of X + im_scale_y: the resize ratio of Y + """ + origin_shape = im.shape[:2] + im_c = im.shape[2] + if self.keep_ratio: + im_size_min = np.min(origin_shape) + im_size_max = np.max(origin_shape) + target_size_min = np.min(self.target_size) + target_size_max = np.max(self.target_size) + im_scale = float(target_size_min) / float(im_size_min) + if np.round(im_scale * im_size_max) > target_size_max: + im_scale = float(target_size_max) / float(im_size_max) + im_scale_x = im_scale + im_scale_y = im_scale + else: + resize_h, resize_w = self.target_size + im_scale_y = resize_h / float(origin_shape[0]) + im_scale_x = 
resize_w / float(origin_shape[1]) + return im_scale_y, im_scale_x + + +class NormalizeImageOp(object): + """normalize image + Args: + mean (list): im - mean + std (list): im / std + is_scale (bool): whether need im / 255 + is_channel_first (bool): if True: image shape is CHW, else: HWC + """ + + def __init__(self, mean, std, is_scale=True): + self.mean = mean + self.std = std + self.is_scale = is_scale + + def __call__(self, im, im_info): + """ + Args: + im (np.ndarray): image (np.ndarray) + im_info (dict): info of image + Returns: + im (np.ndarray): processed image (np.ndarray) + im_info (dict): info of processed image + """ + im = im.astype(np.float32, copy=False) + mean = np.array(self.mean)[np.newaxis, np.newaxis, :] + std = np.array(self.std)[np.newaxis, np.newaxis, :] + + if self.is_scale: + im = im / 255.0 + + im -= mean + im /= std + return im, im_info + + +class PermuteOp(object): + """permute image + Args: + to_bgr (bool): whether convert RGB to BGR + channel_first (bool): whether convert HWC to CHW + """ + + def __init__(self, ): + super(PermuteOp, self).__init__() + + def __call__(self, im, im_info): + """ + Args: + im (np.ndarray): image (np.ndarray) + im_info (dict): info of image + Returns: + im (np.ndarray): processed image (np.ndarray) + im_info (dict): info of processed image + """ + im = im.transpose((2, 0, 1)).copy() + return im, im_info + + +class PadStride(object): + """ padding image for model with FPN + Args: + stride (bool): model with FPN need image shape % stride == 0 + """ + + def __init__(self, stride=0): + self.coarsest_stride = stride + + def __call__(self, im, im_info): + """ + Args: + im (np.ndarray): image (np.ndarray) + im_info (dict): info of image + Returns: + im (np.ndarray): processed image (np.ndarray) + im_info (dict): info of processed image + """ + coarsest_stride = self.coarsest_stride + if coarsest_stride == 0: + return im + im_c, im_h, im_w = im.shape + pad_h = int(np.ceil(float(im_h) / coarsest_stride) * coarsest_stride) + pad_w = int(np.ceil(float(im_w) / coarsest_stride) * coarsest_stride) + padding_im = np.zeros((im_c, pad_h, pad_w), dtype=np.float32) + padding_im[:, :im_h, :im_w] = im + return padding_im, im_info + + +def preprocess(im, preprocess_ops, input_shape): + # process image by preprocess_ops + im_info = { + 'scale_factor': np.array( + [1., 1.], dtype=np.float32), + 'im_shape': None, + 'input_shape': input_shape, + } + im, im_info = decode_image(im, im_info) + for operator in preprocess_ops: + im, im_info = operator(im, im_info) + return im, im_info diff --git a/deploy/python/visualize.py b/deploy/python/visualize.py index 075a3619f..1c136be4d 100644 --- a/deploy/python/visualize.py +++ b/deploy/python/visualize.py @@ -18,20 +18,22 @@ from __future__ import division import cv2 import numpy as np from PIL import Image, ImageDraw +from scipy import ndimage -def visualize_box_mask(im, results, labels, mask_resolution=14): - """ +def visualize_box_mask(im, results, labels, mask_resolution=14, threshold=0.5): + """ Args: im (str/np.ndarray): path of image/np.ndarray read by cv2 - results (dict): include 'boxes': np.ndarray: shape:[N,6], N: number of box, + results (dict): include 'boxes': np.ndarray: shape:[N,6], N: number of box, matix element:[class, score, x_min, y_min, x_max, y_max] - MaskRCNN's results include 'masks': np.ndarray: - shape:[N, class_num, mask_resolution, mask_resolution] + MaskRCNN's results include 'masks': np.ndarray: + shape:[N, class_num, mask_resolution, mask_resolution] labels (list): labels:['class1', ..., 
'classn'] mask_resolution (int): shape of a mask is:[mask_resolution, mask_resolution] + threshold (float): Threshold of score. Returns: - im (PIL.Image.Image): visualized image + im (PIL.Image.Image): visualized image """ if isinstance(im, str): im = Image.open(im).convert('RGB') @@ -46,15 +48,23 @@ def visualize_box_mask(im, results, labels, mask_resolution=14): resolution=mask_resolution) if 'boxes' in results: im = draw_box(im, results['boxes'], labels) + if 'segm' in results: + im = draw_segm( + im, + results['segm'], + results['label'], + results['score'], + labels, + threshold=threshold) return im def get_color_map_list(num_classes): - """ + """ Args: num_classes (int): number of class Returns: - color_map (list): RGB color list + color_map (list): RGB color list """ color_map = num_classes * [0, 0, 0] for i in range(0, num_classes): @@ -71,9 +81,9 @@ def get_color_map_list(num_classes): def expand_boxes(boxes, scale=0.0): - """ + """ Args: - boxes (np.ndarray): shape:[N,4], N:number of box, + boxes (np.ndarray): shape:[N,4], N:number of box, matix element:[x_min, y_min, x_max, y_max] scale (float): scale of boxes Returns: @@ -94,17 +104,17 @@ def expand_boxes(boxes, scale=0.0): def draw_mask(im, np_boxes, np_masks, labels, resolution=14, threshold=0.5): - """ + """ Args: im (PIL.Image.Image): PIL image - np_boxes (np.ndarray): shape:[N,6], N: number of box, + np_boxes (np.ndarray): shape:[N,6], N: number of box, matix element:[class, score, x_min, y_min, x_max, y_max] np_masks (np.ndarray): shape:[N, class_num, resolution, resolution] labels (list): labels:['class1', ..., 'classn'] resolution (int): shape of a mask is:[resolution, resolution] threshold (float): threshold of mask Returns: - im (PIL.Image.Image): visualized image + im (PIL.Image.Image): visualized image """ color_list = get_color_map_list(len(labels)) scale = (resolution + 2.0) / resolution @@ -149,14 +159,14 @@ def draw_mask(im, np_boxes, np_masks, labels, resolution=14, threshold=0.5): def draw_box(im, np_boxes, labels): - """ + """ Args: im (PIL.Image.Image): PIL image - np_boxes (np.ndarray): shape:[N,6], N: number of box, + np_boxes (np.ndarray): shape:[N,6], N: number of box, matix element:[class, score, x_min, y_min, x_max, y_max] labels (list): labels:['class1', ..., 'classn'] Returns: - im (PIL.Image.Image): visualized image + im (PIL.Image.Image): visualized image """ draw_thickness = min(im.size) // 320 draw = ImageDraw.Draw(im) @@ -180,9 +190,60 @@ def draw_box(im, np_boxes, labels): fill=color) # draw label - text = "{} {:.2f}".format(labels[clsid], score) + text = "{} {:.4f}".format(labels[clsid], score) tw, th = draw.textsize(text) draw.rectangle( [(xmin + 1, ymin - th), (xmin + tw + 1, ymin)], fill=color) draw.text((xmin + 1, ymin - th), text, fill=(255, 255, 255)) return im + + +def draw_segm(im, + np_segms, + np_label, + np_score, + labels, + threshold=0.5, + alpha=0.7): + """ + Draw segmentation on image + """ + mask_color_id = 0 + w_ratio = .4 + color_list = get_color_map_list(len(labels)) + im = np.array(im).astype('float32') + clsid2color = {} + np_segms = np_segms.astype(np.uint8) + for i in range(np_segms.shape[0]): + mask, score, clsid = np_segms[i], np_score[i], np_label[i] + 1 + if score < threshold: + continue + + if clsid not in clsid2color: + clsid2color[clsid] = color_list[clsid] + color_mask = clsid2color[clsid] + for c in range(3): + color_mask[c] = color_mask[c] * (1 - w_ratio) + w_ratio * 255 + idx = np.nonzero(mask) + color_mask = np.array(color_mask) + im[idx[0], idx[1], :] *= 1.0 
- alpha + im[idx[0], idx[1], :] += alpha * color_mask + sum_x = np.sum(mask, axis=0) + x = np.where(sum_x > 0.5)[0] + sum_y = np.sum(mask, axis=1) + y = np.where(sum_y > 0.5)[0] + x0, x1, y0, y1 = x[0], x[-1], y[0], y[-1] + cv2.rectangle(im, (x0, y0), (x1, y1), + tuple(color_mask.astype('int32').tolist()), 1) + bbox_text = '%s %.2f' % (labels[clsid], score) + t_size = cv2.getTextSize(bbox_text, 0, 0.3, thickness=1)[0] + cv2.rectangle(im, (x0, y0), (x0 + t_size[0], y0 - t_size[1] - 3), + tuple(color_mask.astype('int32').tolist()), -1) + cv2.putText( + im, + bbox_text, (x0, y0 - 2), + cv2.FONT_HERSHEY_SIMPLEX, + 0.3, (0, 0, 0), + 1, + lineType=cv2.LINE_AA) + return Image.fromarray(im.astype('uint8')) diff --git a/ppdet/modeling/architecture/meta_arch.py b/ppdet/modeling/architecture/meta_arch.py index 51d185247..c99bdb16e 100644 --- a/ppdet/modeling/architecture/meta_arch.py +++ b/ppdet/modeling/architecture/meta_arch.py @@ -16,17 +16,21 @@ class BaseArch(nn.Layer): def __init__(self): super(BaseArch, self).__init__() - def forward(self, data, input_def, mode): - self.inputs = self.build_inputs(data, input_def) + def forward(self, data, input_def, mode, input_tensor=None): + if input_tensor is None: + self.inputs = self.build_inputs(data, input_def) + else: + self.inputs = input_tensor self.inputs['mode'] = mode self.model_arch() if mode == 'train': out = self.get_loss() elif mode == 'infer': - out = self.get_pred() + out = self.get_pred(input_tensor is None) else: - raise "Now, only support train or infer mode!" + out = None + raise "Now, only support train and infer mode!" return out def build_inputs(self, data, input_def): @@ -43,3 +47,6 @@ class BaseArch(nn.Layer): def get_pred(self, ): raise NotImplementedError("Should implement get_pred method!") + + def get_export_model(self, input_tensor): + return self.forward(None, None, 'infer', input_tensor) diff --git a/ppdet/modeling/architecture/yolo.py b/ppdet/modeling/architecture/yolo.py index e63ec8bd2..0f22c209e 100644 --- a/ppdet/modeling/architecture/yolo.py +++ b/ppdet/modeling/architecture/yolo.py @@ -43,13 +43,16 @@ class YOLOv3(BaseArch): loss = self.yolo_head.get_loss(self.yolo_head_outs, self.inputs) return loss - def get_pred(self, ): + def get_pred(self, return_numpy=True): bbox, bbox_num = self.post_process( self.yolo_head_outs, self.yolo_head.mask_anchors, self.inputs['im_shape'], self.inputs['scale_factor']) - outs = { - "bbox": bbox.numpy(), - "bbox_num": bbox_num.numpy(), - 'im_id': self.inputs['im_id'].numpy() - } + if return_numpy: + outs = { + "bbox": bbox.numpy(), + "bbox_num": bbox_num.numpy(), + 'im_id': self.inputs['im_id'].numpy() + } + else: + outs = [bbox, bbox_num] return outs diff --git a/ppdet/modeling/layers.py b/ppdet/modeling/layers.py index a5ae2e900..3fe9d1d7e 100644 --- a/ppdet/modeling/layers.py +++ b/ppdet/modeling/layers.py @@ -358,7 +358,8 @@ class MultiClassNMS(object): nms_threshold=.5, normalized=False, nms_eta=1.0, - background_label=0): + background_label=0, + return_rois_num=True): super(MultiClassNMS, self).__init__() self.score_threshold = score_threshold self.nms_top_k = nms_top_k @@ -367,6 +368,7 @@ class MultiClassNMS(object): self.normalized = normalized self.nms_eta = nms_eta self.background_label = background_label + self.return_rois_num = return_rois_num def __call__(self, bboxes, score): kwargs = self.__dict__.copy() @@ -419,14 +421,10 @@ class YOLOBox(object): self.clip_bbox = clip_bbox self.scale_x_y = scale_x_y - def __call__(self, yolo_head_out, anchors, im_shape, 
scale_factor=None): + def __call__(self, yolo_head_out, anchors, im_shape, scale_factor): boxes_list = [] scores_list = [] - if scale_factor is not None: - origin_shape = im_shape / scale_factor - else: - origin_shape = im_shape - + origin_shape = im_shape / scale_factor origin_shape = paddle.cast(origin_shape, 'int32') for i, head_out in enumerate(yolo_head_out): boxes, scores = ops.yolo_box(head_out, origin_shape, anchors[i], diff --git a/ppdet/modeling/ops.py b/ppdet/modeling/ops.py index 88a174377..0fd1be746 100644 --- a/ppdet/modeling/ops.py +++ b/ppdet/modeling/ops.py @@ -29,10 +29,19 @@ import numpy as np from functools import reduce __all__ = [ - 'roi_pool', 'roi_align', 'prior_box', 'anchor_generator', - 'generate_proposals', 'iou_similarity', 'box_coder', 'yolo_box', - 'multiclass_nms', 'distribute_fpn_proposals', 'collect_fpn_proposals', - 'matrix_nms', 'BatchNorm' + 'roi_pool', + 'roi_align', + 'prior_box', + 'anchor_generator', + 'generate_proposals', + 'iou_similarity', + 'box_coder', + 'yolo_box', + 'multiclass_nms', + 'distribute_fpn_proposals', + 'collect_fpn_proposals', + 'matrix_nms', + 'BatchNorm', ] @@ -663,31 +672,31 @@ def yolo_box( clip_bbox, 'scale_x_y', scale_x_y) boxes, scores = core.ops.yolo_box(x, origin_shape, *attrs) return boxes, scores - - boxes = helper.create_variable_for_type_inference(dtype=x.dtype) - scores = helper.create_variable_for_type_inference(dtype=x.dtype) - - attrs = { - "anchors": anchors, - "class_num": class_num, - "conf_thresh": conf_thresh, - "downsample_ratio": downsample_ratio, - "clip_bbox": clip_bbox, - "scale_x_y": scale_x_y, - } - - helper.append_op( - type='yolo_box', - inputs={ - "X": x, - "ImgSize": origin_shape, - }, - outputs={ - 'Boxes': boxes, - 'Scores': scores, - }, - attrs=attrs) - return boxes, scores + else: + boxes = helper.create_variable_for_type_inference(dtype=x.dtype) + scores = helper.create_variable_for_type_inference(dtype=x.dtype) + + attrs = { + "anchors": anchors, + "class_num": class_num, + "conf_thresh": conf_thresh, + "downsample_ratio": downsample_ratio, + "clip_bbox": clip_bbox, + "scale_x_y": scale_x_y, + } + + helper.append_op( + type='yolo_box', + inputs={ + "X": x, + "ImgSize": origin_shape, + }, + outputs={ + 'Boxes': boxes, + 'Scores': scores, + }, + attrs=attrs) + return boxes, scores def prior_box(input, @@ -952,6 +961,7 @@ def multiclass_nms(bboxes, nms_eta=1., background_label=0, return_index=False, + return_rois_num=True, rois_num=None, name=None): """ @@ -1054,44 +1064,45 @@ def multiclass_nms(bboxes, output, index, nms_rois_num = core.ops.multiclass_nms3(bboxes, scores, rois_num, *attrs) if return_index: - return output, index, nms_rois_num - else: - return output, nms_rois_num + index = None + return output, nms_rois_num, index - output = helper.create_variable_for_type_inference(dtype=bboxes.dtype) - index = helper.create_variable_for_type_inference(dtype='int') - - inputs = {'BBoxes': bboxes, 'Scores': scores} - outputs = {'Out': output, 'Index': index} + else: + output = helper.create_variable_for_type_inference(dtype=bboxes.dtype) + index = helper.create_variable_for_type_inference(dtype='int') - if rois_num is not None: - inputs['RoisNum'] = rois_num - nms_rois_num = helper.create_variable_for_type_inference(dtype='int32') - outputs['NmsRoisNum'] = nms_rois_num + inputs = {'BBoxes': bboxes, 'Scores': scores} + outputs = {'Out': output, 'Index': index} - helper.append_op( - type="multiclass_nms3", - inputs=inputs, - attrs={ - 'background_label': background_label, - 'score_threshold': 
score_threshold, - 'nms_top_k': nms_top_k, - 'nms_threshold': nms_threshold, - 'keep_top_k': keep_top_k, - 'nms_eta': nms_eta, - 'normalized': normalized - }, - outputs=outputs) - output.stop_gradient = True - index.stop_gradient = True + if rois_num is not None: + inputs['RoisNum'] = rois_num - if return_index and rois_num is not None: - return output, index, nms_rois_num - elif return_index and rois_num is None: - return output, index - elif not return_index and rois_num is not None: - return output, nms_rois_num - return output + if return_rois_num: + nms_rois_num = helper.create_variable_for_type_inference( + dtype='int32') + outputs['NmsRoisNum'] = nms_rois_num + + helper.append_op( + type="multiclass_nms3", + inputs=inputs, + attrs={ + 'background_label': background_label, + 'score_threshold': score_threshold, + 'nms_top_k': nms_top_k, + 'nms_threshold': nms_threshold, + 'keep_top_k': keep_top_k, + 'nms_eta': nms_eta, + 'normalized': normalized + }, + outputs=outputs) + output.stop_gradient = True + index.stop_gradient = True + if not return_index: + index = None + if not return_rois_num: + nms_rois_num = None + + return output, nms_rois_num, index def matrix_nms(bboxes, diff --git a/ppdet/modeling/post_process.py b/ppdet/modeling/post_process.py index 24827f510..0b7eca1af 100644 --- a/ppdet/modeling/post_process.py +++ b/ppdet/modeling/post_process.py @@ -18,7 +18,7 @@ class BBoxPostProcess(object): def __call__(self, head_out, rois, im_shape, scale_factor=None): bboxes, score = self.decode(head_out, rois, im_shape, scale_factor) - bbox_pred, bbox_num = self.nms(bboxes, score) + bbox_pred, bbox_num, _ = self.nms(bboxes, score) return bbox_pred, bbox_num diff --git a/tools/export_model.py b/tools/export_model.py new file mode 100644 index 000000000..888bb3e37 --- /dev/null +++ b/tools/export_model.py @@ -0,0 +1,130 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
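+#
+# Typical usage (the config and weights paths below are illustrative, not taken
+# from this patch; -c/--config and -o/--opt are the PaddleDetection CLI flags
+# parsed by ppdet.utils.cli.ArgsParser):
+#
+#   python tools/export_model.py -c configs/yolov3_darknet.yml \
+#       -o weights=output/yolov3_darknet/model_final --output_dir=output_inference
+#
+# The script loads cfg.weights, saves model.pdmodel / model.pdiparams via
+# paddle.jit.save, and writes infer_cfg.yml, producing the directory layout
+# expected by deploy/python/infer.py and deploy/cpp.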
+ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +import os, sys +# add python path of PadleDetection to sys.path +parent_path = os.path.abspath(os.path.join(__file__, *(['..'] * 2))) +if parent_path not in sys.path: + sys.path.append(parent_path) + +# ignore numba warning +import warnings +warnings.filterwarnings('ignore') +import glob +import numpy as np +from PIL import Image +import paddle +from ppdet.core.workspace import load_config, merge_config, create +from ppdet.utils.check import check_gpu, check_version, check_config +from ppdet.utils.cli import ArgsParser +from ppdet.utils.checkpoint import load_weight +from export_utils import dump_infer_config +from paddle.jit import to_static +import paddle.nn as nn +from paddle.static import InputSpec +import logging +FORMAT = '%(asctime)s-%(levelname)s: %(message)s' +logging.basicConfig(level=logging.INFO, format=FORMAT) +logger = logging.getLogger(__name__) + + +def parse_args(): + parser = ArgsParser() + parser.add_argument( + "--output_dir", + type=str, + default="output_inference", + help="Directory for storing the output model files.") + args = parser.parse_args() + return args + + +def run(FLAGS, cfg): + + # Model + main_arch = cfg.architecture + model = create(cfg.architecture) + inputs_def = cfg['TestReader']['inputs_def'] + assert 'image_shape' in inputs_def, 'image_shape must be specified.' + image_shape = inputs_def.get('image_shape') + + assert not None in image_shape, 'image_shape should not contain None' + cfg_name = os.path.basename(FLAGS.config).split('.')[0] + save_dir = os.path.join(FLAGS.output_dir, cfg_name) + if not os.path.exists(save_dir): + os.makedirs(save_dir) + + image_shape = dump_infer_config(cfg, + os.path.join(save_dir, 'infer_cfg.yml'), + image_shape) + + class ExportModel(nn.Layer): + def __init__(self, model): + super(ExportModel, self).__init__() + self.model = model + + @to_static(input_spec=[ + { + 'image': InputSpec( + shape=[None] + image_shape, name='image') + }, + { + 'im_shape': InputSpec( + shape=[None, 2], name='im_shape') + }, + { + 'scale_factor': InputSpec( + shape=[None, 2], name='scale_factor') + }, + ]) + def forward(self, image, im_shape, scale_factor): + inputs = {} + inputs_tensor = [image, im_shape, scale_factor] + for t in inputs_tensor: + inputs.update(t) + outs = self.model.get_export_model(inputs) + return outs + + export_model = ExportModel(model) + # debug for dy2static, remove later + #paddle.jit.set_code_level() + + # Init Model + load_weight(export_model.model, cfg.weights) + + export_model.eval() + + # export config and model + paddle.jit.save(export_model, os.path.join(save_dir, 'model')) + logger.info('Export model to {}'.format(save_dir)) + + +def main(): + paddle.set_device("cpu") + FLAGS = parse_args() + + cfg = load_config(FLAGS.config) + merge_config(FLAGS.opt) + check_config(cfg) + check_gpu(cfg.use_gpu) + check_version() + + run(FLAGS, cfg) + + +if __name__ == '__main__': + main() diff --git a/tools/export_utils.py b/tools/export_utils.py new file mode 100644 index 000000000..6a50ddfd7 --- /dev/null +++ b/tools/export_utils.py @@ -0,0 +1,120 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import yaml +import numpy as np +from collections import OrderedDict + +import logging +logger = logging.getLogger(__name__) + +import paddle.fluid as fluid + +__all__ = ['dump_infer_config', 'save_infer_model'] + +# Global dictionary +TRT_MIN_SUBGRAPH = { + 'YOLO': 3, + 'SSD': 3, + 'RCNN': 40, + 'RetinaNet': 40, + 'EfficientDet': 40, + 'Face': 3, + 'TTFNet': 3, + 'FCOS': 3, + 'SOLOv2': 60, +} + + +def parse_reader(reader_cfg, dataset_cfg, metric, arch, image_shape): + preprocess_list = [] + + anno_file = dataset_cfg.get_anno() + with_background = reader_cfg['with_background'] + use_default_label = dataset_cfg.use_default_label + + if metric == 'COCO': + from ppdet.utils.coco_eval import get_category_info + else: + raise ValueError("metric only supports COCO, but received {}".format( + metric)) + clsid2catid, catid2name = get_category_info(anno_file, with_background, + use_default_label) + + label_list = [str(cat) for cat in catid2name.values()] + + sample_transforms = reader_cfg['sample_transforms'] + for st in sample_transforms[1:]: + for key, value in st.items(): + p = {'type': key} + if key == 'ResizeOp': + if value.get('keep_ratio', False): + max_size = max(image_shape[1:]) + image_shape = [3, max_size, max_size] + p.update(value) + preprocess_list.append(p) + batch_transforms = reader_cfg.get('batch_transforms', None) + if batch_transforms: + methods = [list(bt.keys())[0] for bt in batch_transforms] + for bt in batch_transforms: + for key, value in bt.items(): + if key == 'PadBatch': + preprocess_list.append({'type': 'PadStride'}) + preprocess_list[-1].update({ + 'stride': value['pad_to_stride'] + }) + break + + return with_background, preprocess_list, label_list, image_shape + + +def dump_infer_config(config, path, image_shape): + arch_state = False + from ppdet.core.config.yaml_helpers import setup_orderdict + setup_orderdict() + infer_cfg = OrderedDict({ + 'mode': 'fluid', + 'draw_threshold': 0.5, + 'metric': config['metric'], + 'image_shape': image_shape + }) + infer_arch = config['architecture'] + + for arch, min_subgraph_size in TRT_MIN_SUBGRAPH.items(): + if arch in infer_arch: + infer_cfg['arch'] = arch + infer_cfg['min_subgraph_size'] = min_subgraph_size + arch_state = True + break + if not arch_state: + logger.error( + 'Architecture: {} is not supported for exporting model now'.format( + infer_arch)) + os._exit(0) + + if 'Mask' in config['architecture']: + infer_cfg['mask_resolution'] = config['Mask']['mask_resolution'] + infer_cfg['with_background'], infer_cfg['Preprocess'], infer_cfg[ + 'label_list'], image_shape = parse_reader( + config['TestReader'], config['TestDataset'], config['metric'], + infer_cfg['arch'], image_shape) + + yaml.dump(infer_cfg, open(path, 'w')) + logger.info("Export inference config file to {}".format(os.path.join(path))) + return image_shape -- GitLab
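As a companion to the diff above, here is a minimal sketch of consuming a directory exported by `tools/export_model.py` with the same `paddle.inference` API that `deploy/python/infer.py` uses. The `model_dir` path, the dummy input arrays, and the `from paddle.inference import Config, create_predictor` line are assumptions for illustration (the import statement is not visible in the hunks above); real images must first go through the preprocess ops listed in `infer_cfg.yml`.

```
# Sketch: load an exported PaddleDetection model and run one forward pass.
import os
import numpy as np
from paddle.inference import Config, create_predictor  # assumed import, as used by infer.py

model_dir = 'output_inference/yolov3_darknet'  # illustrative path
config = Config(
    os.path.join(model_dir, 'model.pdmodel'),
    os.path.join(model_dir, 'model.pdiparams'))
config.enable_use_gpu(200, 0)            # initial GPU memory (MB), device id
# config.disable_gpu()                   # alternative on CPU-only machines
config.switch_ir_optim(True)
config.enable_memory_optim()
config.switch_use_feed_fetch_ops(False)
predictor = create_predictor(config)

# The exporter declares three inputs: image, im_shape and scale_factor.
# Dummy values are used here; a real image needs ResizeOp/NormalizeImageOp/PermuteOp first.
feeds = {
    'image': np.zeros((1, 3, 608, 608), dtype='float32'),
    'im_shape': np.array([[608., 608.]], dtype='float32'),       # shape after resize
    'scale_factor': np.array([[1., 1.]], dtype='float32'),       # [scale_y, scale_x]
}
for name in predictor.get_input_names():
    predictor.get_input_handle(name).copy_from_cpu(feeds[name])

predictor.run()
output_names = predictor.get_output_names()
np_boxes = predictor.get_output_handle(output_names[0]).copy_to_cpu()
# np_boxes: [N, 6] -> [class, score, x_min, y_min, x_max, y_max]
# (may be effectively empty if nothing passes the score threshold)
print('detected boxes:', np_boxes.shape)
```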