From 147fb45046a3c1df50a94e7e63732ff78d6d0313 Mon Sep 17 00:00:00 2001
From: zhiboniu
Date: Thu, 6 Jan 2022 10:04:35 +0000
Subject: [PATCH] add keypoint mnn support

del json config & preprocess

---
 configs/keypoint/README.md                    |   5 +
 configs/keypoint/README_en.md                 |   4 +
 deploy/README.md                              |  14 +-
 deploy/README_en.md                           |  13 +-
 .../third_engine/demo_mnn_kpts/CMakeLists.txt |  26 ++
 .../demo_mnn_kpts/CMakeLists_armv8.txt        |  47 ++
 deploy/third_engine/demo_mnn_kpts/README.md   | 116 +++++
 .../demo_mnn_kpts/keypoint_detector.cpp       | 179 ++++++++
 .../demo_mnn_kpts/keypoint_detector.h         | 131 ++++++
 .../demo_mnn_kpts/keypoint_postprocess.cpp    | 258 +++++++++++
 .../demo_mnn_kpts/keypoint_postprocess.h      |  67 +++
 deploy/third_engine/demo_mnn_kpts/main.cpp    | 424 ++++++++++++++++++
 .../demo_mnn_kpts/picodet_mnn.cpp             | 229 ++++++++++
 .../third_engine/demo_mnn_kpts/picodet_mnn.h  | 138 ++++++
 14 files changed, 1647 insertions(+), 4 deletions(-)
 create mode 100644 deploy/third_engine/demo_mnn_kpts/CMakeLists.txt
 create mode 100644 deploy/third_engine/demo_mnn_kpts/CMakeLists_armv8.txt
 create mode 100644 deploy/third_engine/demo_mnn_kpts/README.md
 create mode 100644 deploy/third_engine/demo_mnn_kpts/keypoint_detector.cpp
 create mode 100644 deploy/third_engine/demo_mnn_kpts/keypoint_detector.h
 create mode 100644 deploy/third_engine/demo_mnn_kpts/keypoint_postprocess.cpp
 create mode 100644 deploy/third_engine/demo_mnn_kpts/keypoint_postprocess.h
 create mode 100644 deploy/third_engine/demo_mnn_kpts/main.cpp
 create mode 100644 deploy/third_engine/demo_mnn_kpts/picodet_mnn.cpp
 create mode 100644 deploy/third_engine/demo_mnn_kpts/picodet_mnn.h

diff --git a/configs/keypoint/README.md b/configs/keypoint/README.md
index d7d6c1e55..e750312a0 100644
--- a/configs/keypoint/README.md
+++ b/configs/keypoint/README.md
@@ -128,6 +128,11 @@ python deploy/python/mot_keypoint_unite_infer.py --mot_model_dir=output_inferenc
 
 **注意：** 跟踪模型导出教程请参考`configs/mot/README.md`。
 
+### 4、模型单独部署
+
+我们提供了PaddleInference(服务器端)、PaddleLite(移动端)、第三方部署(MNN、OpenVino)支持。无需依赖训练代码，deploy文件夹下相应文件夹提供独立完整部署代码。
+详见 [部署文档](../../deploy/README.md)介绍。
+
 ## Benchmark
 我们给出了不同运行环境下的测试结果，供您在选用模型时参考。详细数据请见[Keypoint Inference Benchmark](./KeypointBenchmark.md)。

diff --git a/configs/keypoint/README_en.md b/configs/keypoint/README_en.md
index 9ab6d1b74..05e77f668 100644
--- a/configs/keypoint/README_en.md
+++ b/configs/keypoint/README_en.md
@@ -126,6 +126,10 @@ python deploy/python/mot_keypoint_unite_infer.py --mot_model_dir=output_inferenc
 
 **Note:** To export MOT model, please refer to [Here](../../configs/mot/README_en.md).
 
+### 4. Standalone deployment
+
+We provide standalone deployment support for PaddleInference (server side), PaddleLite (mobile/ARM), and third-party engines (MNN, OpenVINO), independent of the training code. For details, please refer to the [deployment documentation](../../deploy/README_en.md).
+
 ## Benchmark
 We provide benchmarks in different runtime environments for your reference when choosing models. See [Keypoint Inference Benchmark](./KeypointBenchmark.md) for details.
diff --git a/deploy/README.md b/deploy/README.md
index d476d7fd1..c70eb73a9 100644
--- a/deploy/README.md
+++ b/deploy/README.md
@@ -51,7 +51,17 @@ python tools/export_model.py -c configs/yolov3/yolov3_mobilenet_v1_roadsign.yml
 
 - 详细案例请参考[Paddle-Lite-Demo](https://github.com/PaddlePaddle/Paddle-Lite-Demo)部署。更多内容，请参考[Paddle-Lite](https://github.com/PaddlePaddle/Paddle-Lite)
 
-## 4.Benchmark测试
+## 4.第三方部署(MNN、NCNN、Openvino)
+- 第三方部署提供PicoDet、TinyPose案例，其他模型请参考修改
+
+| Third_Engine | MNN  | NCNN  | OPENVINO   |
+| ------------ | ---- | ----- | ---------- |
+| PicoDet      | [PicoDet_MNN](./third_engine/demo_mnn/README.md) | [PicoDet_NCNN](./third_engine/demo_ncnn/README.md) | [PicoDet_OPENVINO](./third_engine/demo_openvino/README.md) |
+| TinyPose     | [TinyPose_MNN](./third_engine/demo_mnn_kpts/README.md) | - | [TinyPose_OPENVINO](./third_engine/demo_openvino_kpts/README.md) |
+
+
+## 5.Benchmark测试
 - 使用导出的模型，运行Benchmark批量测试脚本:
 ```shell
 sh deploy/benchmark/benchmark.sh {model_dir} {model_name}
@@ -62,7 +72,7 @@ sh deploy/benchmark/benchmark.sh {model_dir} {model_name}
 python deploy/benchmark/log_parser_excel.py --log_path=./output_pipeline --output_name=benchmark_excel.xlsx
 ```
 
-## 5.常见问题QA
+## 6.常见问题QA
 - 1、`Paddle 1.8.4`训练的模型，可以用`Paddle2.0`部署吗？
   Paddle 2.0是兼容Paddle 1.8.4的，因此是可以的。但是部分模型(如SOLOv2)使用到了Paddle 2.0中新增OP，这类模型不可以。

diff --git a/deploy/README_en.md b/deploy/README_en.md
index ef7148f0b..d805ffafa 100644
--- a/deploy/README_en.md
+++ b/deploy/README_en.md
@@ -51,7 +51,16 @@ For details on model export, please refer to the documentation [Tutorial on Padd
 
 - For details, please refer to [Paddle-Lite-Demo](https://github.com/PaddlePaddle/Paddle-Lite-Demo) deployment. For more information, please refer to [Paddle-Lite](https://github.com/PaddlePaddle/Paddle-Lite)
 
-## 4. Benchmark Test
+## 4. Third-Engine Deployment (MNN, NCNN, OpenVINO)
+- The third-engine deployment provides PicoDet and TinyPose examples; other models can be adapted in the same way.
+
+| Third_Engine | MNN                                                     | NCNN                                               | OPENVINO                                                          |
+| ------------ | ------------------------------------------------------ | -------------------------------------------------- | ------------------------------------------------------------ |
+| PicoDet      | [PicoDet_MNN](./third_engine/demo_mnn/README.md)        | [PicoDet_NCNN](./third_engine/demo_ncnn/README.md) | [PicoDet_OPENVINO](./third_engine/demo_openvino/README.md)        |
+| TinyPose     | [TinyPose_MNN](./third_engine/demo_mnn_kpts/README.md)  | -                                                  | [TinyPose_OPENVINO](./third_engine/demo_openvino_kpts/README.md)  |
+
+
+## 5. Benchmark Test
 - Using the exported model, run the Benchmark batch test script:
 ```shell
 sh deploy/benchmark/benchmark.sh {model_dir} {model_name}
@@ -62,7 +71,7 @@ sh deploy/benchmark/benchmark.sh {model_dir} {model_name}
 python deploy/benchmark/log_parser_excel.py --log_path=./output_pipeline --output_name=benchmark_excel.xlsx
 ```
 
-## 5. FAQ
+## 6. FAQ
 - 1、Can `Paddle 1.8.4` trained models be deployed with `Paddle2.0`?
   Paddle 2.0 is compatible with Paddle 1.8.4, so this works. However, some models (such as SOLOv2) use operators newly added in Paddle 2.0, so they cannot be deployed this way.
diff --git a/deploy/third_engine/demo_mnn_kpts/CMakeLists.txt b/deploy/third_engine/demo_mnn_kpts/CMakeLists.txt
new file mode 100644
index 000000000..84bf51a93
--- /dev/null
+++ b/deploy/third_engine/demo_mnn_kpts/CMakeLists.txt
@@ -0,0 +1,26 @@
+cmake_minimum_required(VERSION 3.9)
+
+project(tinypose-mnn)
+
+set(CMAKE_CXX_STANDARD 17)
+set(MNN_DIR {YOUR_MNN_DIR})
+
+find_package(OpenCV REQUIRED)
+
+include_directories(
+  ${MNN_DIR}/include
+  ${MNN_DIR}/include/MNN
+  ${CMAKE_SOURCE_DIR}
+)
+link_directories(mnn/lib)
+
+add_library(libMNN SHARED IMPORTED)
+set_target_properties(
+  libMNN
+  PROPERTIES IMPORTED_LOCATION
+  ${CMAKE_SOURCE_DIR}/mnn/lib/libMNN.so
+)
+add_executable(tinypose-mnn main.cpp picodet_mnn.cpp keypoint_detector.cpp keypoint_postprocess.cpp)
+
+target_link_libraries(tinypose-mnn MNN ${OpenCV_LIBS} libMNN.so)
+
diff --git a/deploy/third_engine/demo_mnn_kpts/CMakeLists_armv8.txt b/deploy/third_engine/demo_mnn_kpts/CMakeLists_armv8.txt
new file mode 100644
index 000000000..027f0dd97
--- /dev/null
+++ b/deploy/third_engine/demo_mnn_kpts/CMakeLists_armv8.txt
@@ -0,0 +1,47 @@
+cmake_minimum_required(VERSION 3.9)
+
+project(tinypose-mnn)
+
+set(CMAKE_CXX_STANDARD 17)
+set(MNN_DIR {YOUR_MNN_DIR})
+set(NDK_ROOT {YOUR_ANDROID_NDK_PATH})
+set(LDFLAGS -latomic -pthread -ldl -llog -lz -static-libstdc++)
+
+set(OpenCV_DIR ${CMAKE_SOURCE_DIR}/third/opencv4.1.0/arm64-v8a)
+
+set(OpenCV_DEPS ${OpenCV_DIR}/libs/libopencv_imgcodecs.a
+    ${OpenCV_DIR}/libs/libopencv_imgproc.a
+    ${OpenCV_DIR}/libs/libopencv_core.a
+    ${OpenCV_DIR}/3rdparty/libs/libtegra_hal.a
+    ${OpenCV_DIR}/3rdparty/libs/liblibjpeg-turbo.a
+    ${OpenCV_DIR}/3rdparty/libs/liblibwebp.a
+    ${OpenCV_DIR}/3rdparty/libs/liblibpng.a
+    ${OpenCV_DIR}/3rdparty/libs/liblibjasper.a
+    ${OpenCV_DIR}/3rdparty/libs/liblibtiff.a
+    ${OpenCV_DIR}/3rdparty/libs/libIlmImf.a
+    ${OpenCV_DIR}/3rdparty/libs/libtbb.a
+    ${OpenCV_DIR}/3rdparty/libs/libcpufeatures.a)
+
+set(FLAGS "-pie -Wl,--gc-sections -funwind-tables -no-canonical-prefixes -D__ANDROID_API__=21 -fexceptions -frtti -std=c++11 -O3 -DNDEBUG -fPIE -fopenmp")
+set(CMAKE_CXX_FLAGS "--sysroot=${NDK_ROOT}/sysroot ${FLAGS}")
+
+set(STDCXX ${NDK_ROOT}/sources/cxx-stl/llvm-libc++/libs/arm64-v8a/libc++_static.a
+    ${NDK_ROOT}/sources/cxx-stl/llvm-libc++/libs/arm64-v8a/libc++abi.a
+    ${NDK_ROOT}/platforms/android-21/arch-arm64/usr/lib/libstdc++.a)
+set(SYS_INCS ${NDK_ROOT}/sysroot/usr/include/aarch64-linux-android/ ${NDK_ROOT}/sources/cxx-stl/llvm-libc++/include/ ${NDK_ROOT}/sources/cxx-stl/llvm-libc++abi/include/ ${NDK_ROOT}/sources/android/support/include/ ${NDK_ROOT}/sysroot/usr/include/)
+
+include_directories(
+  ${SYS_INCS}
+  ${OpenCV_DIR}/include
+  ${MNN_DIR}/include
+  ${MNN_DIR}/include/MNN
+  ${CMAKE_SOURCE_DIR}
+)
+
+link_directories(${NDK_ROOT}/platforms/android-21/arch-arm64)
+link_directories(${MNN_DIR}/project/android/build_64)
+
+add_executable(tinypose-mnn picodet_mnn.cpp keypoint_postprocess.cpp keypoint_detector.cpp main.cpp)
+
+target_link_libraries(tinypose-mnn -lMNN ${OpenCV_DEPS} ${STDCXX} ${LDFLAGS})
+
diff --git a/deploy/third_engine/demo_mnn_kpts/README.md b/deploy/third_engine/demo_mnn_kpts/README.md
new file mode 100644
index 000000000..6445e603a
--- /dev/null
+++ b/deploy/third_engine/demo_mnn_kpts/README.md
@@ -0,0 +1,116 @@
+# TinyPose MNN Demo
+
+This folder provides PicoDet + TinyPose inference code using
+[Alibaba's MNN framework](https://github.com/alibaba/MNN). Most of the
+implementation in this folder is the same as *demo_ncnn*.
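+
+The demo chains the two models: PicoDet proposes person boxes, and each box is
+cropped and fed to TinyPose. Below is a minimal sketch of that flow using the
+classes in this folder (the model paths are placeholders; `main.cpp`
+additionally letterboxes the detector input via `resize_uniform`, which is
+omitted here):
+
+``` c++
+#include "keypoint_detector.h"
+#include "picodet_mnn.h"
+
+int main() {
+  PicoDet detector("picodet_m_416.mnn", 416, 416, 4, 0.45, 0.3);
+  PaddleDetection::KeyPointDetector pose("tinypose256.mnn", 4, 256, 192);
+
+  cv::Mat img = cv::imread("test.jpg");
+  std::vector<BoxInfo> boxes;
+  detector.detect(img, boxes);  // stage 1: person detection
+
+  std::vector<PaddleDetection::KeyPointResult> kpts;
+  for (auto& box : boxes) {
+    if (box.label != 0) continue;  // keep only the "person" class
+    cv::Mat crop;
+    std::vector<float> center, scale;
+    std::vector<int> area = {int(box.x1), int(box.y1), int(box.x2), int(box.y2)};
+    CropImg(img, crop, area, center, scale);  // 3:4 crop around the box
+    std::vector<cv::Mat> crops = {crop};
+    std::vector<std::vector<float>> centers = {center}, scales = {scale};
+    pose.Predict(crops, centers, scales, &kpts);  // stage 2: keypoints
+  }
+  return 0;
+}
+```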
+
+## Install MNN
+
+### Python library
+
+Just run:
+
+``` shell
+pip install MNN
+```
+
+### C++ library
+
+Please follow the [official document](https://www.yuque.com/mnn/en/build_linux) to build the MNN engine.
+
+- Create picodet_m_416_coco.onnx and tinypose256.onnx, for example:
+  ```shell
+  modelName=picodet_m_416_coco
+  # export model
+  python tools/export_model.py \
+        -c configs/picodet/${modelName}.yml \
+        -o weights=${modelName}.pdparams \
+        --output_dir=inference_model
+  # convert to onnx
+  paddle2onnx --model_dir inference_model/${modelName} \
+  --model_filename model.pdmodel \
+  --params_filename model.pdiparams \
+  --opset_version 11 \
+  --save_file ${modelName}.onnx
+  # onnxsim
+  python -m onnxsim ${modelName}.onnx ${modelName}_processed.onnx
+  ```
+
+- Convert the model, for example:
+  ``` shell
+  python -m MNN.tools.mnnconvert -f ONNX --modelFile picodet-416.onnx --MNNModel picodet-416.mnn
+  ```
+Here are the converted models:
+[picodet_m_416](https://paddledet.bj.bcebos.com/deploy/third_engine/picodet_m_416.mnn),
+[tinypose256](https://paddledet.bj.bcebos.com/deploy/third_engine/tinypose256.mnn)
+
+## Build
+
+For the C++ demo, replace `libMNN.so` under *./mnn/lib* with the one you just compiled, set the OpenCV and MNN paths in the CMake file,
+and run
+
+``` shell
+mkdir build && cd build
+cmake ..
+make
+```
+
+Note that a flag in `main.cpp` controls whether detection results are shown in a window or saved into a folder.
+
+``` c++
+#define __SAVE_RESULT__ // if defined, save drawn results to ../results; otherwise show them in a window
+```
+
+#### ARM Build
+
+Prepare the OpenCV library [OpenCV_4_1](https://paddle-inference-dist.bj.bcebos.com/opencv4.1.0.tar.gz).
+
+``` shell
+mkdir third && cd third
+wget https://paddle-inference-dist.bj.bcebos.com/opencv4.1.0.tar.gz
+tar -zxvf opencv4.1.0.tar.gz
+cd ..
+
+mkdir build && cd build
+cmake -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake -DANDROID_ABI="arm64-v8a" -DANDROID_PLATFORM=android-21 -DANDROID_TOOLCHAIN=gcc ..
+make
+```
+
+## Run
+
+To detect images in a folder, run:
+``` shell
+./tinypose-mnn [mode] [image_file]
+```
+| param | detail |
+| ---- | ---- |
+| mode | input mode; 0: camera, 1: image, 2: video, 3: benchmark |
+| image_file | input image path |
+
+For example:
+
+``` shell
+./tinypose-mnn "1" "../imgs/test.jpg"
+```
+
+For the speed benchmark:
+
+``` shell
+./tinypose-mnn "3" "0"
+```
+
+## Benchmark
+Platform: Kirin 980
+Model: [tinypose256](https://paddledet.bj.bcebos.com/deploy/third_engine/tinypose256.mnn)
+
+| param | Min(s) | Max(s) | Avg(s) |
+| -------- | ------ | ------ | ------ |
+| Thread=4 | 0.018  | 0.021  | 0.019  |
+| Thread=1 | 0.031  | 0.041  | 0.032  |
+
+
+
+## Reference
+[MNN](https://github.com/alibaba/MNN)
diff --git a/deploy/third_engine/demo_mnn_kpts/keypoint_detector.cpp b/deploy/third_engine/demo_mnn_kpts/keypoint_detector.cpp
new file mode 100644
index 000000000..05fad66d8
--- /dev/null
+++ b/deploy/third_engine/demo_mnn_kpts/keypoint_detector.cpp
@@ -0,0 +1,179 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <chrono>
+// for setprecision
+#include <iomanip>
+#include "keypoint_detector.h"
+
+namespace PaddleDetection {
+
+// Visualization of KeyPointDetector results
+cv::Mat VisualizeKptsResult(const cv::Mat& img,
+                            const std::vector<KeyPointResult>& results,
+                            const std::vector<int>& colormap,
+                            float threshold) {
+  const int edge[][2] = {{0, 1},
+                         {0, 2},
+                         {1, 3},
+                         {2, 4},
+                         {3, 5},
+                         {4, 6},
+                         {5, 7},
+                         {6, 8},
+                         {7, 9},
+                         {8, 10},
+                         {5, 11},
+                         {6, 12},
+                         {11, 13},
+                         {12, 14},
+                         {13, 15},
+                         {14, 16},
+                         {11, 12}};
+  cv::Mat vis_img = img.clone();
+  for (int batchid = 0; batchid < results.size(); batchid++) {
+    for (int i = 0; i < results[batchid].num_joints; i++) {
+      if (results[batchid].keypoints[i * 3] > threshold) {
+        int x_coord = int(results[batchid].keypoints[i * 3 + 1]);
+        int y_coord = int(results[batchid].keypoints[i * 3 + 2]);
+        cv::circle(vis_img,
+                   cv::Point2d(x_coord, y_coord),
+                   1,
+                   cv::Scalar(0, 0, 255),
+                   2);
+      }
+    }
+    for (int i = 0; i < results[batchid].num_joints; i++) {
+      if (results[batchid].keypoints[edge[i][0] * 3] > threshold &&
+          results[batchid].keypoints[edge[i][1] * 3] > threshold) {
+        int x_start = int(results[batchid].keypoints[edge[i][0] * 3 + 1]);
+        int y_start = int(results[batchid].keypoints[edge[i][0] * 3 + 2]);
+        int x_end = int(results[batchid].keypoints[edge[i][1] * 3 + 1]);
+        int y_end = int(results[batchid].keypoints[edge[i][1] * 3 + 2]);
+        cv::line(vis_img,
+                 cv::Point2d(x_start, y_start),
+                 cv::Point2d(x_end, y_end),
+                 colormap[i],
+                 1);
+      }
+    }
+  }
+  return vis_img;
+}
+
+void KeyPointDetector::Postprocess(std::vector<float>& output,
+                                   std::vector<int>& output_shape,
+                                   std::vector<int>& idxout,
+                                   std::vector<int>& idx_shape,
+                                   std::vector<KeyPointResult>* result,
+                                   std::vector<std::vector<float>>& center_bs,
+                                   std::vector<std::vector<float>>& scale_bs) {
+  std::vector<float> preds(output_shape[1] * 3, 0);
+  for (int batchid = 0; batchid < output_shape[0]; batchid++) {
+    get_final_preds(output,
+                    output_shape,
+                    idxout,
+                    idx_shape,
+                    center_bs[batchid],
+                    scale_bs[batchid],
+                    preds,
+                    batchid,
+                    this->use_dark());
+    KeyPointResult result_item;
+    result_item.num_joints = output_shape[1];
+    result_item.keypoints.clear();
+    for (int i = 0; i < output_shape[1]; i++) {
+      result_item.keypoints.emplace_back(preds[i * 3]);
+      result_item.keypoints.emplace_back(preds[i * 3 + 1]);
+      result_item.keypoints.emplace_back(preds[i * 3 + 2]);
+    }
+    result->push_back(result_item);
+  }
+}
+
+void KeyPointDetector::Predict(const std::vector<cv::Mat> imgs,
+                               std::vector<std::vector<float>>& center_bs,
+                               std::vector<std::vector<float>>& scale_bs,
+                               std::vector<KeyPointResult>* result) {
+  int batch_size = imgs.size();
+  KeyPointDet_interpreter->resizeTensor(input_tensor,
+                                        {batch_size, 3, in_h, in_w});
+  KeyPointDet_interpreter->resizeSession(KeyPointDet_session);
+  auto insize = 3 * in_h * in_w;
+
+  // Preprocess image
+  cv::Mat resized_im;
+  for (int bs_idx = 0; bs_idx < batch_size; bs_idx++) {
+    cv::Mat im = imgs.at(bs_idx);
+
+    cv::resize(im, resized_im, cv::Size(in_w, in_h));
+    std::shared_ptr<MNN::CV::ImageProcess> pretreat(
+        MNN::CV::ImageProcess::create(
+            MNN::CV::BGR, MNN::CV::RGB, mean_vals, 3, norm_vals, 3));
+    pretreat->convert(
+        resized_im.data, in_w, in_h, resized_im.step[0], input_tensor);
+  }
+
+  // Run predictor
+  auto inference_start = std::chrono::steady_clock::now();
+
+  KeyPointDet_interpreter->runSession(KeyPointDet_session);
+  // Get output tensor
+  auto out_tensor = KeyPointDet_interpreter->getSessionOutput(
+      KeyPointDet_session, "conv2d_441.tmp_1");
+  auto nchwoutTensor = new Tensor(out_tensor, Tensor::CAFFE);
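+  // The session output lives in MNN's internal backend layout; copying into a
+  // Tensor created with Tensor::CAFFE gives a plain NCHW host buffer that the
+  // postprocessing below can index directly.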
+  out_tensor->copyToHostTensor(nchwoutTensor);
+
+  auto output_shape = nchwoutTensor->shape();
+  // Calculate output length
+  int output_size = 1;
+  for (int j = 0; j < output_shape.size(); ++j) {
+    output_size *= output_shape[j];
+  }
+  output_data_.resize(output_size);
+  std::copy_n(nchwoutTensor->host<float>(), output_size, output_data_.data());
+  delete nchwoutTensor;
+
+  auto idx_tensor = KeyPointDet_interpreter->getSessionOutput(
+      KeyPointDet_session, "argmax_0.tmp_0");
+
+  auto idxhostTensor = new Tensor(idx_tensor, Tensor::CAFFE);
+  idx_tensor->copyToHostTensor(idxhostTensor);
+
+  auto idx_shape = idxhostTensor->shape();
+  // Calculate output length
+  output_size = 1;
+  for (int j = 0; j < idx_shape.size(); ++j) {
+    output_size *= idx_shape[j];
+  }
+
+  idx_data_.resize(output_size);
+  std::copy_n(idxhostTensor->host<int>(), output_size, idx_data_.data());
+  delete idxhostTensor;
+
+  auto inference_end = std::chrono::steady_clock::now();
+  std::chrono::duration<double> elapsed = inference_end - inference_start;
+  printf("keypoint inference time: %f s\n", elapsed.count());
+
+  // Postprocessing result
+  Postprocess(output_data_,
+              output_shape,
+              idx_data_,
+              idx_shape,
+              result,
+              center_bs,
+              scale_bs);
+}
+
+}  // namespace PaddleDetection
diff --git a/deploy/third_engine/demo_mnn_kpts/keypoint_detector.h b/deploy/third_engine/demo_mnn_kpts/keypoint_detector.h
new file mode 100644
index 000000000..1c7af8921
--- /dev/null
+++ b/deploy/third_engine/demo_mnn_kpts/keypoint_detector.h
@@ -0,0 +1,131 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <chrono>
+#include <iostream>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include <opencv2/core/core.hpp>
+#include <opencv2/highgui/highgui.hpp>
+#include <opencv2/imgproc/imgproc.hpp>
+
+#include "Interpreter.hpp"
+
+#include "ImageProcess.hpp"
+#include "MNNDefine.h"
+#include "Tensor.hpp"
+
+#include "keypoint_postprocess.h"
+
+using namespace MNN;
+
+namespace PaddleDetection {
+// Object KeyPoint Result
+struct KeyPointResult {
+  // Keypoints: shape(N x 3); N: number of Joints; 3: x,y,conf
+  std::vector<float> keypoints;
+  int num_joints = -1;
+};
+
+// Visualization of KeyPoint results
+cv::Mat VisualizeKptsResult(const cv::Mat& img,
+                            const std::vector<KeyPointResult>& results,
+                            const std::vector<int>& colormap,
+                            float threshold = 0.2);
+
+class KeyPointDetector {
+ public:
+  explicit KeyPointDetector(const std::string& model_path,
+                            int num_thread = 4,
+                            int input_height = 256,
+                            int input_width = 192,
+                            float score_threshold = 0.3,
+                            const int batch_size = 1,
+                            bool use_dark = true) {
+    printf("config path: %s",
+           model_path.substr(0, model_path.find_last_of('/') + 1).c_str());
+    use_dark_ = use_dark;
+
+    in_w = input_width;
+    in_h = input_height;
+    threshold_ = score_threshold;
+
+    KeyPointDet_interpreter = std::shared_ptr<MNN::Interpreter>(
+        MNN::Interpreter::createFromFile(model_path.c_str()));
+    MNN::ScheduleConfig config;
+    config.type = MNN_FORWARD_CPU;
+    /* modeNum means gpuMode for GPU usage, or numThread for CPU usage. */
+    config.numThread = num_thread;
+    // If the backend type is not found, fall back to CPU
+    config.backupType = MNN_FORWARD_CPU;
+    BackendConfig backendConfig;
+    backendConfig.precision = static_cast<BackendConfig::PrecisionMode>(1);
+    config.backendConfig = &backendConfig;
+
+    KeyPointDet_session = KeyPointDet_interpreter->createSession(config);
+
+    input_tensor =
+        KeyPointDet_interpreter->getSessionInput(KeyPointDet_session, nullptr);
+  }
+
+  ~KeyPointDetector() {
+    KeyPointDet_interpreter->releaseModel();
+    KeyPointDet_interpreter->releaseSession(KeyPointDet_session);
+  }
+
+  // Load Paddle inference model
+  void LoadModel(std::string model_file, int num_threads);
+
+  // Run predictor
+  void Predict(const std::vector<cv::Mat> imgs,
+               std::vector<std::vector<float>>& center,
+               std::vector<std::vector<float>>& scale,
+               std::vector<KeyPointResult>* result = nullptr);
+
+  bool use_dark() { return this->use_dark_; }
+
+  inline float get_threshold() { return threshold_; };
+
+  // const float mean_vals[3] = { 103.53f, 116.28f, 123.675f };
+  // const float norm_vals[3] = { 0.017429f, 0.017507f, 0.017125f };
+  const float mean_vals[3] = {0.f, 0.f, 0.f};
+  const float norm_vals[3] = {1.f, 1.f, 1.f};
+  int in_w = 128;
+  int in_h = 256;
+
+ private:
+  // Postprocess result
+  void Postprocess(std::vector<float>& output,
+                   std::vector<int>& output_shape,
+                   std::vector<int>& idxout,
+                   std::vector<int>& idx_shape,
+                   std::vector<KeyPointResult>* result,
+                   std::vector<std::vector<float>>& center,
+                   std::vector<std::vector<float>>& scale);
+
+  std::vector<float> output_data_;
+  std::vector<int> idx_data_;
+  float threshold_;
+  bool use_dark_;
+
+  std::shared_ptr<MNN::Interpreter> KeyPointDet_interpreter;
+  MNN::Session* KeyPointDet_session = nullptr;
+  MNN::Tensor* input_tensor = nullptr;
+};
+
+}  // namespace PaddleDetection
diff --git a/deploy/third_engine/demo_mnn_kpts/keypoint_postprocess.cpp b/deploy/third_engine/demo_mnn_kpts/keypoint_postprocess.cpp
new file mode 100644
index 000000000..fe6e8298d
--- /dev/null
+++ b/deploy/third_engine/demo_mnn_kpts/keypoint_postprocess.cpp
@@ -0,0 +1,258 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "keypoint_postprocess.h"
+#define PI 3.1415926535
+#define HALF_CIRCLE_DEGREE 180
+
+cv::Point2f get_3rd_point(cv::Point2f& a, cv::Point2f& b) {
+  cv::Point2f direct{a.x - b.x, a.y - b.y};
+  return cv::Point2f(a.x - direct.y, a.y + direct.x);
+}
+
+std::vector<float> get_dir(float src_point_x,
+                           float src_point_y,
+                           float rot_rad) {
+  float sn = sin(rot_rad);
+  float cs = cos(rot_rad);
+  std::vector<float> src_result{0.0, 0.0};
+  src_result[0] = src_point_x * cs - src_point_y * sn;
+  src_result[1] = src_point_x * sn + src_point_y * cs;
+  return src_result;
+}
+
+void affine_tranform(
+    float pt_x, float pt_y, cv::Mat& trans, std::vector<float>& preds, int p) {
+  double new1[3] = {pt_x, pt_y, 1.0};
+  cv::Mat new_pt(3, 1, trans.type(), new1);
+  cv::Mat w = trans * new_pt;
+  preds[p * 3 + 1] = static_cast<float>(w.at<double>(0, 0));
+  preds[p * 3 + 2] = static_cast<float>(w.at<double>(1, 0));
+}
+
+void get_affine_transform(std::vector<float>& center,
+                          std::vector<float>& scale,
+                          float rot,
+                          std::vector<int>& output_size,
+                          cv::Mat& trans,
+                          int inv) {
+  float src_w = scale[0];
+  float dst_w = static_cast<float>(output_size[0]);
+  float dst_h = static_cast<float>(output_size[1]);
+  float rot_rad = rot * PI / HALF_CIRCLE_DEGREE;
+  std::vector<float> src_dir = get_dir(-0.5 * src_w, 0, rot_rad);
+  std::vector<float> dst_dir{static_cast<float>(-0.5) * dst_w, 0.0};
+  cv::Point2f srcPoint2f[3], dstPoint2f[3];
+  srcPoint2f[0] = cv::Point2f(center[0], center[1]);
+  srcPoint2f[1] = cv::Point2f(center[0] + src_dir[0], center[1] + src_dir[1]);
+  srcPoint2f[2] = get_3rd_point(srcPoint2f[0], srcPoint2f[1]);
+
+  dstPoint2f[0] = cv::Point2f(dst_w * 0.5, dst_h * 0.5);
+  dstPoint2f[1] =
+      cv::Point2f(dst_w * 0.5 + dst_dir[0], dst_h * 0.5 + dst_dir[1]);
+  dstPoint2f[2] = get_3rd_point(dstPoint2f[0], dstPoint2f[1]);
+  if (inv == 0) {
+    trans = cv::getAffineTransform(srcPoint2f, dstPoint2f);
+  } else {
+    trans = cv::getAffineTransform(dstPoint2f, srcPoint2f);
+  }
+}
+
+void transform_preds(std::vector<float>& coords,
+                     std::vector<float>& center,
+                     std::vector<float>& scale,
+                     std::vector<int>& output_size,
+                     std::vector<int>& dim,
+                     std::vector<float>& target_coords) {
+  cv::Mat trans(2, 3, CV_64FC1);
+  get_affine_transform(center, scale, 0, output_size, trans, 1);
+  for (int p = 0; p < dim[1]; ++p) {
+    affine_tranform(coords[p * 2], coords[p * 2 + 1], trans, target_coords, p);
+  }
+}
+
+// only for batchsize == 1
+void get_max_preds(std::vector<float>& heatmap,
+                   std::vector<int>& dim,
+                   std::vector<float>& preds,
+                   std::vector<float>& maxvals,
+                   int batchid,
+                   int joint_idx) {
+  int num_joints = dim[1];
+  int width = dim[3];
+  std::vector<int> idx;
+  idx.resize(num_joints * 2);
+
+  for (int j = 0; j < dim[1]; j++) {
+    float* index = &(
+        heatmap[batchid * num_joints * dim[2] * dim[3] + j * dim[2] * dim[3]]);
+    float* end = index + dim[2] * dim[3];
+    float* max_dis = std::max_element(index, end);
+    auto max_id = std::distance(index, max_dis);
+    maxvals[j] = *max_dis;
+    if (*max_dis > 0) {
+      preds[j * 2] = static_cast<float>(max_id % width);
+      preds[j * 2 + 1] = static_cast<float>(max_id / width);
+    }
+  }
+}
+
+void dark_parse(std::vector<float>& heatmap,
+                std::vector<int>& dim,
+                std::vector<float>& coords,
+                int px,
+                int py,
+                int index,
+                int ch) {
+  /*DARK postprocessing, Zhang et al. Distribution-Aware Coordinate
+  Representation for Human Pose Estimation (CVPR 2020).
+  1) offset = - hessian.inv() * derivative
+  2) dx = (heatmap[x+1] - heatmap[x-1])/2.
+  3) dxx = (dx[x+1] - dx[x-1])/2.
+  4) derivative = Mat([dx, dy])
+  5) hessian = Mat([[dxx, dxy], [dxy, dyy]])
+  */
+  std::vector<float>::const_iterator first1 = heatmap.begin() + index;
+  std::vector<float>::const_iterator last1 =
+      heatmap.begin() + index + dim[2] * dim[3];
+  std::vector<float> heatmap_ch(first1, last1);
+  cv::Mat heatmap_mat = cv::Mat(heatmap_ch).reshape(0, dim[2]);
+  heatmap_mat.convertTo(heatmap_mat, CV_32FC1);
+  cv::GaussianBlur(heatmap_mat, heatmap_mat, cv::Size(3, 3), 0, 0);
+  heatmap_mat = heatmap_mat.reshape(1, 1);
+  heatmap_ch = std::vector<float>(heatmap_mat.reshape(1, 1));
+
+  float epsilon = 1e-10;
+  // sample heatmap values around the target location
+  float xy = log(fmax(heatmap_ch[py * dim[3] + px], epsilon));
+  float xr = log(fmax(heatmap_ch[py * dim[3] + px + 1], epsilon));
+  float xl = log(fmax(heatmap_ch[py * dim[3] + px - 1], epsilon));
+
+  float xr2 = log(fmax(heatmap_ch[py * dim[3] + px + 2], epsilon));
+  float xl2 = log(fmax(heatmap_ch[py * dim[3] + px - 2], epsilon));
+  float yu = log(fmax(heatmap_ch[(py + 1) * dim[3] + px], epsilon));
+  float yd = log(fmax(heatmap_ch[(py - 1) * dim[3] + px], epsilon));
+  float yu2 = log(fmax(heatmap_ch[(py + 2) * dim[3] + px], epsilon));
+  float yd2 = log(fmax(heatmap_ch[(py - 2) * dim[3] + px], epsilon));
+  float xryu = log(fmax(heatmap_ch[(py + 1) * dim[3] + px + 1], epsilon));
+  float xryd = log(fmax(heatmap_ch[(py - 1) * dim[3] + px + 1], epsilon));
+  float xlyu = log(fmax(heatmap_ch[(py + 1) * dim[3] + px - 1], epsilon));
+  float xlyd = log(fmax(heatmap_ch[(py - 1) * dim[3] + px - 1], epsilon));
+
+  // compute dx/dy and dxx/dyy with the sampled values
+  float dx = 0.5 * (xr - xl);
+  float dy = 0.5 * (yu - yd);
+  float dxx = 0.25 * (xr2 - 2 * xy + xl2);
+  float dxy = 0.25 * (xryu - xryd - xlyu + xlyd);
+  float dyy = 0.25 * (yu2 - 2 * xy + yd2);
+
+  // finally get the offset from the derivative and the Hessian, built from
+  // dx/dy and dxx/dyy
+  if (dxx * dyy - dxy * dxy != 0) {
+    float M[2][2] = {dxx, dxy, dxy, dyy};
+    float D[2] = {dx, dy};
+    cv::Mat hessian(2, 2, CV_32F, M);
+    cv::Mat derivative(2, 1, CV_32F, D);
+    cv::Mat offset = -hessian.inv() * derivative;
+    coords[ch * 2] += offset.at<float>(0, 0);
+    coords[ch * 2 + 1] += offset.at<float>(1, 0);
+  }
+}
+
+void get_final_preds(std::vector<float>& heatmap,
+                     std::vector<int>& dim,
+                     std::vector<int>& idxout,
+                     std::vector<int>& idxdim,
+                     std::vector<float>& center,
+                     std::vector<float> scale,
+                     std::vector<float>& preds,
+                     int batchid,
+                     bool DARK) {
+  std::vector<float> coords;
+  coords.resize(dim[1] * 2);
+  int heatmap_height = dim[2];
+  int heatmap_width = dim[3];
+
+  for (int j = 0; j < dim[1]; ++j) {
+    int index = (batchid * dim[1] + j) * dim[2] * dim[3];
+
+    int idx = idxout[batchid * dim[1] + j];
+    preds[j * 3] = heatmap[index + idx];
+    coords[j * 2] = idx % heatmap_width;
+    coords[j * 2 + 1] = idx / heatmap_width;
+
+    int px = int(coords[j * 2] + 0.5);
+    int py = int(coords[j * 2 + 1] + 0.5);
+
+    if (DARK && px > 1 && px < heatmap_width - 2 && py > 1 &&
+        py < heatmap_height - 2) {
+      dark_parse(heatmap, dim, coords, px, py, index, j);
+    } else {
+      // shift a quarter pixel toward the higher-response neighbor
+      if (px > 0 && px < heatmap_width - 1) {
+        float diff_x = heatmap[index + py * dim[3] + px + 1] -
+                       heatmap[index + py * dim[3] + px - 1];
+        coords[j * 2] += (diff_x > 0 ? 1 : -1) * 0.25;
+      }
+      if (py > 0 && py < heatmap_height - 1) {
+        float diff_y = heatmap[index + (py + 1) * dim[3] + px] -
+                       heatmap[index + (py - 1) * dim[3] + px];
+        coords[j * 2 + 1] += (diff_y > 0 ? 1 : -1) * 0.25;
+      }
+    }
+  }
+
+  std::vector<int> img_size{heatmap_width, heatmap_height};
+  transform_preds(coords, center, scale, img_size, dim, preds);
+}
+
+void CropImg(cv::Mat& img,
+             cv::Mat& crop_img,
+             std::vector<int>& area,
+             std::vector<float>& center,
+             std::vector<float>& scale,
+             float expandratio) {
+  int crop_x1 = std::max(0, area[0]);
+  int crop_y1 = std::max(0, area[1]);
+  int crop_x2 = std::min(img.cols - 1, area[2]);
+  int crop_y2 = std::min(img.rows - 1, area[3]);
+
+  int center_x = (crop_x1 + crop_x2) / 2.;
+  int center_y = (crop_y1 + crop_y2) / 2.;
+  int half_h = (crop_y2 - crop_y1) / 2.;
+  int half_w = (crop_x2 - crop_x1) / 2.;
+
+  // keep a fixed 3:4 (w:h) aspect ratio for the crop
+  if (half_h * 3 > half_w * 4) {
+    half_w = static_cast<int>(half_h * 0.75);
+  } else {
+    half_h = static_cast<int>(half_w * 4 / 3);
+  }
+
+  crop_x1 =
+      std::max(0, center_x - static_cast<int>(half_w * (1 + expandratio)));
+  crop_y1 =
+      std::max(0, center_y - static_cast<int>(half_h * (1 + expandratio)));
+  crop_x2 = std::min(img.cols - 1,
+                     static_cast<int>(center_x + half_w * (1 + expandratio)));
+  crop_y2 = std::min(img.rows - 1,
+                     static_cast<int>(center_y + half_h * (1 + expandratio)));
+  crop_img =
+      img(cv::Range(crop_y1, crop_y2 + 1), cv::Range(crop_x1, crop_x2 + 1));
+
+  center.clear();
+  center.emplace_back((crop_x1 + crop_x2) / 2);
+  center.emplace_back((crop_y1 + crop_y2) / 2);
+  scale.clear();
+  scale.emplace_back((crop_x2 - crop_x1));
+  scale.emplace_back((crop_y2 - crop_y1));
+}
diff --git a/deploy/third_engine/demo_mnn_kpts/keypoint_postprocess.h b/deploy/third_engine/demo_mnn_kpts/keypoint_postprocess.h
new file mode 100644
index 000000000..928a57bb0
--- /dev/null
+++ b/deploy/third_engine/demo_mnn_kpts/keypoint_postprocess.h
@@ -0,0 +1,67 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <algorithm>
+#include <iostream>
+#include <vector>
+#include <opencv2/opencv.hpp>
+
+std::vector<float> get_3rd_point(std::vector<float>& a, std::vector<float>& b);
+std::vector<float> get_dir(float src_point_x, float src_point_y, float rot_rad);
+void affine_tranform(float pt_x,
+                     float pt_y,
+                     cv::Mat& trans,
+                     std::vector<float>& x,
+                     int p,
+                     int num);
+cv::Mat get_affine_transform(std::vector<float>& center,
+                             std::vector<float>& scale,
+                             float rot,
+                             std::vector<int>& output_size,
+                             int inv);
+void transform_preds(std::vector<float>& coords,
+                     std::vector<float>& center,
+                     std::vector<float>& scale,
+                     std::vector<int>& output_size,
+                     std::vector<int>& dim,
+                     std::vector<float>& target_coords);
+void box_to_center_scale(std::vector<int>& box,
+                         int width,
+                         int height,
+                         std::vector<float>& center,
+                         std::vector<float>& scale);
+void get_max_preds(std::vector<float>& heatmap,
+                   std::vector<int>& dim,
+                   std::vector<float>& preds,
+                   std::vector<float>& maxvals,
+                   int batchid,
+                   int joint_idx);
+void get_final_preds(std::vector<float>& heatmap,
+                     std::vector<int>& dim,
+                     std::vector<int>& idxout,
+                     std::vector<int>& idxdim,
+                     std::vector<float>& center,
+                     std::vector<float> scale,
+                     std::vector<float>& preds,
+                     int batchid,
+                     bool DARK = true);
+
+void CropImg(cv::Mat& img,
+             cv::Mat& crop_img,
+             std::vector<int>& area,
+             std::vector<float>& center,
+             std::vector<float>& scale,
+             float expandratio = 0.25);
diff --git a/deploy/third_engine/demo_mnn_kpts/main.cpp b/deploy/third_engine/demo_mnn_kpts/main.cpp
new file mode 100644
index 000000000..f03e983c6
--- /dev/null
+++ b/deploy/third_engine/demo_mnn_kpts/main.cpp
@@ -0,0 +1,424 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// reference from https://github.com/RangiLyu/nanodet/tree/main/demo_mnn + +#include +#include +#include +#include +#include "keypoint_detector.h" +#include "picodet_mnn.h" + +#define __SAVE_RESULT__ // if defined save drawed results to ../results, else + // show it in windows + +using namespace PaddleDetection; + +struct object_rect { + int x; + int y; + int width; + int height; +}; + +int resize_uniform(cv::Mat& src, + cv::Mat& dst, + cv::Size dst_size, + object_rect& effect_area) { + int w = src.cols; + int h = src.rows; + int dst_w = dst_size.width; + int dst_h = dst_size.height; + dst = cv::Mat(cv::Size(dst_w, dst_h), CV_8UC3, cv::Scalar(0)); + + float ratio_src = w * 1.0 / h; + float ratio_dst = dst_w * 1.0 / dst_h; + + int tmp_w = 0; + int tmp_h = 0; + if (ratio_src > ratio_dst) { + tmp_w = dst_w; + tmp_h = floor((dst_w * 1.0 / w) * h); + } else if (ratio_src < ratio_dst) { + tmp_h = dst_h; + tmp_w = floor((dst_h * 1.0 / h) * w); + } else { + cv::resize(src, dst, dst_size); + effect_area.x = 0; + effect_area.y = 0; + effect_area.width = dst_w; + effect_area.height = dst_h; + return 0; + } + cv::Mat tmp; + cv::resize(src, tmp, cv::Size(tmp_w, tmp_h)); + + if (tmp_w != dst_w) { + int index_w = floor((dst_w - tmp_w) / 2.0); + for (int i = 0; i < dst_h; i++) { + memcpy(dst.data + i * dst_w * 3 + index_w * 3, + tmp.data + i * tmp_w * 3, + tmp_w * 3); + } + effect_area.x = index_w; + effect_area.y = 0; + effect_area.width = tmp_w; + effect_area.height = tmp_h; + } else if (tmp_h != dst_h) { + int index_h = floor((dst_h - tmp_h) / 2.0); + memcpy(dst.data + index_h * dst_w * 3, tmp.data, tmp_w * tmp_h * 3); + effect_area.x = 0; + effect_area.y = index_h; + effect_area.width = tmp_w; + effect_area.height = tmp_h; + } else { + printf("error\n"); + } + return 0; +} + +const int color_list[80][3] = { + {216, 82, 24}, {236, 176, 31}, {125, 46, 141}, {118, 171, 47}, + {76, 189, 237}, {238, 19, 46}, {76, 76, 76}, {153, 153, 153}, + {255, 0, 0}, {255, 127, 0}, {190, 190, 0}, {0, 255, 0}, + {0, 0, 255}, {170, 0, 255}, {84, 84, 0}, {84, 170, 0}, + {84, 255, 0}, {170, 84, 0}, {170, 170, 0}, {170, 255, 0}, + {255, 84, 0}, {255, 170, 0}, {255, 255, 0}, {0, 84, 127}, + {0, 170, 127}, {0, 255, 127}, {84, 0, 127}, {84, 84, 127}, + {84, 170, 127}, {84, 255, 127}, {170, 0, 127}, {170, 84, 127}, + {170, 170, 127}, {170, 255, 127}, {255, 0, 127}, {255, 84, 127}, + {255, 170, 127}, {255, 255, 127}, {0, 84, 255}, {0, 170, 255}, + {0, 255, 255}, {84, 0, 255}, {84, 84, 255}, {84, 170, 255}, + {84, 255, 255}, {170, 0, 255}, {170, 84, 255}, {170, 170, 255}, + {170, 255, 255}, {255, 0, 255}, {255, 84, 255}, {255, 170, 255}, + {42, 0, 0}, {84, 0, 0}, {127, 0, 0}, {170, 0, 0}, + {212, 0, 0}, {255, 0, 0}, {0, 42, 0}, {0, 84, 0}, + {0, 127, 0}, {0, 170, 0}, {0, 212, 0}, {0, 255, 0}, + {0, 0, 42}, {0, 0, 84}, {0, 0, 127}, {0, 0, 170}, + {0, 0, 212}, {0, 0, 255}, {0, 0, 0}, {36, 36, 36}, + {72, 72, 72}, {109, 109, 109}, {145, 145, 145}, {182, 182, 182}, + {218, 218, 218}, {0, 113, 188}, {80, 182, 188}, {127, 127, 0}, +}; + +void draw_bboxes(const cv::Mat& bgr, + const std::vector& bboxes, + object_rect effect_roi, + std::string save_path = "None") { + static const char* class_names[] = { + "person", "bicycle", "car", + "motorcycle", "airplane", "bus", + "train", "truck", "boat", + "traffic light", "fire hydrant", "stop sign", + "parking meter", "bench", "bird", + "cat", "dog", "horse", + "sheep", "cow", "elephant", + "bear", "zebra", "giraffe", + "backpack", "umbrella", "handbag", + "tie", "suitcase", "frisbee", + "skis", 
"snowboard", "sports ball", + "kite", "baseball bat", "baseball glove", + "skateboard", "surfboard", "tennis racket", + "bottle", "wine glass", "cup", + "fork", "knife", "spoon", + "bowl", "banana", "apple", + "sandwich", "orange", "broccoli", + "carrot", "hot dog", "pizza", + "donut", "cake", "chair", + "couch", "potted plant", "bed", + "dining table", "toilet", "tv", + "laptop", "mouse", "remote", + "keyboard", "cell phone", "microwave", + "oven", "toaster", "sink", + "refrigerator", "book", "clock", + "vase", "scissors", "teddy bear", + "hair drier", "toothbrush"}; + + cv::Mat image = bgr.clone(); + int src_w = image.cols; + int src_h = image.rows; + int dst_w = effect_roi.width; + int dst_h = effect_roi.height; + float width_ratio = (float)src_w / (float)dst_w; + float height_ratio = (float)src_h / (float)dst_h; + + for (size_t i = 0; i < bboxes.size(); i++) { + const BoxInfo& bbox = bboxes[i]; + cv::Scalar color = cv::Scalar(color_list[bbox.label][0], + color_list[bbox.label][1], + color_list[bbox.label][2]); + cv::rectangle(image, + cv::Rect(cv::Point((bbox.x1 - effect_roi.x) * width_ratio, + (bbox.y1 - effect_roi.y) * height_ratio), + cv::Point((bbox.x2 - effect_roi.x) * width_ratio, + (bbox.y2 - effect_roi.y) * height_ratio)), + color); + + char text[256]; + sprintf(text, "%s %.1f%%", class_names[bbox.label], bbox.score * 100); + + int baseLine = 0; + cv::Size label_size = + cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.4, 1, &baseLine); + + int x = (bbox.x1 - effect_roi.x) * width_ratio; + int y = + (bbox.y1 - effect_roi.y) * height_ratio - label_size.height - baseLine; + if (y < 0) y = 0; + if (x + label_size.width > image.cols) x = image.cols - label_size.width; + + cv::rectangle( + image, + cv::Rect(cv::Point(x, y), + cv::Size(label_size.width, label_size.height + baseLine)), + color, + -1); + + cv::putText(image, + text, + cv::Point(x, y + label_size.height), + cv::FONT_HERSHEY_SIMPLEX, + 0.4, + cv::Scalar(255, 255, 255)); + } + + if (save_path == "None") { + cv::imshow("image", image); + } else { + cv::imwrite(save_path, image); + std::cout << save_path << std::endl; + } +} + +std::vector coordsback(const cv::Mat image, + const object_rect effect_roi, + const std::vector& bboxes) { + int src_w = image.cols; + int src_h = image.rows; + int dst_w = effect_roi.width; + int dst_h = effect_roi.height; + float width_ratio = (float)src_w / (float)dst_w; + float height_ratio = (float)src_h / (float)dst_h; + + std::vector bboxes_oimg; + + for (int i = 0; i < bboxes.size(); i++) { + auto bbox = bboxes[i]; + bbox.x1 = (bbox.x1 - effect_roi.x) * width_ratio; + bbox.y1 = (bbox.y1 - effect_roi.y) * height_ratio; + bbox.x2 = (bbox.x2 - effect_roi.x) * width_ratio; + bbox.y2 = (bbox.y2 - effect_roi.y) * height_ratio; + bboxes_oimg.emplace_back(bbox); + } + return bboxes_oimg; +} + +void image_infer_kpts(KeyPointDetector* kpts_detector, + cv::Mat image, + const object_rect effect_roi, + const std::vector& results, + std::string img_name = "kpts_vis", + bool save_img = true) { + std::vector cropimgs; + std::vector> center_bs; + std::vector> scale_bs; + std::vector kpts_results; + auto results_oimg = coordsback(image, effect_roi, results); + + for (int i = 0; i < results_oimg.size(); i++) { + auto rect = results_oimg[i]; + if (rect.label == 0) { + cv::Mat cropimg; + std::vector center, scale; + std::vector area = {static_cast(rect.x1), + static_cast(rect.y1), + static_cast(rect.x2), + static_cast(rect.y2)}; + CropImg(image, cropimg, area, center, scale); + // 
cv::imwrite("./test_crop_"+std::to_string(i)+".jpg", cropimg); + cropimgs.emplace_back(cropimg); + center_bs.emplace_back(center); + scale_bs.emplace_back(scale); + } + if (cropimgs.size() == 1 || + (cropimgs.size() > 0 && i == results_oimg.size() - 1)) { + kpts_detector->Predict(cropimgs, center_bs, scale_bs, &kpts_results); + cropimgs.clear(); + center_bs.clear(); + scale_bs.clear(); + } + } + std::vector compression_params; + compression_params.push_back(cv::IMWRITE_JPEG_QUALITY); + compression_params.push_back(95); + std::string kpts_savepath = + "keypoint_" + img_name.substr(img_name.find_last_of('/') + 1); + cv::Mat kpts_vis_img = + VisualizeKptsResult(image, kpts_results, {0, 255, 0}, 0.3); + if (save_img) { + cv::imwrite(kpts_savepath, kpts_vis_img, compression_params); + printf("Visualized output saved as %s\n", kpts_savepath.c_str()); + } else { + cv::imshow("image", kpts_vis_img); + } +} + +int image_demo(PicoDet& detector, + KeyPointDetector* kpts_detector, + const char* imagepath) { + std::vector filenames; + cv::glob(imagepath, filenames, false); + + for (auto img_name : filenames) { + cv::Mat image = cv::imread(img_name); + if (image.empty()) { + fprintf(stderr, "cv::imread %s failed\n", img_name.c_str()); + return -1; + } + object_rect effect_roi; + cv::Mat resized_img; + resize_uniform(image, resized_img, cv::Size(320, 320), effect_roi); + std::vector results; + detector.detect(resized_img, results); + if (kpts_detector) { + image_infer_kpts(kpts_detector, image, effect_roi, results, img_name); + } + } + return 0; +} + +int webcam_demo(PicoDet& detector, + KeyPointDetector* kpts_detector, + int cam_id) { + cv::Mat image; + cv::VideoCapture cap(cam_id); + + while (true) { + cap >> image; + object_rect effect_roi; + cv::Mat resized_img; + resize_uniform(image, resized_img, cv::Size(320, 320), effect_roi); + std::vector results; + detector.detect(resized_img, results); + if (kpts_detector) { + image_infer_kpts(kpts_detector, image, effect_roi, results, "", false); + } + } + return 0; +} + +int video_demo(PicoDet& detector, + KeyPointDetector* kpts_detector, + const char* path) { + cv::Mat image; + cv::VideoCapture cap(path); + + while (true) { + cap >> image; + object_rect effect_roi; + cv::Mat resized_img; + resize_uniform(image, resized_img, cv::Size(320, 320), effect_roi); + std::vector results; + detector.detect(resized_img, results); + if (kpts_detector) { + image_infer_kpts(kpts_detector, image, effect_roi, results, "", false); + } + } + return 0; +} + +int benchmark(KeyPointDetector* kpts_detector) { + int loop_num = 100; + int warm_up = 8; + + double time_min = DBL_MAX; + double time_max = -DBL_MAX; + double time_avg = 0; + cv::Mat image(256, 192, CV_8UC3, cv::Scalar(1, 1, 1)); + std::vector center = {128, 96}; + std::vector scale = {256, 192}; + std::vector cropimgs = {image}; + std::vector> center_bs = {center}; + std::vector> scale_bs = {scale}; + std::vector kpts_results; + + for (int i = 0; i < warm_up + loop_num; i++) { + auto start = std::chrono::steady_clock::now(); + std::vector results; + kpts_detector->Predict(cropimgs, center_bs, scale_bs, &kpts_results); + auto end = std::chrono::steady_clock::now(); + + std::chrono::duration elapsed = end - start; + double time = elapsed.count(); + if (i >= warm_up) { + time_min = (std::min)(time_min, time); + time_max = (std::max)(time_max, time); + time_avg += time; + } + } + time_avg /= loop_num; + fprintf(stderr, + "%20s min = %7.2f max = %7.2f avg = %7.2f\n", + "tinypose", + time_min, + time_max, + time_avg); + 
return 0; +} + +int main(int argc, char** argv) { + if (argc != 3) { + fprintf(stderr, + "usage: %s [mode] [path]. \n For webcam mode=0, path is cam id; \n " + "For image demo, mode=1, path=xxx/xxx/*.jpg; \n For video, mode=2; " + "\n For benchmark, mode=3 path=0.\n", + argv[0]); + return -1; + } + PicoDet detector = + PicoDet("../weight/picodet_m_416.mnn", 416, 416, 4, 0.45, 0.3); + KeyPointDetector* kpts_detector = + new KeyPointDetector("../weight/tinypose256.mnn", 4, 256, 192); + int mode = atoi(argv[1]); + switch (mode) { + case 0: { + int cam_id = atoi(argv[2]); + webcam_demo(detector, kpts_detector, cam_id); + break; + } + case 1: { + const char* images = argv[2]; + image_demo(detector, kpts_detector, images); + break; + } + case 2: { + const char* path = argv[2]; + video_demo(detector, kpts_detector, path); + break; + } + case 3: { + benchmark(kpts_detector); + break; + } + default: { + fprintf(stderr, + "usage: %s [mode] [path]. \n For webcam mode=0, path is cam id; " + "\n For image demo, mode=1, path=xxx/xxx/*.jpg; \n For video, " + "mode=2; \n For benchmark, mode=3 path=0.\n", + argv[0]); + break; + } + } + delete kpts_detector; + kpts_detector = nullptr; +} diff --git a/deploy/third_engine/demo_mnn_kpts/picodet_mnn.cpp b/deploy/third_engine/demo_mnn_kpts/picodet_mnn.cpp new file mode 100644 index 000000000..9f38e68c1 --- /dev/null +++ b/deploy/third_engine/demo_mnn_kpts/picodet_mnn.cpp @@ -0,0 +1,229 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// reference from https://github.com/RangiLyu/nanodet/tree/main/demo_mnn + +#include "picodet_mnn.h" + +using namespace std; + +PicoDet::PicoDet(const std::string &mnn_path, + int input_width, + int input_length, + int num_thread_, + float score_threshold_, + float nms_threshold_) { + num_thread = num_thread_; + in_w = input_width; + in_h = input_length; + score_threshold = score_threshold_; + nms_threshold = nms_threshold_; + + PicoDet_interpreter = std::shared_ptr( + MNN::Interpreter::createFromFile(mnn_path.c_str())); + MNN::ScheduleConfig config; + config.numThread = num_thread; + MNN::BackendConfig backendConfig; + backendConfig.precision = (MNN::BackendConfig::PrecisionMode)2; + config.backendConfig = &backendConfig; + + PicoDet_session = PicoDet_interpreter->createSession(config); + + input_tensor = PicoDet_interpreter->getSessionInput(PicoDet_session, nullptr); +} + +PicoDet::~PicoDet() { + PicoDet_interpreter->releaseModel(); + PicoDet_interpreter->releaseSession(PicoDet_session); +} + +int PicoDet::detect(cv::Mat &raw_image, std::vector &result_list) { + if (raw_image.empty()) { + std::cout << "image is empty ,please check!" 
<< std::endl; + return -1; + } + + image_h = raw_image.rows; + image_w = raw_image.cols; + cv::Mat image; + cv::resize(raw_image, image, cv::Size(in_w, in_h)); + + PicoDet_interpreter->resizeTensor(input_tensor, {1, 3, in_h, in_w}); + PicoDet_interpreter->resizeSession(PicoDet_session); + std::shared_ptr pretreat(MNN::CV::ImageProcess::create( + MNN::CV::BGR, MNN::CV::BGR, mean_vals, 3, norm_vals, 3)); + pretreat->convert(image.data, in_w, in_h, image.step[0], input_tensor); + + auto start = chrono::steady_clock::now(); + + // run network + PicoDet_interpreter->runSession(PicoDet_session); + + // get output data + std::vector> results; + results.resize(num_class); + + for (const auto &head_info : heads_info) { + MNN::Tensor *tensor_scores = PicoDet_interpreter->getSessionOutput( + PicoDet_session, head_info.cls_layer.c_str()); + MNN::Tensor *tensor_boxes = PicoDet_interpreter->getSessionOutput( + PicoDet_session, head_info.dis_layer.c_str()); + + MNN::Tensor tensor_scores_host(tensor_scores, + tensor_scores->getDimensionType()); + tensor_scores->copyToHostTensor(&tensor_scores_host); + + MNN::Tensor tensor_boxes_host(tensor_boxes, + tensor_boxes->getDimensionType()); + tensor_boxes->copyToHostTensor(&tensor_boxes_host); + + decode_infer(&tensor_scores_host, + &tensor_boxes_host, + head_info.stride, + score_threshold, + results); + } + + auto end = chrono::steady_clock::now(); + chrono::duration elapsed = end - start; + cout << "inference time:" << elapsed.count() << " s, "; + + for (int i = 0; i < (int)results.size(); i++) { + nms(results[i], nms_threshold); + + for (auto box : results[i]) { + box.x1 = box.x1 / in_w * image_w; + box.x2 = box.x2 / in_w * image_w; + box.y1 = box.y1 / in_h * image_h; + box.y2 = box.y2 / in_h * image_h; + result_list.push_back(box); + } + } + cout << "detect " << result_list.size() << " objects." 
<< std::endl; + ; + + return 0; +} + +void PicoDet::decode_infer(MNN::Tensor *cls_pred, + MNN::Tensor *dis_pred, + int stride, + float threshold, + std::vector> &results) { + int feature_h = in_h / stride; + int feature_w = in_w / stride; + + for (int idx = 0; idx < feature_h * feature_w; idx++) { + const float *scores = cls_pred->host() + (idx * num_class); + int row = idx / feature_w; + int col = idx % feature_w; + float score = 0; + int cur_label = 0; + for (int label = 0; label < num_class; label++) { + if (scores[label] > score) { + score = scores[label]; + cur_label = label; + } + } + if (score > threshold) { + const float *bbox_pred = + dis_pred->host() + (idx * 4 * (reg_max + 1)); + results[cur_label].push_back( + disPred2Bbox(bbox_pred, cur_label, score, col, row, stride)); + } + } +} + +BoxInfo PicoDet::disPred2Bbox( + const float *&dfl_det, int label, float score, int x, int y, int stride) { + float ct_x = (x + 0.5) * stride; + float ct_y = (y + 0.5) * stride; + std::vector dis_pred; + dis_pred.resize(4); + for (int i = 0; i < 4; i++) { + float dis = 0; + float *dis_after_sm = new float[reg_max + 1]; + activation_function_softmax( + dfl_det + i * (reg_max + 1), dis_after_sm, reg_max + 1); + for (int j = 0; j < reg_max + 1; j++) { + dis += j * dis_after_sm[j]; + } + dis *= stride; + dis_pred[i] = dis; + delete[] dis_after_sm; + } + float xmin = (std::max)(ct_x - dis_pred[0], .0f); + float ymin = (std::max)(ct_y - dis_pred[1], .0f); + float xmax = (std::min)(ct_x + dis_pred[2], (float)in_w); + float ymax = (std::min)(ct_y + dis_pred[3], (float)in_h); + return BoxInfo{xmin, ymin, xmax, ymax, score, label}; +} + +void PicoDet::nms(std::vector &input_boxes, float NMS_THRESH) { + std::sort(input_boxes.begin(), input_boxes.end(), [](BoxInfo a, BoxInfo b) { + return a.score > b.score; + }); + std::vector vArea(input_boxes.size()); + for (int i = 0; i < int(input_boxes.size()); ++i) { + vArea[i] = (input_boxes.at(i).x2 - input_boxes.at(i).x1 + 1) * + (input_boxes.at(i).y2 - input_boxes.at(i).y1 + 1); + } + for (int i = 0; i < int(input_boxes.size()); ++i) { + for (int j = i + 1; j < int(input_boxes.size());) { + float xx1 = (std::max)(input_boxes[i].x1, input_boxes[j].x1); + float yy1 = (std::max)(input_boxes[i].y1, input_boxes[j].y1); + float xx2 = (std::min)(input_boxes[i].x2, input_boxes[j].x2); + float yy2 = (std::min)(input_boxes[i].y2, input_boxes[j].y2); + float w = (std::max)(float(0), xx2 - xx1 + 1); + float h = (std::max)(float(0), yy2 - yy1 + 1); + float inter = w * h; + float ovr = inter / (vArea[i] + vArea[j] - inter); + if (ovr >= NMS_THRESH) { + input_boxes.erase(input_boxes.begin() + j); + vArea.erase(vArea.begin() + j); + } else { + j++; + } + } + } +} + +string PicoDet::get_label_str(int label) { return labels[label]; } + +inline float fast_exp(float x) { + union { + uint32_t i; + float f; + } v{}; + v.i = (1 << 23) * (1.4426950409 * x + 126.93490512f); + return v.f; +} + +inline float sigmoid(float x) { return 1.0f / (1.0f + fast_exp(-x)); } + +template +int activation_function_softmax(const _Tp *src, _Tp *dst, int length) { + const _Tp alpha = *std::max_element(src, src + length); + _Tp denominator{0}; + + for (int i = 0; i < length; ++i) { + dst[i] = fast_exp(src[i] - alpha); + denominator += dst[i]; + } + + for (int i = 0; i < length; ++i) { + dst[i] /= denominator; + } + + return 0; +} diff --git a/deploy/third_engine/demo_mnn_kpts/picodet_mnn.h b/deploy/third_engine/demo_mnn_kpts/picodet_mnn.h new file mode 100644 index 000000000..8686f5cf6 --- /dev/null +++ 
b/deploy/third_engine/demo_mnn_kpts/picodet_mnn.h @@ -0,0 +1,138 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// reference from https://github.com/RangiLyu/nanodet/tree/main/demo_mnn + +#ifndef __PicoDet_H__ +#define __PicoDet_H__ + +#pragma once + +#include "Interpreter.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include "ImageProcess.hpp" +#include "MNNDefine.h" +#include "Tensor.hpp" + +typedef struct HeadInfo_ { + std::string cls_layer; + std::string dis_layer; + int stride; +} HeadInfo; + +typedef struct BoxInfo_ { + float x1; + float y1; + float x2; + float y2; + float score; + int label; +} BoxInfo; + +class PicoDet { + public: + PicoDet(const std::string &mnn_path, + int input_width, + int input_length, + int num_thread_ = 4, + float score_threshold_ = 0.5, + float nms_threshold_ = 0.3); + + ~PicoDet(); + + int detect(cv::Mat &img, std::vector &result_list); + std::string get_label_str(int label); + + private: + void decode_infer(MNN::Tensor *cls_pred, + MNN::Tensor *dis_pred, + int stride, + float threshold, + std::vector> &results); + BoxInfo disPred2Bbox( + const float *&dfl_det, int label, float score, int x, int y, int stride); + void nms(std::vector &input_boxes, float NMS_THRESH); + + private: + std::shared_ptr PicoDet_interpreter; + MNN::Session *PicoDet_session = nullptr; + MNN::Tensor *input_tensor = nullptr; + + int num_thread; + int image_w; + int image_h; + + int in_w = 320; + int in_h = 320; + + float score_threshold; + float nms_threshold; + + const float mean_vals[3] = {103.53f, 116.28f, 123.675f}; + const float norm_vals[3] = {0.017429f, 0.017507f, 0.017125f}; + + const int num_class = 80; + const int reg_max = 7; + + std::vector heads_info{ + // cls_pred|dis_pred|stride + {"save_infer_model/scale_0.tmp_1", "save_infer_model/scale_4.tmp_1", 8}, + {"save_infer_model/scale_1.tmp_1", "save_infer_model/scale_5.tmp_1", 16}, + {"save_infer_model/scale_2.tmp_1", "save_infer_model/scale_6.tmp_1", 32}, + {"save_infer_model/scale_3.tmp_1", "save_infer_model/scale_7.tmp_1", 64}, + }; + + std::vector labels{ + "person", "bicycle", "car", + "motorcycle", "airplane", "bus", + "train", "truck", "boat", + "traffic light", "fire hydrant", "stop sign", + "parking meter", "bench", "bird", + "cat", "dog", "horse", + "sheep", "cow", "elephant", + "bear", "zebra", "giraffe", + "backpack", "umbrella", "handbag", + "tie", "suitcase", "frisbee", + "skis", "snowboard", "sports ball", + "kite", "baseball bat", "baseball glove", + "skateboard", "surfboard", "tennis racket", + "bottle", "wine glass", "cup", + "fork", "knife", "spoon", + "bowl", "banana", "apple", + "sandwich", "orange", "broccoli", + "carrot", "hot dog", "pizza", + "donut", "cake", "chair", + "couch", "potted plant", "bed", + "dining table", "toilet", "tv", + "laptop", "mouse", "remote", + "keyboard", "cell phone", "microwave", + "oven", "toaster", "sink", + "refrigerator", "book", "clock", + "vase", "scissors", 
"teddy bear", + "hair drier", "toothbrush"}; +}; + +template +int activation_function_softmax(const _Tp *src, _Tp *dst, int length); + +inline float fast_exp(float x); +inline float sigmoid(float x); + +#endif -- GitLab