From a6189b1213b53b21bd0c8d977398f50b71308b5d Mon Sep 17 00:00:00 2001 From: Guanghua Yu <742925032@qq.com> Date: Mon, 18 Apr 2022 09:52:09 +0800 Subject: [PATCH] update picodet ncnn and mnn demo (#5721) --- configs/picodet/README.md | 15 +- configs/picodet/README_en.md | 14 +- deploy/third_engine/demo_mnn/CMakeLists.txt | 7 +- deploy/third_engine/demo_mnn/README.md | 108 +-- deploy/third_engine/demo_mnn/main.cpp | 499 ++++------- deploy/third_engine/demo_mnn/picodet_mnn.cpp | 72 +- deploy/third_engine/demo_mnn/picodet_mnn.hpp | 129 ++- .../third_engine/demo_mnn/python/demo_mnn.py | 803 ----------------- deploy/third_engine/demo_ncnn/CMakeLists.txt | 8 +- deploy/third_engine/demo_ncnn/README.md | 116 +-- deploy/third_engine/demo_ncnn/main.cpp | 505 ++++------- deploy/third_engine/demo_ncnn/picodet.cpp | 85 +- deploy/third_engine/demo_ncnn/picodet.h | 102 +-- .../demo_ncnn/python/demo_ncnn.py | 808 ------------------ 14 files changed, 714 insertions(+), 2557 deletions(-) delete mode 100644 deploy/third_engine/demo_mnn/python/demo_mnn.py delete mode 100644 deploy/third_engine/demo_ncnn/python/demo_ncnn.py diff --git a/configs/picodet/README.md b/configs/picodet/README.md index fde657de2..7ccf4c16e 100644 --- a/configs/picodet/README.md +++ b/configs/picodet/README.md @@ -226,11 +226,16 @@ paddle2onnx --model_dir output_inference/picodet_s_320_coco_lcnet/ \ ### 部署 -- OpenVINO demo [Python](../../deploy/third_engine/demo_openvino/python) -- [PaddleLite C++ demo](../../deploy/lite) -- [Android demo(Paddle Lite)](https://github.com/PaddlePaddle/Paddle-Lite-Demo/tree/develop/object_detection/android/app/cxx/picodet_detection_demo) -- ONNXRuntime demo [Python](../../deploy/third_engine/demo_onnxruntime) -- PaddleInference demo [Python](../../deploy/python) & [C++](../../deploy/cpp) +| 预测库 | Python | C++ | 带后处理预测 | +| :-------- | :--------: | :---------------------: | :----------------: | +| OpenVINO | [Python](../../deploy/third_engine/demo_openvino/python) | 
[C++](../../deploy/third_engine/demo_openvino)(带后处理开发中) | ✔︎ | +| Paddle Lite | - | [C++](../../deploy/lite) | ✔︎ | +| Android Demo | - | [Paddle Lite](https://github.com/PaddlePaddle/Paddle-Lite-Demo/tree/develop/object_detection/android/app/cxx/picodet_detection_demo) | ✔︎ | +| PaddleInference | [Python](../../deploy/python) | [C++](../../deploy/cpp) | ✔︎ | +| ONNXRuntime | [Python](../../deploy/third_engine/demo_onnxruntime) | Coming soon | ✔︎ | +| NCNN | Coming soon | [C++](../../deploy/third_engine/demo_ncnn) | ✘ | +| MNN | Coming soon | [C++](../../deploy/third_engine/demo_mnn) | ✘ | + Android demo可视化: diff --git a/configs/picodet/README_en.md b/configs/picodet/README_en.md index 6ab4e42cf..a92ffe7e9 100644 --- a/configs/picodet/README_en.md +++ b/configs/picodet/README_en.md @@ -222,11 +222,15 @@ paddle2onnx --model_dir output_inference/picodet_s_320_coco_lcnet/ \ ### Deploy -- OpenVINO demo [Python](../../deploy/third_engine/demo_openvino/python) -- [PaddleLite C++ demo](../../deploy/lite) -- [Android demo(Paddle Lite)](https://github.com/PaddlePaddle/Paddle-Lite-Demo/tree/develop/object_detection/android/app/cxx/picodet_detection_demo) -- ONNXRuntime demo [Python](../../deploy/third_engine/demo_onnxruntime) -- PaddleInference demo [Python](../../deploy/python) & [C++](../../deploy/cpp) +| Infer Engine | Python | C++ | Predict With Postprocess | +| :-------- | :--------: | :---------------------: | :----------------: | +| OpenVINO | [Python](../../deploy/third_engine/demo_openvino/python) | [C++](../../deploy/third_engine/demo_openvino)(postprocess coming soon) | ✔︎ | +| Paddle Lite | - | [C++](../../deploy/lite) | ✔︎ | +| Android Demo | - | [Paddle Lite](https://github.com/PaddlePaddle/Paddle-Lite-Demo/tree/develop/object_detection/android/app/cxx/picodet_detection_demo) | ✔︎ | +| PaddleInference | [Python](../../deploy/python) | [C++](../../deploy/cpp) | ✔︎ | +| ONNXRuntime | [Python](../../deploy/third_engine/demo_onnxruntime) | Coming soon | ✔︎ | 
+| NCNN | Coming soon | [C++](../../deploy/third_engine/demo_ncnn) | ✘ | +| MNN | Coming soon | [C++](../../deploy/third_engine/demo_mnn) | ✘ | Android demo visualization: diff --git a/deploy/third_engine/demo_mnn/CMakeLists.txt b/deploy/third_engine/demo_mnn/CMakeLists.txt index 07d9b7f86..9afa8cfc0 100644 --- a/deploy/third_engine/demo_mnn/CMakeLists.txt +++ b/deploy/third_engine/demo_mnn/CMakeLists.txt @@ -2,13 +2,14 @@ cmake_minimum_required(VERSION 3.9) project(picodet-mnn) set(CMAKE_CXX_STANDARD 17) +set(MNN_DIR "${CMAKE_SOURCE_DIR}/mnn") # root of the local MNN copy; include/ is resolved below it # find_package(OpenCV REQUIRED PATHS "/work/dependence/opencv/opencv-3.4.3/build") find_package(OpenCV REQUIRED) include_directories( - /path/to/MNN/include/MNN - /path/to/MNN/include - . + ${MNN_DIR}/include + ${MNN_DIR}/include/MNN + ${CMAKE_SOURCE_DIR} ) link_directories(mnn/lib) diff --git a/deploy/third_engine/demo_mnn/README.md b/deploy/third_engine/demo_mnn/README.md index 78a0f3a79..ac11a8e18 100644 --- a/deploy/third_engine/demo_mnn/README.md +++ b/deploy/third_engine/demo_mnn/README.md @@ -1,105 +1,89 @@ # PicoDet MNN Demo -This fold provides PicoDet inference code using -[Alibaba's MNN framework](https://github.com/alibaba/MNN). Most of the implements in -this fold are same as *demo_ncnn*. +本Demo提供的预测代码是根据[Alibaba's MNN framework](https://github.com/alibaba/MNN) 推理库预测的。 -## Install MNN +## C++ Demo -### Python library - -Just run: - -``` shell -pip install MNN +- 第一步:根据[MNN官方编译文档](https://www.yuque.com/mnn/en/build_linux) 编译生成预测库. +- 第二步:编译或下载得到OpenCV库,可参考OpenCV官网,为了方便如果环境是gcc8.2 x86环境,可直接下载以下库: +```shell +wget https://paddledet.bj.bcebos.com/data/opencv-3.4.16_gcc8.2_ffmpeg.tar.gz +tar -xf opencv-3.4.16_gcc8.2_ffmpeg.tar.gz ``` -### C++ library - -Please follow the [official document](https://www.yuque.com/mnn/en/build_linux) to build MNN engine. 
-- Create picodet_m_416_coco.onnx +- 第三步:准备模型 ```shell - modelName=picodet_m_416_coco - # export model + modelName=picodet_s_320_coco_lcnet + # 导出Inference model python tools/export_model.py \ -c configs/picodet/${modelName}.yml \ -o weights=${modelName}.pdparams \ --output_dir=inference_model - # convert to onnx + # 转换到ONNX paddle2onnx --model_dir inference_model/${modelName} \ --model_filename model.pdmodel \ --params_filename model.pdiparams \ --opset_version 11 \ --save_file ${modelName}.onnx - # onnxsim + # 简化模型 python -m onnxsim ${modelName}.onnx ${modelName}_processed.onnx + # 将模型转换至MNN格式 + python -m MNN.tools.mnnconvert -f ONNX --modelFile ${modelName}_processed.onnx --MNNModel picodet_s_320_lcnet.mnn ``` +为了快速测试,可直接下载:[picodet_s_320_lcnet.mnn](https://paddledet.bj.bcebos.com/deploy/third_engine/picodet_s_320_lcnet.mnn)(不带后处理)。 -- Convert model - ``` shell - python -m MNN.tools.mnnconvert -f ONNX --modelFile picodet-416.onnx --MNNModel picodet-416.mnn - ``` -Here are converted model [download link](https://paddledet.bj.bcebos.com/deploy/third_engine/picodet_m_416.mnn). +**注意:**由于MNN里,Matmul算子的输入shape如果不一致计算有问题,带后处理的Demo正在升级中,很快发布。 -## Build - -The python code *demo_mnn.py* can run directly and independently without main PicoDet repo. -`PicoDetONNX` and `PicoDetTorch` are two classes used to check the similarity of MNN inference results -with ONNX model and Pytorch model. They can be remove with no side effects. - -For C++ code, replace `libMNN.so` under *./mnn/lib* with the one you just compiled, modify OpenCV path and MNN path at CMake file, -and run +## 编译可执行程序 +- 第一步:导入lib包 +``` +mkdir mnn && cd mnn && mkdir lib +cp /path/to/MNN/build/libMNN.so lib/ +cp -r /path/to/MNN/include . +cd .. +``` +- 第二步:修改CMakeLists.txt中OpenCV和MNN的路径 +- 第三步:开始编译 ``` shell mkdir build && cd build cmake .. make ``` +如果在build目录下生成`picodet-mnn`可执行文件,就证明成功了。 -Note that a flag at `main.cpp` is used to control whether to show the detection result or save it into a fold. 
- -``` c++ -#define __SAVE_RESULT__ // if defined save drawed results to ../results, else show it in windows -``` - -## Run - -### Python - -`demo_mnn.py` provide an inference class `PicoDetMNN` that combines preprocess, post process, visualization. -Besides it can be used in command line with the form: +## 开始运行 +首先新建预测结果存放目录: ```shell -demo_mnn.py [-h] [--model_path MODEL_PATH] [--cfg_path CFG_PATH] - [--img_fold IMG_FOLD] [--result_fold RESULT_FOLD] - [--input_shape INPUT_SHAPE INPUT_SHAPE] - [--backend {MNN,ONNX,torch}] +cp -r ../demo_onnxruntime/imgs . +cd build +mkdir ../results ``` -For example: - +- 预测一张图片 ``` shell -# run MNN 416 model -python ./demo_mnn.py --model_path ../model/picodet-416.mnn --img_fold ../imgs --result_fold ../results -# run MNN 320 model -python ./demo_mnn.py --model_path ../model/picodet-320.mnn --input_shape 320 320 --backend MNN -# run onnx model -python ./demo_mnn.py --model_path ../model/sim.onnx --backend ONNX +./picodet-mnn 0 ../picodet_s_320_lcnet.mnn 320 320 ../imgs/dog.jpg ``` -### C++ - -C++ inference interface is same with NCNN code, to detect images in a fold, run: +- 测试速度Benchmark ``` shell -./picodet-mnn "1" "../imgs/test.jpg" +./picodet-mnn 1 ../picodet_s_320_lcnet.mnn 320 320 ``` -For speed benchmark +## FAQ -``` shell -./picodet-mnn "3" "0" +- 预测结果精度不对: +请先确认模型输入shape是否对齐,并且模型输出name是否对齐,不带后处理的PicoDet增强版模型输出name如下: +```shell +# 分类分支 | 检测分支 +{"transpose_0.tmp_0", "transpose_1.tmp_0"}, +{"transpose_2.tmp_0", "transpose_3.tmp_0"}, +{"transpose_4.tmp_0", "transpose_5.tmp_0"}, +{"transpose_6.tmp_0", "transpose_7.tmp_0"}, ``` +可使用[netron](https://netron.app)查看具体name,并修改`picodet_mnn.hpp`中相应`non_postprocess_heads_info`数组。 ## Reference [MNN](https://github.com/alibaba/MNN) diff --git a/deploy/third_engine/demo_mnn/main.cpp b/deploy/third_engine/demo_mnn/main.cpp index 52c977343..5737368d5 100644 --- a/deploy/third_engine/demo_mnn/main.cpp +++ b/deploy/third_engine/demo_mnn/main.cpp @@ -11,7 +11,6 @@ // WITHOUT WARRANTIES OR 
CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. -// reference from https://github.com/RangiLyu/nanodet/tree/main/demo_mnn #include "picodet_mnn.hpp" #include @@ -19,354 +18,186 @@ #include #include -#define __SAVE_RESULT__ // if defined save drawed results to ../results, else show it in windows +#define __SAVE_RESULT__ // if defined save drawed results to ../results, else + // show it in windows struct object_rect { - int x; - int y; - int width; - int height; + int x; + int y; + int width; + int height; }; -int resize_uniform(cv::Mat& src, cv::Mat& dst, cv::Size dst_size, object_rect& effect_area) -{ - int w = src.cols; - int h = src.rows; - int dst_w = dst_size.width; - int dst_h = dst_size.height; - dst = cv::Mat(cv::Size(dst_w, dst_h), CV_8UC3, cv::Scalar(0)); - - float ratio_src = w * 1.0 / h; - float ratio_dst = dst_w * 1.0 / dst_h; - - int tmp_w = 0; - int tmp_h = 0; - if (ratio_src > ratio_dst) { - tmp_w = dst_w; - tmp_h = floor((dst_w * 1.0 / w) * h); - } - else if (ratio_src < ratio_dst) { - tmp_h = dst_h; - tmp_w = floor((dst_h * 1.0 / h) * w); - } - else { - cv::resize(src, dst, dst_size); - effect_area.x = 0; - effect_area.y = 0; - effect_area.width = dst_w; - effect_area.height = dst_h; - return 0; - } - cv::Mat tmp; - cv::resize(src, tmp, cv::Size(tmp_w, tmp_h)); - - if (tmp_w != dst_w) { - int index_w = floor((dst_w - tmp_w) / 2.0); - for (int i = 0; i < dst_h; i++) { - memcpy(dst.data + i * dst_w * 3 + index_w * 3, tmp.data + i * tmp_w * 3, tmp_w * 3); - } - effect_area.x = index_w; - effect_area.y = 0; - effect_area.width = tmp_w; - effect_area.height = tmp_h; - } - else if (tmp_h != dst_h) { - int index_h = floor((dst_h - tmp_h) / 2.0); - memcpy(dst.data + index_h * dst_w * 3, tmp.data, tmp_w * tmp_h * 3); - effect_area.x = 0; - effect_area.y = index_h; - effect_area.width = tmp_w; - effect_area.height = tmp_h; - } - else { - 
printf("error\n"); +std::vector GenerateColorMap(int num_class) { + auto colormap = std::vector(3 * num_class, 0); + for (int i = 0; i < num_class; ++i) { + int j = 0; + int lab = i; + while (lab) { + colormap[i * 3] |= (((lab >> 0) & 1) << (7 - j)); + colormap[i * 3 + 1] |= (((lab >> 1) & 1) << (7 - j)); + colormap[i * 3 + 2] |= (((lab >> 2) & 1) << (7 - j)); + ++j; + lab >>= 3; } - return 0; + } + return colormap; } -const int color_list[80][3] = -{ - {216 , 82 , 24}, - {236 ,176 , 31}, - {125 , 46 ,141}, - {118 ,171 , 47}, - { 76 ,189 ,237}, - {238 , 19 , 46}, - { 76 , 76 , 76}, - {153 ,153 ,153}, - {255 , 0 , 0}, - {255 ,127 , 0}, - {190 ,190 , 0}, - { 0 ,255 , 0}, - { 0 , 0 ,255}, - {170 , 0 ,255}, - { 84 , 84 , 0}, - { 84 ,170 , 0}, - { 84 ,255 , 0}, - {170 , 84 , 0}, - {170 ,170 , 0}, - {170 ,255 , 0}, - {255 , 84 , 0}, - {255 ,170 , 0}, - {255 ,255 , 0}, - { 0 , 84 ,127}, - { 0 ,170 ,127}, - { 0 ,255 ,127}, - { 84 , 0 ,127}, - { 84 , 84 ,127}, - { 84 ,170 ,127}, - { 84 ,255 ,127}, - {170 , 0 ,127}, - {170 , 84 ,127}, - {170 ,170 ,127}, - {170 ,255 ,127}, - {255 , 0 ,127}, - {255 , 84 ,127}, - {255 ,170 ,127}, - {255 ,255 ,127}, - { 0 , 84 ,255}, - { 0 ,170 ,255}, - { 0 ,255 ,255}, - { 84 , 0 ,255}, - { 84 , 84 ,255}, - { 84 ,170 ,255}, - { 84 ,255 ,255}, - {170 , 0 ,255}, - {170 , 84 ,255}, - {170 ,170 ,255}, - {170 ,255 ,255}, - {255 , 0 ,255}, - {255 , 84 ,255}, - {255 ,170 ,255}, - { 42 , 0 , 0}, - { 84 , 0 , 0}, - {127 , 0 , 0}, - {170 , 0 , 0}, - {212 , 0 , 0}, - {255 , 0 , 0}, - { 0 , 42 , 0}, - { 0 , 84 , 0}, - { 0 ,127 , 0}, - { 0 ,170 , 0}, - { 0 ,212 , 0}, - { 0 ,255 , 0}, - { 0 , 0 , 42}, - { 0 , 0 , 84}, - { 0 , 0 ,127}, - { 0 , 0 ,170}, - { 0 , 0 ,212}, - { 0 , 0 ,255}, - { 0 , 0 , 0}, - { 36 , 36 , 36}, - { 72 , 72 , 72}, - {109 ,109 ,109}, - {145 ,145 ,145}, - {182 ,182 ,182}, - {218 ,218 ,218}, - { 0 ,113 ,188}, - { 80 ,182 ,188}, - {127 ,127 , 0}, -}; - -void draw_bboxes(const cv::Mat& bgr, const std::vector& bboxes, object_rect effect_roi, 
std::string save_path="None") -{ - static const char* class_names[] = { "person", "bicycle", "car", "motorcycle", "airplane", "bus", - "train", "truck", "boat", "traffic light", "fire hydrant", - "stop sign", "parking meter", "bench", "bird", "cat", "dog", - "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", - "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", - "skis", "snowboard", "sports ball", "kite", "baseball bat", - "baseball glove", "skateboard", "surfboard", "tennis racket", - "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", - "banana", "apple", "sandwich", "orange", "broccoli", "carrot", - "hot dog", "pizza", "donut", "cake", "chair", "couch", - "potted plant", "bed", "dining table", "toilet", "tv", "laptop", - "mouse", "remote", "keyboard", "cell phone", "microwave", "oven", - "toaster", "sink", "refrigerator", "book", "clock", "vase", - "scissors", "teddy bear", "hair drier", "toothbrush" - }; - - cv::Mat image = bgr.clone(); - int src_w = image.cols; - int src_h = image.rows; - int dst_w = effect_roi.width; - int dst_h = effect_roi.height; - float width_ratio = (float)src_w / (float)dst_w; - float height_ratio = (float)src_h / (float)dst_h; - - - for (size_t i = 0; i < bboxes.size(); i++) - { - const BoxInfo& bbox = bboxes[i]; - cv::Scalar color = cv::Scalar(color_list[bbox.label][0], color_list[bbox.label][1], color_list[bbox.label][2]); - cv::rectangle(image, cv::Rect(cv::Point((bbox.x1 - effect_roi.x) * width_ratio, (bbox.y1 - effect_roi.y) * height_ratio), - cv::Point((bbox.x2 - effect_roi.x) * width_ratio, (bbox.y2 - effect_roi.y) * height_ratio)), color); - - char text[256]; - sprintf(text, "%s %.1f%%", class_names[bbox.label], bbox.score * 100); - - int baseLine = 0; - cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.4, 1, &baseLine); - - int x = (bbox.x1 - effect_roi.x) * width_ratio; - int y = (bbox.y1 - effect_roi.y) * height_ratio - label_size.height - baseLine; - if (y < 0) 
- y = 0; - if (x + label_size.width > image.cols) - x = image.cols - label_size.width; - - cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)), - color, -1); - - cv::putText(image, text, cv::Point(x, y + label_size.height), - cv::FONT_HERSHEY_SIMPLEX, 0.4, cv::Scalar(255, 255, 255)); - } - - if (save_path == "None") - { - cv::imshow("image", image); - } - else - { - cv::imwrite(save_path, image); - std::cout << save_path << std::endl; - } -} - - -int image_demo(PicoDet &detector, const char* imagepath) -{ - std::vector filenames; - cv::glob(imagepath, filenames, false); - - for (auto img_name : filenames) - { - cv::Mat image = cv::imread(img_name); - if (image.empty()) - { - fprintf(stderr, "cv::imread %s failed\n", img_name.c_str()); - return -1; - } - object_rect effect_roi; - cv::Mat resized_img; - resize_uniform(image, resized_img, cv::Size(320, 320), effect_roi); - std::vector results; - detector.detect(resized_img, results); - - #ifdef __SAVE_RESULT__ - std::string save_path = img_name; - draw_bboxes(image, results, effect_roi, save_path.replace(3, 4, "results")); - #else - draw_bboxes(image, results, effect_roi); - cv::waitKey(0); - #endif - - } - return 0; +void draw_bboxes(const cv::Mat &im, const std::vector &bboxes, + std::string save_path = "None") { + static const char *class_names[] = { + "person", "bicycle", "car", + "motorcycle", "airplane", "bus", + "train", "truck", "boat", + "traffic light", "fire hydrant", "stop sign", + "parking meter", "bench", "bird", + "cat", "dog", "horse", + "sheep", "cow", "elephant", + "bear", "zebra", "giraffe", + "backpack", "umbrella", "handbag", + "tie", "suitcase", "frisbee", + "skis", "snowboard", "sports ball", + "kite", "baseball bat", "baseball glove", + "skateboard", "surfboard", "tennis racket", + "bottle", "wine glass", "cup", + "fork", "knife", "spoon", + "bowl", "banana", "apple", + "sandwich", "orange", "broccoli", + "carrot", "hot dog", "pizza", + 
"donut", "cake", "chair", + "couch", "potted plant", "bed", + "dining table", "toilet", "tv", + "laptop", "mouse", "remote", + "keyboard", "cell phone", "microwave", + "oven", "toaster", "sink", + "refrigerator", "book", "clock", + "vase", "scissors", "teddy bear", + "hair drier", "toothbrush"}; + + cv::Mat image = im.clone(); + int src_w = image.cols; + int src_h = image.rows; + int thickness = 2; + auto colormap = GenerateColorMap(sizeof(class_names) / sizeof(class_names[0])); // number of classes, not the array's byte size + + for (size_t i = 0; i < bboxes.size(); i++) { + const BoxInfo &bbox = bboxes[i]; + std::cout << bbox.x1 << ". " << bbox.y1 << ". " << bbox.x2 << ". " + << bbox.y2 << ". " << std::endl; + int c1 = colormap[3 * bbox.label + 0]; + int c2 = colormap[3 * bbox.label + 1]; + int c3 = colormap[3 * bbox.label + 2]; + cv::Scalar color = cv::Scalar(c1, c2, c3); + // cv::Scalar color = cv::Scalar(0, 0, 255); + cv::rectangle(image, cv::Rect(cv::Point(bbox.x1, bbox.y1), + cv::Point(bbox.x2, bbox.y2)), + color, 1, cv::LINE_AA); + + char text[256]; + sprintf(text, "%s %.1f%%", class_names[bbox.label], bbox.score * 100); + + int baseLine = 0; + cv::Size label_size = + cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.4, 1, &baseLine); + + int x = bbox.x1; + int y = bbox.y1 - label_size.height - baseLine; + if (y < 0) + y = 0; + if (x + label_size.width > image.cols) + x = image.cols - label_size.width; + + cv::rectangle(image, cv::Rect(cv::Point(x, y), + cv::Size(label_size.width, + label_size.height + baseLine)), + color, -1); + + cv::putText(image, text, cv::Point(x, y + label_size.height), + cv::FONT_HERSHEY_SIMPLEX, 0.4, cv::Scalar(255, 255, 255), 1, + cv::LINE_AA); + } + + if (save_path == "None") { + cv::imshow("image", image); + } else { + cv::imwrite(save_path, image); + std::cout << save_path << std::endl; + } } -int webcam_demo(PicoDet& detector, int cam_id) -{ - cv::Mat image; - cv::VideoCapture cap(cam_id); +int image_demo(PicoDet &detector, const 
char *imagepath) { + std::vector filenames; + cv::glob(imagepath, 
filenames, false); - while (true) - { - cap >> image; - object_rect effect_roi; - cv::Mat resized_img; - resize_uniform(image, resized_img, cv::Size(320, 320), effect_roi); - std::vector results; - detector.detect(resized_img, results); - draw_bboxes(image, results, effect_roi); - cv::waitKey(1); + for (auto img_name : filenames) { + cv::Mat image = cv::imread(img_name, cv::IMREAD_COLOR); + if (image.empty()) { + fprintf(stderr, "cv::imread %s failed\n", img_name.c_str()); + return -1; } - return 0; + std::vector results; + detector.detect(image, results, false); + std::cout << "detect done." << std::endl; + +#ifdef __SAVE_RESULT__ + std::string save_path = img_name; + draw_bboxes(image, results, save_path.replace(3, 4, "results")); +#else + draw_bboxes(image, results); + cv::waitKey(0); +#endif + } + return 0; } -int video_demo(PicoDet& detector, const char* path) -{ - cv::Mat image; - cv::VideoCapture cap(path); - - while (true) - { - cap >> image; - object_rect effect_roi; - cv::Mat resized_img; - resize_uniform(image, resized_img, cv::Size(320, 320), effect_roi); - std::vector results; - detector.detect(resized_img, results); - draw_bboxes(image, results, effect_roi); - cv::waitKey(1); +int benchmark(PicoDet &detector, int width, int height) { + int loop_num = 100; + int warm_up = 8; + + double time_min = DBL_MAX; + double time_max = -DBL_MAX; + double time_avg = 0; + cv::Mat image(height, width, CV_8UC3, cv::Scalar(1, 1, 1)); // cv::Mat takes (rows, cols), i.e. 
(height, width) + for (int i = 0; i < warm_up + loop_num; i++) { + auto start = std::chrono::steady_clock::now(); + std::vector results; + detector.detect(image, results, false); + auto end = std::chrono::steady_clock::now(); + + std::chrono::duration elapsed = end - start; + double time = elapsed.count(); + if (i >= warm_up) { + time_min = (std::min)(time_min, time); + time_max = (std::max)(time_max, time); + time_avg += time; } - return 0; + } + time_avg /= loop_num; + fprintf(stderr, "%20s min = %7.2f max = %7.2f avg = %7.2f\n", "picodet", + time_min, 
time_max, time_avg); + return 0; } -int benchmark(PicoDet& detector) -{ - int loop_num = 100; - int warm_up = 8; - - double time_min = DBL_MAX; - double time_max = -DBL_MAX; - double time_avg = 0; - cv::Mat image(320, 320, CV_8UC3, cv::Scalar(1, 1, 1)); - for (int i = 0; i < warm_up + loop_num; i++) - { - auto start = std::chrono::steady_clock::now(); - std::vector results; - detector.detect(image, results); - auto end = std::chrono::steady_clock::now(); - - std::chrono::duration elapsed = end - start; - double time = elapsed.count(); - if (i >= warm_up) - { - time_min = (std::min)(time_min, time); - time_max = (std::max)(time_max, time); - time_avg += time; - } - } - time_avg /= loop_num; - fprintf(stderr, "%20s min = %7.2f max = %7.2f avg = %7.2f\n", "picodet", time_min, time_max, time_avg); - return 0; -} - - -int main(int argc, char** argv) -{ - if (argc != 3) - { - fprintf(stderr, "usage: %s [mode] [path]. \n For webcam mode=0, path is cam id; \n For image demo, mode=1, path=xxx/xxx/*.jpg; \n For video, mode=2; \n For benchmark, mode=3 path=0.\n", argv[0]); - return -1; - } - PicoDet detector = PicoDet("../weight/picodet-416.mnn", 416, 416, 4, 0.45, 0.3); - int mode = atoi(argv[1]); - switch (mode) - { - case 0:{ - int cam_id = atoi(argv[2]); - webcam_demo(detector, cam_id); - break; - } - case 1:{ - const char* images = argv[2]; - image_demo(detector, images); - break; - } - case 2:{ - const char* path = argv[2]; - video_demo(detector, path); - break; - } - case 3:{ - benchmark(detector); - break; - } - default:{ - fprintf(stderr, "usage: %s [mode] [path]. 
\n For webcam mode=0, path is cam id; \n For image demo, mode=1, path=xxx/xxx/*.jpg; \n For video, mode=2; \n For benchmark, mode=3 path=0.\n", argv[0]); - break; - } +int main(int argc, char **argv) { + int mode = atoi(argv[1]); + std::string model_path = argv[2]; + int height = 320; + int width = 320; + if (argc >= 5) { // argv[3] and argv[4] both exist only from argc == 5 upward + height = atoi(argv[3]); + width = atoi(argv[4]); + } + PicoDet detector = PicoDet(model_path, width, height, 4, 0.45, 0.3); + if (mode == 1) { + benchmark(detector, width, height); + } else { + if (argc < 6) { // image mode needs argv[5]: mode, model, height, width, image + std::cout << "Must set image file, such as ./picodet-mnn 0 " + "../picodet_s_320_lcnet.mnn 320 320 img.jpg" + << std::endl; + return -1; } + image_demo(detector, argv[5]); + } } diff --git a/deploy/third_engine/demo_mnn/picodet_mnn.cpp b/deploy/third_engine/demo_mnn/picodet_mnn.cpp index d6cb9c9fd..a315f14a9 100644 --- a/deploy/third_engine/demo_mnn/picodet_mnn.cpp +++ b/deploy/third_engine/demo_mnn/picodet_mnn.cpp @@ -44,7 +44,8 @@ PicoDet::~PicoDet() { PicoDet_interpreter->releaseSession(PicoDet_session); } -int PicoDet::detect(cv::Mat &raw_image, std::vector &result_list) { +int PicoDet::detect(cv::Mat &raw_image, std::vector &result_list, + bool has_postprocess) { if (raw_image.empty()) { std::cout << "image is empty ,please check!" 
<< std::endl; return -1; @@ -70,22 +71,57 @@ int PicoDet::detect(cv::Mat &raw_image, std::vector &result_list) { std::vector> results; results.resize(num_class); - for (const auto &head_info : heads_info) { - MNN::Tensor *tensor_scores = PicoDet_interpreter->getSessionOutput( - PicoDet_session, head_info.cls_layer.c_str()); - MNN::Tensor *tensor_boxes = PicoDet_interpreter->getSessionOutput( - PicoDet_session, head_info.dis_layer.c_str()); - - MNN::Tensor tensor_scores_host(tensor_scores, - tensor_scores->getDimensionType()); - tensor_scores->copyToHostTensor(&tensor_scores_host); - - MNN::Tensor tensor_boxes_host(tensor_boxes, - tensor_boxes->getDimensionType()); - tensor_boxes->copyToHostTensor(&tensor_boxes_host); - - decode_infer(&tensor_scores_host, &tensor_boxes_host, head_info.stride, - score_threshold, results); + if (has_postprocess) { + auto bbox_out_tensor = PicoDet_interpreter->getSessionOutput( + PicoDet_session, nms_heads_info[0].c_str()); + auto class_out_tensor = PicoDet_interpreter->getSessionOutput( + PicoDet_session, nms_heads_info[1].c_str()); + // bbox branch + auto tensor_bbox_host = + new MNN::Tensor(bbox_out_tensor, MNN::Tensor::CAFFE); + bbox_out_tensor->copyToHostTensor(tensor_bbox_host); + auto bbox_output_shape = tensor_bbox_host->shape(); + int output_size = 1; + for (int j = 0; j < bbox_output_shape.size(); ++j) { + output_size *= bbox_output_shape[j]; + } + std::cout << "output_size:" << output_size << std::endl; + bbox_output_data_.resize(output_size); + std::copy_n(tensor_bbox_host->host(), output_size, + bbox_output_data_.data()); + delete tensor_bbox_host; + // class branch + auto tensor_class_host = + new MNN::Tensor(class_out_tensor, MNN::Tensor::CAFFE); + class_out_tensor->copyToHostTensor(tensor_class_host); + auto class_output_shape = tensor_class_host->shape(); + output_size = 1; + for (int j = 0; j < class_output_shape.size(); ++j) { + output_size *= class_output_shape[j]; + } + std::cout << "output_size:" << output_size << 
std::endl; + class_output_data_.resize(output_size); + std::copy_n(tensor_class_host->host(), output_size, + class_output_data_.data()); + delete tensor_class_host; + } else { + for (const auto &head_info : non_postprocess_heads_info) { + MNN::Tensor *tensor_scores = PicoDet_interpreter->getSessionOutput( + PicoDet_session, head_info.cls_layer.c_str()); + MNN::Tensor *tensor_boxes = PicoDet_interpreter->getSessionOutput( + PicoDet_session, head_info.dis_layer.c_str()); + + MNN::Tensor tensor_scores_host(tensor_scores, + tensor_scores->getDimensionType()); + tensor_scores->copyToHostTensor(&tensor_scores_host); + + MNN::Tensor tensor_boxes_host(tensor_boxes, + tensor_boxes->getDimensionType()); + tensor_boxes->copyToHostTensor(&tensor_boxes_host); + + decode_infer(&tensor_scores_host, &tensor_boxes_host, head_info.stride, + score_threshold, results); + } } auto end = chrono::steady_clock::now(); @@ -188,8 +224,6 @@ void PicoDet::nms(std::vector &input_boxes, float NMS_THRESH) { } } -string PicoDet::get_label_str(int label) { return labels[label]; } - inline float fast_exp(float x) { union { uint32_t i; diff --git a/deploy/third_engine/demo_mnn/picodet_mnn.hpp b/deploy/third_engine/demo_mnn/picodet_mnn.hpp index ecece8b17..4744040e2 100644 --- a/deploy/third_engine/demo_mnn/picodet_mnn.hpp +++ b/deploy/third_engine/demo_mnn/picodet_mnn.hpp @@ -11,7 +11,6 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
-// reference from https://github.com/RangiLyu/nanodet/tree/main/demo_mnn #ifndef __PicoDet_H__ #define __PicoDet_H__ @@ -20,90 +19,84 @@ #include "Interpreter.hpp" +#include "ImageProcess.hpp" #include "MNNDefine.h" #include "Tensor.hpp" -#include "ImageProcess.hpp" -#include #include +#include #include +#include +#include #include #include -#include -#include - -typedef struct HeadInfo_ -{ - std::string cls_layer; - std::string dis_layer; - int stride; -} HeadInfo; - -typedef struct BoxInfo_ -{ - float x1; - float y1; - float x2; - float y2; - float score; - int label; +typedef struct NonPostProcessHeadInfo_ { + std::string cls_layer; + std::string dis_layer; + int stride; +} NonPostProcessHeadInfo; + +typedef struct BoxInfo_ { + float x1; + float y1; + float x2; + float y2; + float score; + int label; } BoxInfo; class PicoDet { public: - PicoDet(const std::string &mnn_path, - int input_width, int input_length, int num_thread_ = 4, float score_threshold_ = 0.5, float nms_threshold_ = 0.3); + PicoDet(const std::string &mnn_path, int input_width, int input_length, + int num_thread_ = 4, float score_threshold_ = 0.5, + float nms_threshold_ = 0.3); - ~PicoDet(); + ~PicoDet(); - int detect(cv::Mat &img, std::vector &result_list); - std::string get_label_str(int label); + int detect(cv::Mat &img, std::vector &result_list, + bool has_postprocess); private: - void decode_infer(MNN::Tensor *cls_pred, MNN::Tensor *dis_pred, int stride, float threshold, std::vector> &results); - BoxInfo disPred2Bbox(const float *&dfl_det, int label, float score, int x, int y, int stride); - void nms(std::vector &input_boxes, float NMS_THRESH); + void decode_infer(MNN::Tensor *cls_pred, MNN::Tensor *dis_pred, int stride, + float threshold, + std::vector> &results); + BoxInfo disPred2Bbox(const float *&dfl_det, int label, float score, int x, + int y, int stride); + void nms(std::vector &input_boxes, float NMS_THRESH); private: - - std::shared_ptr PicoDet_interpreter; - MNN::Session 
*PicoDet_session = nullptr; - MNN::Tensor *input_tensor = nullptr; - - int num_thread; - int image_w; - int image_h; - - int in_w = 320; - int in_h = 320; - - float score_threshold; - float nms_threshold; - - const float mean_vals[3] = { 103.53f, 116.28f, 123.675f }; - const float norm_vals[3] = { 0.017429f, 0.017507f, 0.017125f }; - - const int num_class = 80; - const int reg_max = 7; - - std::vector heads_info{ - // cls_pred|dis_pred|stride - {"save_infer_model/scale_0.tmp_1", "save_infer_model/scale_4.tmp_1", 8}, - {"save_infer_model/scale_1.tmp_1", "save_infer_model/scale_5.tmp_1", 16}, - {"save_infer_model/scale_2.tmp_1", "save_infer_model/scale_6.tmp_1", 32}, - {"save_infer_model/scale_3.tmp_1", "save_infer_model/scale_7.tmp_1", 64}, - }; - - std::vector - labels{"person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light", - "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow", - "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", - "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", - "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", - "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch", - "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone", - "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear", - "hair drier", "toothbrush"}; + std::shared_ptr PicoDet_interpreter; + MNN::Session *PicoDet_session = nullptr; + MNN::Tensor *input_tensor = nullptr; + + int num_thread; + int image_w; + int image_h; + + int in_w = 320; + int in_h = 320; + + float score_threshold; + float nms_threshold; + + const float mean_vals[3] = {103.53f, 116.28f, 123.675f}; + const float 
norm_vals[3] = {0.017429f, 0.017507f, 0.017125f}; + + const int num_class = 80; + const int reg_max = 7; + + std::vector bbox_output_data_; + std::vector class_output_data_; + + std::vector nms_heads_info{"tmp_16", "concat_4.tmp_0"}; + // If not export post-process, will use non_postprocess_heads_info + std::vector non_postprocess_heads_info{ + // cls_pred|dis_pred|stride + {"transpose_0.tmp_0", "transpose_1.tmp_0", 8}, + {"transpose_2.tmp_0", "transpose_3.tmp_0", 16}, + {"transpose_4.tmp_0", "transpose_5.tmp_0", 32}, + {"transpose_6.tmp_0", "transpose_7.tmp_0", 64}, + }; }; template diff --git a/deploy/third_engine/demo_mnn/python/demo_mnn.py b/deploy/third_engine/demo_mnn/python/demo_mnn.py deleted file mode 100644 index c5f880938..000000000 --- a/deploy/third_engine/demo_mnn/python/demo_mnn.py +++ /dev/null @@ -1,803 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# reference from https://github.com/RangiLyu/nanodet/tree/main/demo_mnn - -# -*- coding: utf-8 -*- -import argparse -from abc import ABCMeta, abstractmethod -from pathlib import Path - -import cv2 -import matplotlib.pyplot as plt -import numpy as np -from scipy.special import softmax -from tqdm import tqdm - -_COLORS = (np.array([ - 0.000, - 0.447, - 0.741, - 0.850, - 0.325, - 0.098, - 0.929, - 0.694, - 0.125, - 0.494, - 0.184, - 0.556, - 0.466, - 0.674, - 0.188, - 0.301, - 0.745, - 0.933, - 0.635, - 0.078, - 0.184, - 0.300, - 0.300, - 0.300, - 0.600, - 0.600, - 0.600, - 1.000, - 0.000, - 0.000, - 1.000, - 0.500, - 0.000, - 0.749, - 0.749, - 0.000, - 0.000, - 1.000, - 0.000, - 0.000, - 0.000, - 1.000, - 0.667, - 0.000, - 1.000, - 0.333, - 0.333, - 0.000, - 0.333, - 0.667, - 0.000, - 0.333, - 1.000, - 0.000, - 0.667, - 0.333, - 0.000, - 0.667, - 0.667, - 0.000, - 0.667, - 1.000, - 0.000, - 1.000, - 0.333, - 0.000, - 1.000, - 0.667, - 0.000, - 1.000, - 1.000, - 0.000, - 0.000, - 0.333, - 0.500, - 0.000, - 0.667, - 0.500, - 0.000, - 1.000, - 0.500, - 0.333, - 0.000, - 0.500, - 0.333, - 0.333, - 0.500, - 0.333, - 0.667, - 0.500, - 0.333, - 1.000, - 0.500, - 0.667, - 0.000, - 0.500, - 0.667, - 0.333, - 0.500, - 0.667, - 0.667, - 0.500, - 0.667, - 1.000, - 0.500, - 1.000, - 0.000, - 0.500, - 1.000, - 0.333, - 0.500, - 1.000, - 0.667, - 0.500, - 1.000, - 1.000, - 0.500, - 0.000, - 0.333, - 1.000, - 0.000, - 0.667, - 1.000, - 0.000, - 1.000, - 1.000, - 0.333, - 0.000, - 1.000, - 0.333, - 0.333, - 1.000, - 0.333, - 0.667, - 1.000, - 0.333, - 1.000, - 1.000, - 0.667, - 0.000, - 1.000, - 0.667, - 0.333, - 1.000, - 0.667, - 0.667, - 1.000, - 0.667, - 1.000, - 1.000, - 1.000, - 0.000, - 1.000, - 1.000, - 0.333, - 1.000, - 1.000, - 0.667, - 1.000, - 0.333, - 0.000, - 0.000, - 0.500, - 0.000, - 0.000, - 0.667, - 0.000, - 0.000, - 0.833, - 0.000, - 0.000, - 1.000, - 0.000, - 0.000, - 0.000, - 0.167, - 0.000, - 0.000, - 0.333, - 0.000, - 0.000, - 0.500, - 0.000, - 0.000, - 0.667, 
- 0.000, - 0.000, - 0.833, - 0.000, - 0.000, - 1.000, - 0.000, - 0.000, - 0.000, - 0.167, - 0.000, - 0.000, - 0.333, - 0.000, - 0.000, - 0.500, - 0.000, - 0.000, - 0.667, - 0.000, - 0.000, - 0.833, - 0.000, - 0.000, - 1.000, - 0.000, - 0.000, - 0.000, - 0.143, - 0.143, - 0.143, - 0.286, - 0.286, - 0.286, - 0.429, - 0.429, - 0.429, - 0.571, - 0.571, - 0.571, - 0.714, - 0.714, - 0.714, - 0.857, - 0.857, - 0.857, - 0.000, - 0.447, - 0.741, - 0.314, - 0.717, - 0.741, - 0.50, - 0.5, - 0, -]).astype(np.float32).reshape(-1, 3)) - - -def get_resize_matrix(raw_shape, dst_shape, keep_ratio): - """ - Get resize matrix for resizing raw img to input size - :param raw_shape: (width, height) of raw image - :param dst_shape: (width, height) of input image - :param keep_ratio: whether keep original ratio - :return: 3x3 Matrix - """ - r_w, r_h = raw_shape - d_w, d_h = dst_shape - Rs = np.eye(3) - if keep_ratio: - C = np.eye(3) - C[0, 2] = -r_w / 2 - C[1, 2] = -r_h / 2 - - if r_w / r_h < d_w / d_h: - ratio = d_h / r_h - else: - ratio = d_w / r_w - Rs[0, 0] *= ratio - Rs[1, 1] *= ratio - - T = np.eye(3) - T[0, 2] = 0.5 * d_w - T[1, 2] = 0.5 * d_h - return T @Rs @C - else: - Rs[0, 0] *= d_w / r_w - Rs[1, 1] *= d_h / r_h - return Rs - - -def warp_boxes(boxes, M, width, height): - """Apply transform to boxes - Copy from picodet/data/transform/warp.py - """ - n = len(boxes) - if n: - # warp points - xy = np.ones((n * 4, 3)) - xy[:, :2] = boxes[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape( - n * 4, 2) # x1y1, x2y2, x1y2, x2y1 - xy = xy @M.T # transform - xy = (xy[:, :2] / xy[:, 2:3]).reshape(n, 8) # rescale - # create new boxes - x = xy[:, [0, 2, 4, 6]] - y = xy[:, [1, 3, 5, 7]] - xy = np.concatenate( - (x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T - # clip boxes - xy[:, [0, 2]] = xy[:, [0, 2]].clip(0, width) - xy[:, [1, 3]] = xy[:, [1, 3]].clip(0, height) - return xy.astype(np.float32) - else: - return boxes - - -def overlay_bbox_cv(img, all_box, class_names): - """Draw result boxes - 
Copy from picodet/util/visualization.py - """ - # all_box array of [label, x0, y0, x1, y1, score] - all_box.sort(key=lambda v: v[5]) - for box in all_box: - label, x0, y0, x1, y1, score = box - color = (_COLORS[label] * 255).astype(np.uint8).tolist() - text = "{}:{:.1f}%".format(class_names[label], score * 100) - txt_color = (0, 0, 0) if np.mean(_COLORS[label]) > 0.5 else (255, 255, - 255) - font = cv2.FONT_HERSHEY_SIMPLEX - txt_size = cv2.getTextSize(text, font, 0.5, 2)[0] - cv2.rectangle(img, (x0, y0), (x1, y1), color, 2) - - cv2.rectangle( - img, - (x0, y0 - txt_size[1] - 1), - (x0 + txt_size[0] + txt_size[1], y0 - 1), - color, - -1, ) - cv2.putText(img, text, (x0, y0 - 1), font, 0.5, txt_color, thickness=1) - return img - - -def hard_nms(box_scores, iou_threshold, top_k=-1, candidate_size=200): - """ - - Args: - box_scores (N, 5): boxes in corner-form and probabilities. - iou_threshold: intersection over union threshold. - top_k: keep top_k results. If k <= 0, keep all the results. - candidate_size: only consider the candidates with the highest scores. - Returns: - picked: a list of indexes of the kept boxes - """ - scores = box_scores[:, -1] - boxes = box_scores[:, :-1] - picked = [] - indexes = np.argsort(scores) - indexes = indexes[-candidate_size:] - while len(indexes) > 0: - current = indexes[-1] - picked.append(current) - if 0 < top_k == len(picked) or len(indexes) == 1: - break - current_box = boxes[current, :] - indexes = indexes[:-1] - rest_boxes = boxes[indexes, :] - iou = iou_of( - rest_boxes, - np.expand_dims( - current_box, axis=0), ) - indexes = indexes[iou <= iou_threshold] - - return box_scores[picked, :] - - -def iou_of(boxes0, boxes1, eps=1e-5): - """Return intersection-over-union (Jaccard index) of boxes. - - Args: - boxes0 (N, 4): ground truth boxes. - boxes1 (N or 1, 4): predicted boxes. - eps: a small number to avoid 0 as denominator. - Returns: - iou (N): IoU values. 
- """ - overlap_left_top = np.maximum(boxes0[..., :2], boxes1[..., :2]) - overlap_right_bottom = np.minimum(boxes0[..., 2:], boxes1[..., 2:]) - - overlap_area = area_of(overlap_left_top, overlap_right_bottom) - area0 = area_of(boxes0[..., :2], boxes0[..., 2:]) - area1 = area_of(boxes1[..., :2], boxes1[..., 2:]) - return overlap_area / (area0 + area1 - overlap_area + eps) - - -def area_of(left_top, right_bottom): - """Compute the areas of rectangles given two corners. - - Args: - left_top (N, 2): left top corner. - right_bottom (N, 2): right bottom corner. - - Returns: - area (N): return the area. - """ - hw = np.clip(right_bottom - left_top, 0.0, None) - return hw[..., 0] * hw[..., 1] - - -class PicoDetABC(metaclass=ABCMeta): - def __init__( - self, - input_shape=[416, 416], - reg_max=7, - strides=[8, 16, 32, 64], - prob_threshold=0.4, - iou_threshold=0.3, - num_candidate=1000, - top_k=-1, ): - self.strides = strides - self.input_shape = input_shape - self.reg_max = reg_max - self.prob_threshold = prob_threshold - self.iou_threshold = iou_threshold - self.num_candidate = num_candidate - self.top_k = top_k - self.img_mean = [103.53, 116.28, 123.675] - self.img_std = [57.375, 57.12, 58.395] - self.input_size = (self.input_shape[1], self.input_shape[0]) - self.class_names = [ - "person", - "bicycle", - "car", - "motorcycle", - "airplane", - "bus", - "train", - "truck", - "boat", - "traffic_light", - "fire_hydrant", - "stop_sign", - "parking_meter", - "bench", - "bird", - "cat", - "dog", - "horse", - "sheep", - "cow", - "elephant", - "bear", - "zebra", - "giraffe", - "backpack", - "umbrella", - "handbag", - "tie", - "suitcase", - "frisbee", - "skis", - "snowboard", - "sports_ball", - "kite", - "baseball_bat", - "baseball_glove", - "skateboard", - "surfboard", - "tennis_racket", - "bottle", - "wine_glass", - "cup", - "fork", - "knife", - "spoon", - "bowl", - "banana", - "apple", - "sandwich", - "orange", - "broccoli", - "carrot", - "hot_dog", - "pizza", - "donut", - 
"cake", - "chair", - "couch", - "potted_plant", - "bed", - "dining_table", - "toilet", - "tv", - "laptop", - "mouse", - "remote", - "keyboard", - "cell_phone", - "microwave", - "oven", - "toaster", - "sink", - "refrigerator", - "book", - "clock", - "vase", - "scissors", - "teddy_bear", - "hair_drier", - "toothbrush", - ] - - def preprocess(self, img): - # resize image - ResizeM = get_resize_matrix((img.shape[1], img.shape[0]), - self.input_size, True) - img_resize = cv2.warpPerspective(img, ResizeM, dsize=self.input_size) - # normalize image - img_input = img_resize.astype(np.float32) / 255 - img_mean = np.array( - self.img_mean, dtype=np.float32).reshape(1, 1, 3) / 255 - img_std = np.array( - self.img_std, dtype=np.float32).reshape(1, 1, 3) / 255 - img_input = (img_input - img_mean) / img_std - # expand dims - img_input = np.transpose(img_input, [2, 0, 1]) - img_input = np.expand_dims(img_input, axis=0) - return img_input, ResizeM - - def postprocess(self, scores, raw_boxes, ResizeM, raw_shape): - # generate centers - decode_boxes = [] - select_scores = [] - for stride, box_distribute, score in zip(self.strides, raw_boxes, - scores): - # centers - fm_h = self.input_shape[0] / stride - fm_w = self.input_shape[1] / stride - h_range = np.arange(fm_h) - w_range = np.arange(fm_w) - ww, hh = np.meshgrid(w_range, h_range) - ct_row = (hh.flatten() + 0.5) * stride - ct_col = (ww.flatten() + 0.5) * stride - center = np.stack((ct_col, ct_row, ct_col, ct_row), axis=1) - - # box distribution to distance - reg_range = np.arange(self.reg_max + 1) - box_distance = box_distribute.reshape((-1, self.reg_max + 1)) - box_distance = softmax(box_distance, axis=1) - box_distance = box_distance * np.expand_dims(reg_range, axis=0) - box_distance = np.sum(box_distance, axis=1).reshape((-1, 4)) - box_distance = box_distance * stride - - # top K candidate - topk_idx = np.argsort(score.max(axis=1))[::-1] - topk_idx = topk_idx[:C] - center = center[topk_idx] - score = score[topk_idx] - 
box_distance = box_distance[topk_idx] - - # decode box - decode_box = center + [-1, -1, 1, 1] * box_distance - - select_scores.append(score) - decode_boxes.append(decode_box) - - # nms - bboxes = np.concatenate(decode_boxes, axis=0) - confidences = np.concatenate(select_scores, axis=0) - picked_box_probs = [] - picked_labels = [] - for class_index in range(0, confidences.shape[1]): - probs = confidences[:, class_index] - mask = probs > self.prob_threshold - probs = probs[mask] - if probs.shape[0] == 0: - continue - subset_boxes = bboxes[mask, :] - box_probs = np.concatenate( - [subset_boxes, probs.reshape(-1, 1)], axis=1) - box_probs = hard_nms( - box_probs, - iou_threshold=self.iou_threshold, - top_k=self.top_k, ) - picked_box_probs.append(box_probs) - picked_labels.extend([class_index] * box_probs.shape[0]) - if not picked_box_probs: - return np.array([]), np.array([]), np.array([]) - picked_box_probs = np.concatenate(picked_box_probs) - - # resize output boxes - picked_box_probs[:, :4] = warp_boxes(picked_box_probs[:, :4], - np.linalg.inv(ResizeM), - raw_shape[1], raw_shape[0]) - return ( - picked_box_probs[:, :4].astype(np.int32), - np.array(picked_labels), - picked_box_probs[:, 4], ) - - @abstractmethod - def infer_image(self, img_input): - pass - - def detect(self, img): - raw_shape = img.shape - img_input, ResizeM = self.preprocess(img) - scores, raw_boxes = self.infer_image(img_input) - if scores[0].ndim == 1: # handling num_classes=1 case - scores = [x[:, None] for x in scores] - bbox, label, score = self.postprocess(scores, raw_boxes, ResizeM, - raw_shape) - - print(bbox, score) - return bbox, label, score - - def draw_box(self, raw_img, bbox, label, score): - img = raw_img.copy() - all_box = [[x, ] + y + [z, ] - for x, y, z in zip(label, bbox.tolist(), score)] - img_draw = overlay_bbox_cv(img, all_box, self.class_names) - return img_draw - - def detect_folder(self, img_fold, result_path): - img_fold = Path(img_fold) - result_path = Path(result_path) - 
result_path.mkdir(parents=True, exist_ok=True) - - img_name_list = filter( - lambda x: str(x).endswith(".png") or str(x).endswith(".jpg"), - img_fold.iterdir(), ) - img_name_list = list(img_name_list) - print(f"find {len(img_name_list)} images") - - for img_path in tqdm(img_name_list): - img = cv2.imread(str(img_path)) - bbox, label, score = self.detect(img) - img_draw = self.draw_box(img, bbox, label, score) - save_path = str(result_path / img_path.name.replace(".png", ".jpg")) - cv2.imwrite(save_path, img_draw) - - -class PicoDetMNN(PicoDetABC): - import MNN as MNNlib - - def __init__(self, model_path, *args, **kwargs): - super(PicoDetMNN, self).__init__(*args, **kwargs) - print("Using MNN as inference backend") - print(f"Using weight: {model_path}") - - # load model - self.model_path = model_path - self.interpreter = self.MNNlib.Interpreter(self.model_path) - self.session = self.interpreter.createSession() - self.input_tensor = self.interpreter.getSessionInput(self.session) - - def infer_image(self, img_input): - tmp_input = self.MNNlib.Tensor( - (1, 3, self.input_size[1], self.input_size[0]), - self.MNNlib.Halide_Type_Float, - img_input, - self.MNNlib.Tensor_DimensionType_Caffe, ) - self.input_tensor.copyFrom(tmp_input) - self.interpreter.runSession(self.session) - score_out_name = [ - "save_infer_model/scale_0.tmp_1", "save_infer_model/scale_1.tmp_1", - "save_infer_model/scale_2.tmp_1", "save_infer_model/scale_3.tmp_1" - ] - scores = [ - self.interpreter.getSessionOutput(self.session, x).getData() - for x in score_out_name - ] - scores = [np.reshape(x, (-1, 80)) for x in scores] - boxes_out_name = [ - "save_infer_model/scale_4.tmp_1", "save_infer_model/scale_5.tmp_1", - "save_infer_model/scale_6.tmp_1", "save_infer_model/scale_7.tmp_1" - ] - raw_boxes = [ - self.interpreter.getSessionOutput(self.session, x).getData() - for x in boxes_out_name - ] - raw_boxes = [np.reshape(x, (-1, 32)) for x in raw_boxes] - return scores, raw_boxes - - -class 
PicoDetONNX(PicoDetABC): - import onnxruntime as ort - - def __init__(self, model_path, *args, **kwargs): - super(PicoDetONNX, self).__init__(*args, **kwargs) - print("Using ONNX as inference backend") - print(f"Using weight: {model_path}") - - # load model - self.model_path = model_path - self.ort_session = self.ort.InferenceSession(self.model_path) - self.input_name = self.ort_session.get_inputs()[0].name - - def infer_image(self, img_input): - inference_results = self.ort_session.run(None, - {self.input_name: img_input}) - scores = [np.squeeze(x) for x in inference_results[:3]] - raw_boxes = [np.squeeze(x) for x in inference_results[3:]] - return scores, raw_boxes - - -class PicoDetTorch(PicoDetABC): - import torch - - def __init__(self, model_path, cfg_path, *args, **kwargs): - from picodet.model.arch import build_model - from picodet.util import Logger, cfg, load_config, load_model_weight - - super(PicoDetTorch, self).__init__(*args, **kwargs) - print("Using PyTorch as inference backend") - print(f"Using weight: {model_path}") - - # load model - self.model_path = model_path - self.cfg_path = cfg_path - load_config(cfg, cfg_path) - self.logger = Logger(-1, cfg.save_dir, False) - self.model = build_model(cfg.model) - checkpoint = self.torch.load( - model_path, map_location=lambda storage, loc: storage) - load_model_weight(self.model, checkpoint, self.logger) - - def infer_image(self, img_input): - self.model.train(False) - with self.torch.no_grad(): - inference_results = self.model(self.torch.from_numpy(img_input)) - scores = [ - x.permute(0, 2, 3, 1).reshape((-1, 80)).sigmoid().detach().numpy() - for x in inference_results[0] - ] - raw_boxes = [ - x.permute(0, 2, 3, 1).reshape((-1, 32)).detach().numpy() - for x in inference_results[1] - ] - return scores, raw_boxes - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument( - "--model_path", - dest="model_path", - type=str, - default="../model/picodet-320.mnn") - parser.add_argument( - 
"--cfg_path", dest="cfg_path", type=str, default="config/picodet-m.yml") - parser.add_argument( - "--img_fold", dest="img_fold", type=str, default="../imgs") - parser.add_argument( - "--result_fold", dest="result_fold", type=str, default="../results") - parser.add_argument( - "--input_shape", - dest="input_shape", - nargs=2, - type=int, - default=[320, 320]) - parser.add_argument( - "--backend", choices=["MNN", "ONNX", "torch"], default="MNN") - args = parser.parse_args() - - print(f"Detecting {args.img_fold}") - - # load detector - if args.backend == "MNN": - detector = PicoDetMNN(args.model_path, input_shape=args.input_shape) - elif args.backend == "ONNX": - detector = PicoDetONNX(args.model_path, input_shape=args.input_shape) - elif args.backend == "torch": - detector = PicoDetTorch( - args.model_path, args.cfg_path, input_shape=args.input_shape) - else: - raise ValueError - - # detect folder - detector.detect_folder(args.img_fold, args.result_fold) - - -def test_one(): - detector = PicoDetMNN("../weight/picodet-416.mnn") - img = cv2.imread("../imgs/000252.jpg") - bbox, label, score = detector.detect(img) - img_draw = detector.draw_box(img, bbox, label, score) - cv2.imwrite('picodet_infer.jpg', img_draw) - - -if __name__ == "__main__": - # main() - test_one() diff --git a/deploy/third_engine/demo_ncnn/CMakeLists.txt b/deploy/third_engine/demo_ncnn/CMakeLists.txt index 4f5cc65fc..0d4344c69 100644 --- a/deploy/third_engine/demo_ncnn/CMakeLists.txt +++ b/deploy/third_engine/demo_ncnn/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.4.1) +cmake_minimum_required(VERSION 3.9) set(CMAKE_CXX_STANDARD 17) project(picodet_demo) @@ -11,9 +11,11 @@ if(OPENMP_FOUND) set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}") endif() -find_package(OpenCV REQUIRED) +# find_package(OpenCV REQUIRED) +find_package(OpenCV REQUIRED PATHS "/path/to/opencv-3.4.16_gcc8.2_ffmpeg") -find_package(ncnn REQUIRED) +# find_package(ncnn REQUIRED) 
+find_package(ncnn REQUIRED PATHS "/path/to/ncnn/build/install/lib/cmake/ncnn") if(NOT TARGET ncnn) message(WARNING "ncnn NOT FOUND! Please set ncnn_DIR environment variable") else() diff --git a/deploy/third_engine/demo_ncnn/README.md b/deploy/third_engine/demo_ncnn/README.md index b15052d98..f9867b8ac 100644 --- a/deploy/third_engine/demo_ncnn/README.md +++ b/deploy/third_engine/demo_ncnn/README.md @@ -1,10 +1,8 @@ # PicoDet NCNN Demo -This project provides PicoDet image inference, webcam inference and benchmark using -[Tencent's NCNN framework](https://github.com/Tencent/ncnn). - -# How to build +该Demo提供的预测代码是根据[Tencent's NCNN framework](https://github.com/Tencent/ncnn)推理库预测的。 +# 第一步:编译 ## Windows ### Step1. Download and Install Visual Studio from https://visualstudio.microsoft.com/vs/community/ @@ -12,11 +10,16 @@ Download and Install Visual Studio from https://visualstudio.microsoft.com/vs/co ### Step2. Download and install OpenCV from https://github.com/opencv/opencv/releases -### Step3(Optional). +为了方便,如果环境是gcc8.2 x86环境,可直接下载以下库: +```shell +wget https://paddledet.bj.bcebos.com/data/opencv-3.4.16_gcc8.2_ffmpeg.tar.gz +tar -xf opencv-3.4.16_gcc8.2_ffmpeg.tar.gz +``` + +### Step3(可选). Download and install Vulkan SDK from https://vulkan.lunarg.com/sdk/home -### Step4. -Clone NCNN repository +### Step4:编译NCNN ``` shell script git clone --recursive https://github.com/Tencent/ncnn.git @@ -25,7 +28,7 @@ Build NCNN following this tutorial: [Build for Windows x64 using VS2017](https:/ ### Step5. -Add `ncnn_DIR` = `YOUR_NCNN_PATH/build/install/lib/cmake/ncnn` to system environment variables. +增加 `ncnn_DIR` = `YOUR_NCNN_PATH/build/install/lib/cmake/ncnn` 到系统变量中 Build project: Open x64 Native Tools Command Prompt for VS 2019 or 2017 @@ -42,10 +45,10 @@ msbuild picodet_demo.vcxproj /p:configuration=release /p:platform=x64 ### Step1. Build and install OpenCV from https://github.com/opencv/opencv -### Step2(Optional). +### Step2(可选). 
Download Vulkan SDK from https://vulkan.lunarg.com/sdk/home -### Step3. +### Step3:编译NCNN Clone NCNN repository ``` shell script @@ -54,15 +57,7 @@ git clone --recursive https://github.com/Tencent/ncnn.git Build NCNN following this tutorial: [Build for Linux / NVIDIA Jetson / Raspberry Pi](https://github.com/Tencent/ncnn/wiki/how-to-build#build-for-linux) -### Step4. - -Set environment variables. Run: - -``` shell script -export ncnn_DIR=YOUR_NCNN_PATH/build/install/lib/cmake/ncnn -``` - -Build project +### Step4:编译可执行文件 ``` shell script cd @@ -71,47 +66,64 @@ cd build cmake .. make ``` - # Run demo -Download PicoDet ncnn model. -* [PicoDet ncnn model download link](https://paddledet.bj.bcebos.com/deploy/third_engine/picodet_m_416_ncnn.zip) - - -## Webcam - -```shell script -picodet_demo 0 0 +- 准备模型 + ```shell + modelName=picodet_s_320_coco_lcnet + # 导出Inference model + python tools/export_model.py \ + -c configs/picodet/${modelName}.yml \ + -o weights=${modelName}.pdparams \ + --output_dir=inference_model + # 转换到ONNX + paddle2onnx --model_dir inference_model/${modelName} \ + --model_filename model.pdmodel \ + --params_filename model.pdiparams \ + --opset_version 11 \ + --save_file ${modelName}.onnx + # 简化模型 + python -m onnxsim ${modelName}.onnx ${modelName}_processed.onnx + # 将模型转换至NCNN格式 + Run onnx2ncnn in ncnn tools to generate ncnn .param and .bin file. + ``` +转NCNN模型可以利用在线转换工具 [https://convertmodel.com](https://convertmodel.com/) + +为了快速测试,可直接下载:[picodet_s_320_coco_lcnet-opt.bin](https://paddledet.bj.bcebos.com/deploy/third_engine/picodet_s_320_coco_lcnet-opt.bin)/ [picodet_s_320_coco_lcnet-opt.param](https://paddledet.bj.bcebos.com/deploy/third_engine/picodet_s_320_coco_lcnet-opt.param)(不带后处理)。 + +**注意:**由于带后处理后,NCNN预测会出NAN,暂时使用不带后处理Demo即可,带后处理的Demo正在升级中,很快发布。 + + +## 开始运行 + +首先新建预测结果存放目录: +```shell +cp -r ../demo_onnxruntime/imgs . 
+cd build +mkdir ../results ``` -## Inference images - -```shell script -picodet_demo 1 IMAGE_FOLDER/*.jpg +- 预测一张图片 +``` shell +./picodet_demo 0 ../picodet_s_320_coco_lcnet.bin ../picodet_s_320_coco_lcnet.param 320 320 ../imgs/dog.jpg 0 ``` +具体参数解析可参考`main.cpp`。 -## Inference video +- 测试速度Benchmark -```shell script -picodet_demo 2 VIDEO_PATH +``` shell +./picodet_demo 1 ../picodet_s_320_coco_lcnet.bin ../picodet_s_320_coco_lcnet.param 320 320 0 ``` -## Benchmark - -```shell script -picodet_demo 3 0 - -result: picodet min = 17.74 max = 22.71 avg = 18.16 -``` - -**** - -Notice: - -If benchmark speed is slow, try to limit omp thread num. - -Linux: +## FAQ -```shell script -export OMP_THREAD_LIMIT=4 +- 预测结果精度不对: +请先确认模型输入shape是否对齐,并且模型输出name是否对齐,不带后处理的PicoDet增强版模型输出name如下: +```shell +# 分类分支 | 检测分支 +{"transpose_0.tmp_0", "transpose_1.tmp_0"}, +{"transpose_2.tmp_0", "transpose_3.tmp_0"}, +{"transpose_4.tmp_0", "transpose_5.tmp_0"}, +{"transpose_6.tmp_0", "transpose_7.tmp_0"}, ``` +可使用[netron](https://netron.app)查看具体name,并修改`picodet.h`中相应`non_postprocess_heads_info`数组。 diff --git a/deploy/third_engine/demo_ncnn/main.cpp b/deploy/third_engine/demo_ncnn/main.cpp index 2f98d82ae..8f69af93b 100644 --- a/deploy/third_engine/demo_ncnn/main.cpp +++ b/deploy/third_engine/demo_ncnn/main.cpp @@ -13,353 +13,198 @@ // limitations under the License. 
// reference from https://github.com/RangiLyu/nanodet/tree/main/demo_ncnn +#include "picodet.h" +#include +#include +#include #include #include #include -#include -#include -#include "picodet.h" -#include +#define __SAVE_RESULT__ // if defined save drawed results to ../results, else + // show it in windows struct object_rect { - int x; - int y; - int width; - int height; -}; - -int resize_uniform(cv::Mat& src, cv::Mat& dst, cv::Size dst_size, object_rect& effect_area) -{ - int w = src.cols; - int h = src.rows; - int dst_w = dst_size.width; - int dst_h = dst_size.height; - dst = cv::Mat(cv::Size(dst_w, dst_h), CV_8UC3, cv::Scalar(0)); - - float ratio_src = w * 1.0 / h; - float ratio_dst = dst_w * 1.0 / dst_h; - - int tmp_w = 0; - int tmp_h = 0; - if (ratio_src > ratio_dst) { - tmp_w = dst_w; - tmp_h = floor((dst_w * 1.0 / w) * h); - } - else if (ratio_src < ratio_dst) { - tmp_h = dst_h; - tmp_w = floor((dst_h * 1.0 / h) * w); - } - else { - cv::resize(src, dst, dst_size); - effect_area.x = 0; - effect_area.y = 0; - effect_area.width = dst_w; - effect_area.height = dst_h; - return 0; - } - - cv::Mat tmp; - cv::resize(src, tmp, cv::Size(tmp_w, tmp_h)); - - if (tmp_w != dst_w) { - int index_w = floor((dst_w - tmp_w) / 2.0); - for (int i = 0; i < dst_h; i++) { - memcpy(dst.data + i * dst_w * 3 + index_w * 3, tmp.data + i * tmp_w * 3, tmp_w * 3); - } - effect_area.x = index_w; - effect_area.y = 0; - effect_area.width = tmp_w; - effect_area.height = tmp_h; - } - else if (tmp_h != dst_h) { - int index_h = floor((dst_h - tmp_h) / 2.0); - memcpy(dst.data + index_h * dst_w * 3, tmp.data, tmp_w * tmp_h * 3); - effect_area.x = 0; - effect_area.y = index_h; - effect_area.width = tmp_w; - effect_area.height = tmp_h; - } - else { - printf("error\n"); - } - return 0; -} - -const int color_list[80][3] = -{ - {216 , 82 , 24}, - {236 ,176 , 31}, - {125 , 46 ,141}, - {118 ,171 , 47}, - { 76 ,189 ,237}, - {238 , 19 , 46}, - { 76 , 76 , 76}, - {153 ,153 ,153}, - {255 , 0 , 0}, - {255 
,127 , 0}, - {190 ,190 , 0}, - { 0 ,255 , 0}, - { 0 , 0 ,255}, - {170 , 0 ,255}, - { 84 , 84 , 0}, - { 84 ,170 , 0}, - { 84 ,255 , 0}, - {170 , 84 , 0}, - {170 ,170 , 0}, - {170 ,255 , 0}, - {255 , 84 , 0}, - {255 ,170 , 0}, - {255 ,255 , 0}, - { 0 , 84 ,127}, - { 0 ,170 ,127}, - { 0 ,255 ,127}, - { 84 , 0 ,127}, - { 84 , 84 ,127}, - { 84 ,170 ,127}, - { 84 ,255 ,127}, - {170 , 0 ,127}, - {170 , 84 ,127}, - {170 ,170 ,127}, - {170 ,255 ,127}, - {255 , 0 ,127}, - {255 , 84 ,127}, - {255 ,170 ,127}, - {255 ,255 ,127}, - { 0 , 84 ,255}, - { 0 ,170 ,255}, - { 0 ,255 ,255}, - { 84 , 0 ,255}, - { 84 , 84 ,255}, - { 84 ,170 ,255}, - { 84 ,255 ,255}, - {170 , 0 ,255}, - {170 , 84 ,255}, - {170 ,170 ,255}, - {170 ,255 ,255}, - {255 , 0 ,255}, - {255 , 84 ,255}, - {255 ,170 ,255}, - { 42 , 0 , 0}, - { 84 , 0 , 0}, - {127 , 0 , 0}, - {170 , 0 , 0}, - {212 , 0 , 0}, - {255 , 0 , 0}, - { 0 , 42 , 0}, - { 0 , 84 , 0}, - { 0 ,127 , 0}, - { 0 ,170 , 0}, - { 0 ,212 , 0}, - { 0 ,255 , 0}, - { 0 , 0 , 42}, - { 0 , 0 , 84}, - { 0 , 0 ,127}, - { 0 , 0 ,170}, - { 0 , 0 ,212}, - { 0 , 0 ,255}, - { 0 , 0 , 0}, - { 36 , 36 , 36}, - { 72 , 72 , 72}, - {109 ,109 ,109}, - {145 ,145 ,145}, - {182 ,182 ,182}, - {218 ,218 ,218}, - { 0 ,113 ,188}, - { 80 ,182 ,188}, - {127 ,127 , 0}, + int x; + int y; + int width; + int height; }; -void draw_bboxes(const cv::Mat& bgr, const std::vector& bboxes, object_rect effect_roi) -{ - static const char* class_names[] = { "person", "bicycle", "car", "motorcycle", "airplane", "bus", - "train", "truck", "boat", "traffic light", "fire hydrant", - "stop sign", "parking meter", "bench", "bird", "cat", "dog", - "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", - "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", - "skis", "snowboard", "sports ball", "kite", "baseball bat", - "baseball glove", "skateboard", "surfboard", "tennis racket", - "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", - "banana", "apple", "sandwich", 
"orange", "broccoli", "carrot", - "hot dog", "pizza", "donut", "cake", "chair", "couch", - "potted plant", "bed", "dining table", "toilet", "tv", "laptop", - "mouse", "remote", "keyboard", "cell phone", "microwave", "oven", - "toaster", "sink", "refrigerator", "book", "clock", "vase", - "scissors", "teddy bear", "hair drier", "toothbrush" - }; - - cv::Mat image = bgr.clone(); - int src_w = image.cols; - int src_h = image.rows; - int dst_w = effect_roi.width; - int dst_h = effect_roi.height; - float width_ratio = (float)src_w / (float)dst_w; - float height_ratio = (float)src_h / (float)dst_h; - - - for (size_t i = 0; i < bboxes.size(); i++) - { - const BoxInfo& bbox = bboxes[i]; - cv::Scalar color = cv::Scalar(color_list[bbox.label][0], color_list[bbox.label][1], color_list[bbox.label][2]); - - cv::rectangle(image, cv::Rect(cv::Point((bbox.x1 - effect_roi.x) * width_ratio, (bbox.y1 - effect_roi.y) * height_ratio), - cv::Point((bbox.x2 - effect_roi.x) * width_ratio, (bbox.y2 - effect_roi.y) * height_ratio)), color); - - char text[256]; - sprintf(text, "%s %.1f%%", class_names[bbox.label], bbox.score * 100); - - int baseLine = 0; - cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.4, 1, &baseLine); - - int x = (bbox.x1 - effect_roi.x) * width_ratio; - int y = (bbox.y1 - effect_roi.y) * height_ratio - label_size.height - baseLine; - if (y < 0) - y = 0; - if (x + label_size.width > image.cols) - x = image.cols - label_size.width; - - cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)), - color, -1); - - cv::putText(image, text, cv::Point(x, y + label_size.height), - cv::FONT_HERSHEY_SIMPLEX, 0.4, cv::Scalar(255, 255, 255)); - } - cv::imwrite("../result/test_picodet.jpg", image); - printf("************infer image success!!!**********\n"); -} - - -int image_demo(PicoDet &detector, const char* imagepath) -{ - std::vector filenames; - cv::glob(imagepath, filenames, false); - - for (auto img_name : 
filenames) - { - cv::Mat image = cv::imread(img_name); - if (image.empty()) - { - fprintf(stderr, "cv::imread %s failed\n", img_name); - return -1; - } - object_rect effect_roi; - cv::Mat resized_img; - resize_uniform(image, resized_img, cv::Size(320, 320), effect_roi); - auto results = detector.detect(resized_img, 0.4, 0.5); - char imgName[20] = {}; - draw_bboxes(image, results, effect_roi); - cv::waitKey(0); - +std::vector GenerateColorMap(int num_class) { + auto colormap = std::vector(3 * num_class, 0); + for (int i = 0; i < num_class; ++i) { + int j = 0; + int lab = i; + while (lab) { + colormap[i * 3] |= (((lab >> 0) & 1) << (7 - j)); + colormap[i * 3 + 1] |= (((lab >> 1) & 1) << (7 - j)); + colormap[i * 3 + 2] |= (((lab >> 2) & 1) << (7 - j)); + ++j; + lab >>= 3; } - return 0; + } + return colormap; } -int webcam_demo(PicoDet& detector, int cam_id) -{ - cv::Mat image; - cv::VideoCapture cap(cam_id); - - while (true) - { - cap >> image; - object_rect effect_roi; - cv::Mat resized_img; - resize_uniform(image, resized_img, cv::Size(320, 320), effect_roi); - auto results = detector.detect(resized_img, 0.4, 0.5); - draw_bboxes(image, results, effect_roi); - cv::waitKey(1); - } - return 0; +void draw_bboxes(const cv::Mat &im, const std::vector &bboxes, + std::string save_path = "None") { + static const char *class_names[] = { + "person", "bicycle", "car", + "motorcycle", "airplane", "bus", + "train", "truck", "boat", + "traffic light", "fire hydrant", "stop sign", + "parking meter", "bench", "bird", + "cat", "dog", "horse", + "sheep", "cow", "elephant", + "bear", "zebra", "giraffe", + "backpack", "umbrella", "handbag", + "tie", "suitcase", "frisbee", + "skis", "snowboard", "sports ball", + "kite", "baseball bat", "baseball glove", + "skateboard", "surfboard", "tennis racket", + "bottle", "wine glass", "cup", + "fork", "knife", "spoon", + "bowl", "banana", "apple", + "sandwich", "orange", "broccoli", + "carrot", "hot dog", "pizza", + "donut", "cake", "chair", + 
"couch", "potted plant", "bed", + "dining table", "toilet", "tv", + "laptop", "mouse", "remote", + "keyboard", "cell phone", "microwave", + "oven", "toaster", "sink", + "refrigerator", "book", "clock", + "vase", "scissors", "teddy bear", + "hair drier", "toothbrush"}; + + cv::Mat image = im.clone(); + int src_w = image.cols; + int src_h = image.rows; + int thickness = 2; + auto colormap = GenerateColorMap(sizeof(class_names)); + + for (size_t i = 0; i < bboxes.size(); i++) { + const BoxInfo &bbox = bboxes[i]; + std::cout << bbox.x1 << ". " << bbox.y1 << ". " << bbox.x2 << ". " + << bbox.y2 << ". " << std::endl; + int c1 = colormap[3 * bbox.label + 0]; + int c2 = colormap[3 * bbox.label + 1]; + int c3 = colormap[3 * bbox.label + 2]; + cv::Scalar color = cv::Scalar(c1, c2, c3); + // cv::Scalar color = cv::Scalar(0, 0, 255); + cv::rectangle(image, cv::Rect(cv::Point(bbox.x1, bbox.y1), + cv::Point(bbox.x2, bbox.y2)), + color, 1); + + char text[256]; + sprintf(text, "%s %.1f%%", class_names[bbox.label], bbox.score * 100); + + int baseLine = 0; + cv::Size label_size = + cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.4, 1, &baseLine); + + int x = bbox.x1; + int y = bbox.y1 - label_size.height - baseLine; + if (y < 0) + y = 0; + if (x + label_size.width > image.cols) + x = image.cols - label_size.width; + + cv::rectangle(image, cv::Rect(cv::Point(x, y), + cv::Size(label_size.width, + label_size.height + baseLine)), + color, -1); + + cv::putText(image, text, cv::Point(x, y + label_size.height), + cv::FONT_HERSHEY_SIMPLEX, 0.4, cv::Scalar(255, 255, 255), 1); + } + + if (save_path == "None") { + cv::imshow("image", image); + } else { + cv::imwrite(save_path, image); + std::cout << "Result save in: " << save_path << std::endl; + } } -int video_demo(PicoDet& detector, const char* path) -{ - cv::Mat image; - cv::VideoCapture cap(path); - - while (true) - { - cap >> image; - object_rect effect_roi; - cv::Mat resized_img; - resize_uniform(image, resized_img, cv::Size(320, 320), 
effect_roi); - auto results = detector.detect(resized_img, 0.4, 0.5); - draw_bboxes(image, results, effect_roi); - cv::waitKey(1); +int image_demo(PicoDet &detector, const char *imagepath, + int has_postprocess = 0) { + std::vector filenames; + cv::glob(imagepath, filenames, false); + bool is_postprocess = has_postprocess > 0 ? true : false; + for (auto img_name : filenames) { + cv::Mat image = cv::imread(img_name, cv::IMREAD_COLOR); + if (image.empty()) { + fprintf(stderr, "cv::imread %s failed\n", img_name.c_str()); + return -1; } - return 0; + std::vector results; + detector.detect(image, results, is_postprocess); + std::cout << "detect done." << std::endl; + +#ifdef __SAVE_RESULT__ + std::string save_path = img_name; + draw_bboxes(image, results, save_path.replace(3, 4, "results")); +#else + draw_bboxes(image, results); + cv::waitKey(0); +#endif + } + return 0; } -int benchmark(PicoDet& detector) -{ - int loop_num = 100; - int warm_up = 8; - - double time_min = DBL_MAX; - double time_max = -DBL_MAX; - double time_avg = 0; - ncnn::Mat input = ncnn::Mat(320, 320, 3); - input.fill(0.01f); - for (int i = 0; i < warm_up + loop_num; i++) - { - double start = ncnn::get_current_time(); - ncnn::Extractor ex = detector.Net->create_extractor(); - ex.input("image", input); // picodet - for (const auto& head_info : detector.heads_info) - { - ncnn::Mat dis_pred; - ncnn::Mat cls_pred; - ex.extract(head_info.dis_layer.c_str(), dis_pred); - ex.extract(head_info.cls_layer.c_str(), cls_pred); - } - double end = ncnn::get_current_time(); - - double time = end - start; - if (i >= warm_up) - { - time_min = (std::min)(time_min, time); - time_max = (std::max)(time_max, time); - time_avg += time; - } +int benchmark(PicoDet &detector, int width, int height, + int has_postprocess = 0) { + int loop_num = 100; + int warm_up = 8; + + double time_min = DBL_MAX; + double time_max = -DBL_MAX; + double time_avg = 0; + /* cv::Mat's constructor takes rows (height) before cols (width) */ + cv::Mat image(height, width, CV_8UC3, cv::Scalar(1, 1, 1)); + bool
is_postprocess = has_postprocess > 0 ? true : false; + for (int i = 0; i < warm_up + loop_num; i++) { + double start = ncnn::get_current_time(); + std::vector results; + detector.detect(image, results, is_postprocess); + double end = ncnn::get_current_time(); + + double time = end - start; + if (i >= warm_up) { + time_min = (std::min)(time_min, time); + time_max = (std::max)(time_max, time); + time_avg += time; } - time_avg /= loop_num; - fprintf(stderr, "%20s min = %7.2f max = %7.2f avg = %7.2f\n", "picodet", time_min, time_max, time_avg); - return 0; + } + time_avg /= loop_num; + fprintf(stderr, "%20s min = %7.2f max = %7.2f avg = %7.2f\n", "picodet", + time_min, time_max, time_avg); + return 0; } - -int main(int argc, char** argv) -{ - if (argc != 3) - { - fprintf(stderr, "usage: %s [mode] [path]. \n For webcam mode=0, path is cam id; \n For image demo, mode=1, path=xxx/xxx/*.jpg; \n For video, mode=2; \n For benchmark, mode=3 path=0.\n", argv[0]); - return -1; - } - PicoDet detector = PicoDet("../weight/picodet_m_416.param", "../weight/picodet_m_416.bin", true); - int mode = atoi(argv[1]); - switch (mode) - { - case 0:{ - int cam_id = atoi(argv[2]); - webcam_demo(detector, cam_id); - break; - } - case 1:{ - const char* images = argv[2]; - image_demo(detector, images); - break; - } - case 2:{ - const char* path = argv[2]; - video_demo(detector, path); - break; - } - case 3:{ - benchmark(detector); - break; - } - default:{ - fprintf(stderr, "usage: %s [mode] [path]. 
\n For webcam mode=0, path is cam id; \n For image demo, mode=1, path=xxx/xxx/*.jpg; \n For video, mode=2; \n For benchmark, mode=3 path=0.\n", argv[0]); - break; - } +int main(int argc, char **argv) { + /* Expected argv: mode bin_path param_path height width [image_path] has_postprocess. + * mode 1 (benchmark) needs 7 arguments; any other mode (image demo) needs 8. + * Validate argc before touching argv so a short command line cannot read past the end. */ + if (argc < 7) { + std::cout << "usage: " << argv[0] + << " [mode] [bin_path] [param_path] [height] [width] " + "[image_path] [has_postprocess]; omit image_path for " + "benchmark (mode=1)" + << std::endl; + return -1; + } + int mode = atoi(argv[1]); + char *bin_model_path = argv[2]; + char *param_model_path = argv[3]; + /* height and width are always supplied on the command line, so honor them */ + int height = atoi(argv[4]); + int width = atoi(argv[5]); + PicoDet detector = + PicoDet(param_model_path, bin_model_path, width, height, true, 0.45, 0.3); + if (mode == 1) { + + benchmark(detector, width, height, atoi(argv[6])); + } else { + if (argc < 8) { + std::cout << "Must set image file, such as ./picodet_demo 0 " + "../picodet_s_320_lcnet.bin ../picodet_s_320_lcnet.param " + "320 320 img.jpg 0" + << std::endl; + return -1; } + const char *images = argv[6]; + image_demo(detector, images, atoi(argv[7])); + } + return 0; } diff --git a/deploy/third_engine/demo_ncnn/picodet.cpp b/deploy/third_engine/demo_ncnn/picodet.cpp index c4dec46b2..d5f0ba3c7 100644 --- a/deploy/third_engine/demo_ncnn/picodet.cpp +++ b/deploy/third_engine/demo_ncnn/picodet.cpp @@ -48,7 +48,9 @@ int activation_function_softmax(const _Tp *src, _Tp *dst, int length) { bool PicoDet::hasGPU = false; PicoDet *PicoDet::detector = nullptr; -PicoDet::PicoDet(const char *param, const char *bin, bool useGPU) { +PicoDet::PicoDet(const char *param, const char *bin, int input_width, + int input_hight, bool useGPU, float score_threshold_ = 0.5, + float nms_threshold_ = 0.3) { this->Net = new ncnn::Net(); #if NCNN_VULKAN this->hasGPU = ncnn::get_gpu_count() > 0; @@ -57,21 +59,28 @@ PicoDet::PicoDet(const char *param, const char *bin, bool useGPU) { this->Net->opt.use_fp16_arithmetic = true; this->Net->load_param(param); this->Net->load_model(bin); + this->in_w = input_width; + this->in_h = input_hight; + this->score_threshold = score_threshold_; + this->nms_threshold = nms_threshold_; } PicoDet::~PicoDet() { delete this->Net; } void PicoDet::preprocess(cv::Mat &image, ncnn::Mat &in)
{ + // cv::resize(image, image, cv::Size(this->in_w, this->in_h), 0.f, 0.f); int img_w = image.cols; int img_h = image.rows; - in = ncnn::Mat::from_pixels(image.data, ncnn::Mat::PIXEL_BGR, img_w, img_h); + in = ncnn::Mat::from_pixels_resize(image.data, ncnn::Mat::PIXEL_BGR, img_w, + img_h, this->in_w, this->in_h); const float mean_vals[3] = {103.53f, 116.28f, 123.675f}; const float norm_vals[3] = {0.017429f, 0.017507f, 0.017125f}; in.substract_mean_normalize(mean_vals, norm_vals); } -std::vector PicoDet::detect(cv::Mat image, float score_threshold, - float nms_threshold) { +int PicoDet::detect(cv::Mat image, std::vector &result_list, + bool has_postprocess) { + ncnn::Mat input; preprocess(image, input); auto ex = this->Net->create_extractor(); @@ -82,34 +91,76 @@ std::vector PicoDet::detect(cv::Mat image, float score_threshold, #endif ex.input("image", input); // picodet + this->image_h = image.rows; + this->image_w = image.cols; + std::vector> results; results.resize(this->num_class); - for (const auto &head_info : this->heads_info) { + if (has_postprocess) { ncnn::Mat dis_pred; ncnn::Mat cls_pred; - ex.extract(head_info.dis_layer.c_str(), dis_pred); - ex.extract(head_info.cls_layer.c_str(), cls_pred); - this->decode_infer(cls_pred, dis_pred, head_info.stride, score_threshold, - results); + ex.extract(this->nms_heads_info[0].c_str(), dis_pred); + ex.extract(this->nms_heads_info[1].c_str(), cls_pred); + std::cout << dis_pred.h << " " << dis_pred.w << std::endl; + std::cout << cls_pred.h << " " << cls_pred.w << std::endl; + this->nms_boxes(cls_pred, dis_pred, this->score_threshold, results); + } else { + for (const auto &head_info : this->non_postprocess_heads_info) { + ncnn::Mat dis_pred; + ncnn::Mat cls_pred; + ex.extract(head_info.dis_layer.c_str(), dis_pred); + ex.extract(head_info.cls_layer.c_str(), cls_pred); + this->decode_infer(cls_pred, dis_pred, head_info.stride, + this->score_threshold, results); + } } - std::vector dets; for (int i = 0; i < 
(int)results.size(); i++) { - this->nms(results[i], nms_threshold); + this->nms(results[i], this->nms_threshold); for (auto box : results[i]) { - dets.push_back(box); + box.x1 = box.x1 / this->in_w * this->image_w; + box.x2 = box.x2 / this->in_w * this->image_w; + box.y1 = box.y1 / this->in_h * this->image_h; + box.y2 = box.y2 / this->in_h * this->image_h; + result_list.push_back(box); + } + } + return 0; +} + +void PicoDet::nms_boxes(ncnn::Mat &cls_pred, ncnn::Mat &dis_pred, + float score_threshold, + std::vector> &result_list) { + /* Builds one candidate box per row of dis_pred (x1, y1, x2, y2) and files it under its best-scoring label. + * cls_pred is indexed as row(label)[anchor]; candidates whose best score does not exceed score_threshold are dropped. */ + BoxInfo bbox; + for (int i = 0; i < dis_pred.h; i++) { + bbox.x1 = dis_pred.row(i)[0]; + bbox.y1 = dis_pred.row(i)[1]; + bbox.x2 = dis_pred.row(i)[2]; + bbox.y2 = dis_pred.row(i)[3]; + float score = 0; + int cur_label = 0; + for (int label = 0; label < this->num_class; label++) { + float score_ = cls_pred.row(label)[i]; + if (score_ > score) { + score = score_; + cur_label = label; + } } + /* honor the threshold the caller passed in instead of keeping every anchor */ + if (score <= score_threshold) { + continue; + } + bbox.score = score; + bbox.label = cur_label; + result_list[cur_label].push_back(bbox); } - return dets; } void PicoDet::decode_infer(ncnn::Mat &cls_pred, ncnn::Mat &dis_pred, int stride, float threshold, std::vector> &results) { - int feature_h = ceil((float)this->input_size[1] / stride); - int feature_w = ceil((float)this->input_size[0] / stride); + /* feature-map rows follow the input height, columns the input width */ + int feature_h = ceil((float)this->in_h / stride); + int feature_w = ceil((float)this->in_w / stride); for (int idx = 0; idx < feature_h * feature_w; idx++) { const float *scores = cls_pred.row(idx); @@ -151,8 +202,8 @@ BoxInfo PicoDet::disPred2Bbox(const float *&dfl_det, int label, float score, } float xmin = (std::max)(ct_x - dis_pred[0], .0f); float ymin = (std::max)(ct_y - dis_pred[1], .0f); - float xmax = (std::min)(ct_x + dis_pred[2], (float)this->input_size[0]); - float ymax = (std::min)(ct_y + dis_pred[3], (float)this->input_size[1]); + float xmax = (std::min)(ct_x + dis_pred[2], (float)this->in_w); + float ymax = (std::min)(ct_y + dis_pred[3],
(float)this->in_h); return BoxInfo{xmin, ymin, xmax, ymax, score, label}; } diff --git a/deploy/third_engine/demo_ncnn/picodet.h b/deploy/third_engine/demo_ncnn/picodet.h index dfb0967c9..dd8c8f5af 100644 --- a/deploy/third_engine/demo_ncnn/picodet.h +++ b/deploy/third_engine/demo_ncnn/picodet.h @@ -16,66 +16,72 @@ #ifndef PICODET_H #define PICODET_H -#include #include +#include -typedef struct HeadInfo -{ - std::string cls_layer; - std::string dis_layer; - int stride; -}; +typedef struct NonPostProcessHeadInfo { + std::string cls_layer; + std::string dis_layer; + int stride; +} NonPostProcessHeadInfo; -typedef struct BoxInfo -{ - float x1; - float y1; - float x2; - float y2; - float score; - int label; +typedef struct BoxInfo { + float x1; + float y1; + float x2; + float y2; + float score; + int label; } BoxInfo; -class PicoDet -{ +class PicoDet { public: - PicoDet(const char* param, const char* bin, bool useGPU); - - ~PicoDet(); + PicoDet(const char *param, const char *bin, int input_width, int input_hight, + bool useGPU, float score_threshold_, float nms_threshold_); - static PicoDet* detector; - ncnn::Net* Net; - static bool hasGPU; + ~PicoDet(); - std::vector heads_info{ - // cls_pred|dis_pred|stride - {"save_infer_model/scale_0.tmp_1", "save_infer_model/scale_4.tmp_1", 8}, - {"save_infer_model/scale_1.tmp_1", "save_infer_model/scale_5.tmp_1", 16}, - {"save_infer_model/scale_2.tmp_1", "save_infer_model/scale_6.tmp_1", 32}, - {"save_infer_model/scale_3.tmp_1", "save_infer_model/scale_7.tmp_1", 64}, - }; + static PicoDet *detector; + ncnn::Net *Net; + static bool hasGPU; - std::vector detect(cv::Mat image, float score_threshold, float nms_threshold); + int detect(cv::Mat image, std::vector &result_list, + bool has_postprocess); - std::vector labels{ "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light", - "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow", -
"elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", - "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", - "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple", - "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch", - "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone", - "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear", - "hair drier", "toothbrush" }; private: - void preprocess(cv::Mat& image, ncnn::Mat& in); - void decode_infer(ncnn::Mat& cls_pred, ncnn::Mat& dis_pred, int stride, float threshold, std::vector>& results); - BoxInfo disPred2Bbox(const float*& dfl_det, int label, float score, int x, int y, int stride); - static void nms(std::vector& result, float nms_threshold); - int input_size[2] = {320, 320}; - int num_class = 80; - int reg_max = 7; + void preprocess(cv::Mat &image, ncnn::Mat &in); + void decode_infer(ncnn::Mat &cls_pred, ncnn::Mat &dis_pred, int stride, + float threshold, + std::vector> &results); + BoxInfo disPred2Bbox(const float *&dfl_det, int label, float score, int x, + int y, int stride); + static void nms(std::vector &result, float nms_threshold); + void nms_boxes(ncnn::Mat &cls_pred, ncnn::Mat &dis_pred, + float score_threshold, + std::vector> &result_list); -}; + int image_w; + int image_h; + int in_w = 320; + int in_h = 320; + int num_class = 80; + int reg_max = 7; + + float score_threshold; + float nms_threshold; + std::vector bbox_output_data_; + std::vector class_output_data_; + + std::vector nms_heads_info{"tmp_16", "concat_4.tmp_0"}; + // If not export post-process, will use non_postprocess_heads_info + std::vector non_postprocess_heads_info{ + // cls_pred|dis_pred|stride + {"transpose_0.tmp_0", "transpose_1.tmp_0", 8}, + 
{"transpose_2.tmp_0", "transpose_3.tmp_0", 16}, + {"transpose_4.tmp_0", "transpose_5.tmp_0", 32}, + {"transpose_6.tmp_0", "transpose_7.tmp_0", 64}, + }; +}; #endif diff --git a/deploy/third_engine/demo_ncnn/python/demo_ncnn.py b/deploy/third_engine/demo_ncnn/python/demo_ncnn.py deleted file mode 100644 index 492eb1e0d..000000000 --- a/deploy/third_engine/demo_ncnn/python/demo_ncnn.py +++ /dev/null @@ -1,808 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. - -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# reference from https://github.com/RangiLyu/nanodet/tree/main/demo_ncnn - -# -*- coding: utf-8 -*- -import argparse -from abc import ABCMeta, abstractmethod -from pathlib import Path - -import cv2 -import matplotlib.pyplot as plt -import numpy as np -from scipy.special import softmax -from tqdm import tqdm - -_COLORS = (np.array([ - 0.000, - 0.447, - 0.741, - 0.850, - 0.325, - 0.098, - 0.929, - 0.694, - 0.125, - 0.494, - 0.184, - 0.556, - 0.466, - 0.674, - 0.188, - 0.301, - 0.745, - 0.933, - 0.635, - 0.078, - 0.184, - 0.300, - 0.300, - 0.300, - 0.600, - 0.600, - 0.600, - 1.000, - 0.000, - 0.000, - 1.000, - 0.500, - 0.000, - 0.749, - 0.749, - 0.000, - 0.000, - 1.000, - 0.000, - 0.000, - 0.000, - 1.000, - 0.667, - 0.000, - 1.000, - 0.333, - 0.333, - 0.000, - 0.333, - 0.667, - 0.000, - 0.333, - 1.000, - 0.000, - 0.667, - 0.333, - 0.000, - 0.667, - 0.667, - 0.000, - 0.667, - 1.000, - 0.000, - 1.000, - 0.333, - 0.000, - 1.000, - 0.667, - 0.000, - 1.000, - 1.000, - 0.000, - 0.000, - 0.333, - 0.500, - 0.000, - 0.667, - 0.500, - 0.000, - 1.000, - 0.500, - 0.333, - 0.000, - 0.500, - 0.333, - 0.333, - 0.500, - 0.333, - 0.667, - 0.500, - 0.333, - 1.000, - 0.500, - 0.667, - 0.000, - 0.500, - 0.667, - 0.333, - 0.500, - 0.667, - 0.667, - 0.500, - 0.667, - 1.000, - 0.500, - 1.000, - 0.000, - 0.500, - 1.000, - 0.333, - 0.500, - 1.000, - 0.667, - 0.500, - 1.000, - 1.000, - 0.500, - 0.000, - 0.333, - 1.000, - 0.000, - 0.667, - 1.000, - 0.000, - 1.000, - 1.000, - 0.333, - 0.000, - 1.000, - 0.333, - 0.333, - 1.000, - 0.333, - 0.667, - 1.000, - 0.333, - 1.000, - 1.000, - 0.667, - 0.000, - 1.000, - 0.667, - 0.333, - 1.000, - 0.667, - 0.667, - 1.000, - 0.667, - 1.000, - 1.000, - 1.000, - 0.000, - 1.000, - 1.000, - 0.333, - 1.000, - 1.000, - 0.667, - 1.000, - 0.333, - 0.000, - 0.000, - 0.500, - 0.000, - 0.000, - 0.667, - 0.000, - 0.000, - 0.833, - 0.000, - 0.000, - 1.000, - 0.000, - 0.000, - 0.000, - 0.167, - 0.000, - 0.000, - 0.333, - 0.000, - 0.000, - 0.500, - 0.000, - 0.000, - 0.667, 
- 0.000, - 0.000, - 0.833, - 0.000, - 0.000, - 1.000, - 0.000, - 0.000, - 0.000, - 0.167, - 0.000, - 0.000, - 0.333, - 0.000, - 0.000, - 0.500, - 0.000, - 0.000, - 0.667, - 0.000, - 0.000, - 0.833, - 0.000, - 0.000, - 1.000, - 0.000, - 0.000, - 0.000, - 0.143, - 0.143, - 0.143, - 0.286, - 0.286, - 0.286, - 0.429, - 0.429, - 0.429, - 0.571, - 0.571, - 0.571, - 0.714, - 0.714, - 0.714, - 0.857, - 0.857, - 0.857, - 0.000, - 0.447, - 0.741, - 0.314, - 0.717, - 0.741, - 0.50, - 0.5, - 0, -]).astype(np.float32).reshape(-1, 3)) - - -def get_resize_matrix(raw_shape, dst_shape, keep_ratio): - """ - Get resize matrix for resizing raw img to input size - :param raw_shape: (width, height) of raw image - :param dst_shape: (width, height) of input image - :param keep_ratio: whether keep original ratio - :return: 3x3 Matrix - """ - r_w, r_h = raw_shape - d_w, d_h = dst_shape - Rs = np.eye(3) - if keep_ratio: - C = np.eye(3) - C[0, 2] = -r_w / 2 - C[1, 2] = -r_h / 2 - - if r_w / r_h < d_w / d_h: - ratio = d_h / r_h - else: - ratio = d_w / r_w - Rs[0, 0] *= ratio - Rs[1, 1] *= ratio - - T = np.eye(3) - T[0, 2] = 0.5 * d_w - T[1, 2] = 0.5 * d_h - return T @Rs @C - else: - Rs[0, 0] *= d_w / r_w - Rs[1, 1] *= d_h / r_h - return Rs - - -def warp_boxes(boxes, M, width, height): - """Apply transform to boxes - Copy from picodet/data/transform/warp.py - """ - n = len(boxes) - if n: - # warp points - xy = np.ones((n * 4, 3)) - xy[:, :2] = boxes[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape( - n * 4, 2) # x1y1, x2y2, x1y2, x2y1 - xy = xy @M.T # transform - xy = (xy[:, :2] / xy[:, 2:3]).reshape(n, 8) # rescale - # create new boxes - x = xy[:, [0, 2, 4, 6]] - y = xy[:, [1, 3, 5, 7]] - xy = np.concatenate( - (x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T - # clip boxes - xy[:, [0, 2]] = xy[:, [0, 2]].clip(0, width) - xy[:, [1, 3]] = xy[:, [1, 3]].clip(0, height) - return xy.astype(np.float32) - else: - return boxes - - -def overlay_bbox_cv(img, all_box, class_names): - """Draw result boxes - 
Copy from picodet/util/visualization.py - """ - all_box.sort(key=lambda v: v[5]) - for box in all_box: - label, x0, y0, x1, y1, score = box - color = (_COLORS[label] * 255).astype(np.uint8).tolist() - text = "{}:{:.1f}%".format(class_names[label], score * 100) - txt_color = (0, 0, 0) if np.mean(_COLORS[label]) > 0.5 else (255, 255, - 255) - font = cv2.FONT_HERSHEY_SIMPLEX - txt_size = cv2.getTextSize(text, font, 0.5, 2)[0] - cv2.rectangle(img, (x0, y0), (x1, y1), color, 2) - - cv2.rectangle( - img, - (x0, y0 - txt_size[1] - 1), - (x0 + txt_size[0] + txt_size[1], y0 - 1), - color, - -1, ) - cv2.putText(img, text, (x0, y0 - 1), font, 0.5, txt_color, thickness=1) - return img - - -def hard_nms(box_scores, iou_threshold, top_k=-1, candidate_size=200): - """ - - Args: - box_scores (N, 5): boxes in corner-form and probabilities. - iou_threshold: intersection over union threshold. - top_k: keep top_k results. If k <= 0, keep all the results. - candidate_size: only consider the candidates with the highest scores. - Returns: - picked: a list of indexes of the kept boxes - """ - scores = box_scores[:, -1] - boxes = box_scores[:, :-1] - picked = [] - indexes = np.argsort(scores) - indexes = indexes[-candidate_size:] - while len(indexes) > 0: - current = indexes[-1] - picked.append(current) - if 0 < top_k == len(picked) or len(indexes) == 1: - break - current_box = boxes[current, :] - indexes = indexes[:-1] - rest_boxes = boxes[indexes, :] - iou = iou_of( - rest_boxes, - np.expand_dims( - current_box, axis=0), ) - indexes = indexes[iou <= iou_threshold] - - return box_scores[picked, :] - - -def iou_of(boxes0, boxes1, eps=1e-5): - """Return intersection-over-union (Jaccard index) of boxes. - - Args: - boxes0 (N, 4): ground truth boxes. - boxes1 (N or 1, 4): predicted boxes. - eps: a small number to avoid 0 as denominator. - Returns: - iou (N): IoU values. 
- """ - overlap_left_top = np.maximum(boxes0[..., :2], boxes1[..., :2]) - overlap_right_bottom = np.minimum(boxes0[..., 2:], boxes1[..., 2:]) - - overlap_area = area_of(overlap_left_top, overlap_right_bottom) - area0 = area_of(boxes0[..., :2], boxes0[..., 2:]) - area1 = area_of(boxes1[..., :2], boxes1[..., 2:]) - return overlap_area / (area0 + area1 - overlap_area + eps) - - -def area_of(left_top, right_bottom): - """Compute the areas of rectangles given two corners. - - Args: - left_top (N, 2): left top corner. - right_bottom (N, 2): right bottom corner. - - Returns: - area (N): return the area. - """ - hw = np.clip(right_bottom - left_top, 0.0, None) - return hw[..., 0] * hw[..., 1] - - -class picodetABC(metaclass=ABCMeta): - def __init__( - self, - input_shape=[320, 320], - reg_max=7, - strides=[8, 16, 32], - prob_threshold=0.4, - iou_threshold=0.3, - num_candidate=1000, - top_k=-1, ): - self.strides = strides - self.input_shape = input_shape - self.reg_max = reg_max - self.prob_threshold = prob_threshold - self.iou_threshold = iou_threshold - self.num_candidate = num_candidate - self.top_k = top_k - self.img_mean = [103.53, 116.28, 123.675] - self.img_std = [57.375, 57.12, 58.395] - self.input_size = (self.input_shape[1], self.input_shape[0]) - self.class_names = [ - "person", - "bicycle", - "car", - "motorcycle", - "airplane", - "bus", - "train", - "truck", - "boat", - "traffic_light", - "fire_hydrant", - "stop_sign", - "parking_meter", - "bench", - "bird", - "cat", - "dog", - "horse", - "sheep", - "cow", - "elephant", - "bear", - "zebra", - "giraffe", - "backpack", - "umbrella", - "handbag", - "tie", - "suitcase", - "frisbee", - "skis", - "snowboard", - "sports_ball", - "kite", - "baseball_bat", - "baseball_glove", - "skateboard", - "surfboard", - "tennis_racket", - "bottle", - "wine_glass", - "cup", - "fork", - "knife", - "spoon", - "bowl", - "banana", - "apple", - "sandwich", - "orange", - "broccoli", - "carrot", - "hot_dog", - "pizza", - "donut", - "cake", 
- "chair", - "couch", - "potted_plant", - "bed", - "dining_table", - "toilet", - "tv", - "laptop", - "mouse", - "remote", - "keyboard", - "cell_phone", - "microwave", - "oven", - "toaster", - "sink", - "refrigerator", - "book", - "clock", - "vase", - "scissors", - "teddy_bear", - "hair_drier", - "toothbrush", - ] - - def preprocess(self, img): - # resize image - ResizeM = get_resize_matrix((img.shape[1], img.shape[0]), - self.input_size, True) - img_resize = cv2.warpPerspective(img, ResizeM, dsize=self.input_size) - # normalize image - img_input = img_resize.astype(np.float32) / 255 - img_mean = np.array( - self.img_mean, dtype=np.float32).reshape(1, 1, 3) / 255 - img_std = np.array( - self.img_std, dtype=np.float32).reshape(1, 1, 3) / 255 - img_input = (img_input - img_mean) / img_std - # expand dims - img_input = np.transpose(img_input, [2, 0, 1]) - img_input = np.expand_dims(img_input, axis=0) - return img_input, ResizeM - - def postprocess(self, scores, raw_boxes, ResizeM, raw_shape): - # generate centers - decode_boxes = [] - select_scores = [] - for stride, box_distribute, score in zip(self.strides, raw_boxes, - scores): - # centers - fm_h = self.input_shape[0] / stride - fm_w = self.input_shape[1] / stride - h_range = np.arange(fm_h) - w_range = np.arange(fm_w) - ww, hh = np.meshgrid(w_range, h_range) - ct_row = (hh.flatten() + 0.5) * stride - ct_col = (ww.flatten() + 0.5) * stride - center = np.stack((ct_col, ct_row, ct_col, ct_row), axis=1) - - # box distribution to distance - reg_range = np.arange(self.reg_max + 1) - box_distance = box_distribute.reshape((-1, self.reg_max + 1)) - box_distance = softmax(box_distance, axis=1) - box_distance = box_distance * np.expand_dims(reg_range, axis=0) - box_distance = np.sum(box_distance, axis=1).reshape((-1, 4)) - box_distance = box_distance * stride - - # top K candidate - topk_idx = np.argsort(score.max(axis=1))[::-1] - topk_idx = topk_idx[:self.num_candidate] - center = center[topk_idx] - score = score[topk_idx] - 
box_distance = box_distance[topk_idx] - - # decode box - decode_box = center + [-1, -1, 1, 1] * box_distance - - select_scores.append(score) - decode_boxes.append(decode_box) - - # nms - bboxes = np.concatenate(decode_boxes, axis=0) - confidences = np.concatenate(select_scores, axis=0) - picked_box_probs = [] - picked_labels = [] - for class_index in range(0, confidences.shape[1]): - probs = confidences[:, class_index] - mask = probs > self.prob_threshold - probs = probs[mask] - if probs.shape[0] == 0: - continue - subset_boxes = bboxes[mask, :] - box_probs = np.concatenate( - [subset_boxes, probs.reshape(-1, 1)], axis=1) - box_probs = hard_nms( - box_probs, - iou_threshold=self.iou_threshold, - top_k=self.top_k, ) - picked_box_probs.append(box_probs) - picked_labels.extend([class_index] * box_probs.shape[0]) - if not picked_box_probs: - return np.array([]), np.array([]), np.array([]) - picked_box_probs = np.concatenate(picked_box_probs) - - # resize output boxes - picked_box_probs[:, :4] = warp_boxes(picked_box_probs[:, :4], - np.linalg.inv(ResizeM), - raw_shape[1], raw_shape[0]) - return ( - picked_box_probs[:, :4].astype(np.int32), - np.array(picked_labels), - picked_box_probs[:, 4], ) - - @abstractmethod - def infer_image(self, img_input): - pass - - def detect(self, img): - raw_shape = img.shape - img_input, ResizeM = self.preprocess(img) - scores, raw_boxes = self.infer_image(img_input) - if scores[0].ndim == 1: # handling num_classes=1 case - scores = [x[:, None] for x in scores] - bbox, label, score = self.postprocess(scores, raw_boxes, ResizeM, - raw_shape) - return bbox, label, score - - def draw_box(self, raw_img, bbox, label, score): - img = raw_img.copy() - all_box = [[x, ] + y + [z, ] - for x, y, z in zip(label, bbox.tolist(), score)] - img_draw = overlay_bbox_cv(img, all_box, self.class_names) - return img_draw - - def detect_folder(self, img_fold, result_path): - img_fold = Path(img_fold) - result_path = Path(result_path) - 
result_path.mkdir(parents=True, exist_ok=True) - - img_name_list = filter( - lambda x: str(x).endswith(".png") or str(x).endswith(".jpg"), - img_fold.iterdir(), ) - img_name_list = list(img_name_list) - print(f"find {len(img_name_list)} images") - - for img_path in tqdm(img_name_list): - img = cv2.imread(str(img_path)) - bbox, label, score = self.detect(img) - img_draw = self.draw_box(img, bbox, label, score) - save_path = str(result_path / img_path.name.replace(".png", ".jpg")) - cv2.imwrite(save_path, img_draw) - - -class picodetONNX(picodetABC): - def __init__(self, model_path, *args, **kwargs): - import onnxruntime as ort - - super(picodetONNX, self).__init__(*args, **kwargs) - print("Using ONNX as inference backend") - print(f"Using weight: {model_path}") - - # load model - self.model_path = model_path - self.ort_session = ort.InferenceSession(self.model_path) - self.input_name = self.ort_session.get_inputs()[0].name - - def infer_image(self, img_input): - inference_results = self.ort_session.run(None, - {self.input_name: img_input}) - scores = [np.squeeze(x) for x in inference_results[:3]] - raw_boxes = [np.squeeze(x) for x in inference_results[3:]] - return scores, raw_boxes - - -class picodetTorch(picodetABC): - def __init__(self, model_path, cfg_path, *args, **kwargs): - import torch - - from picodet.model.arch import build_model - from picodet.util import Logger, cfg, load_config, load_model_weight - - super(picodetTorch, self).__init__(*args, **kwargs) - print("Using PyTorch as inference backend") - print(f"Using weight: {model_path}") - - # load model - self.model_path = model_path - self.cfg_path = cfg_path - load_config(cfg, cfg_path) - self.logger = Logger(-1, cfg.save_dir, False) - self.model = build_model(cfg.model) - checkpoint = torch.load( - model_path, map_location=lambda storage, loc: storage) - load_model_weight(self.model, checkpoint, self.logger) - - def infer_image(self, img_input): - import torch - - self.model.train(False) - with 
torch.no_grad(): - inference_results = self.model(torch.from_numpy(img_input)) - scores = [ - x.permute(0, 2, 3, 1).reshape((-1, 80)).sigmoid().detach().numpy() - for x in inference_results[0] - ] - raw_boxes = [ - x.permute(0, 2, 3, 1).reshape((-1, 32)).detach().numpy() - for x in inference_results[1] - ] - return scores, raw_boxes - - -class picodetNCNN(picodetABC): - def __init__(self, model_param, model_bin, *args, **kwargs): - import ncnn - - super(picodetNCNN, self).__init__(*args, **kwargs) - print("Using ncnn as inference backend") - print(f"Using param: {model_param}, bin: {model_bin}") - - # load model - self.model_param = model_param - self.model_bin = model_bin - - self.net = ncnn.Net() - self.net.load_param(model_param) - self.net.load_model(model_bin) - self.input_name = "input.1" - - def infer_image(self, img_input): - import ncnn - - mat_in = ncnn.Mat(img_input.squeeze()) - ex = self.net.create_extractor() - ex.input(self.input_name, mat_in) - - score_out_name = [ - "save_infer_model/scale_0.tmp_1", "save_infer_model/scale_1.tmp_1", - "save_infer_model/scale_2.tmp_1", "save_infer_model/scale_3.tmp_1" - ] - scores = [np.array(ex.extract(x)[1]) for x in score_out_name] - scores = [np.reshape(x, (-1, 80)) for x in scores] - - boxes_out_name = [ - "save_infer_model/scale_4.tmp_1", "save_infer_model/scale_5.tmp_1", - "save_infer_model/scale_6.tmp_1", "save_infer_model/scale_7.tmp_1" - ] - raw_boxes = [np.array(ex.extract(x)[1]) for x in boxes_out_name] - raw_boxes = [np.reshape(x, (-1, 32)) for x in raw_boxes] - - return scores, raw_boxes - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument( - "--model_path", - dest="model_path", - type=str, - default="../model/picodet.param") - parser.add_argument( - "--model_bin", - dest="model_bin", - type=str, - default="../model/picodet.bin") - parser.add_argument( - "--cfg_path", dest="cfg_path", type=str, default="config/picodet.yml") - parser.add_argument( - "--img_fold", dest="img_fold", 
type=str, default="../imgs") - parser.add_argument( - "--result_fold", dest="result_fold", type=str, default="../results") - parser.add_argument( - "--input_shape", - dest="input_shape", - nargs=2, - type=int, - default=[320, 320]) - parser.add_argument( - "--backend", choices=["ncnn", "ONNX", "torch"], default="ncnn") - args = parser.parse_args() - - print(f"Detecting {args.img_fold}") - - # load detector - if args.backend == "ncnn": - detector = picodetNCNN( - args.model_path, args.model_bin, input_shape=args.input_shape) - elif args.backend == "ONNX": - detector = picodetONNX(args.model_path, input_shape=args.input_shape) - elif args.backend == "torch": - detector = picodetTorch( - args.model_path, args.cfg_path, input_shape=args.input_shape) - else: - raise ValueError - - # detect folder - detector.detect_folder(args.img_fold, args.result_fold) - - -def test_one(): - detector = picodetNCNN("../weight/picodet_m_416.param", - "../weight/picodet_m_416.bin") - img = cv2.imread("../000000000102.jpg") - bbox, label, score = detector.detect(img) - img_draw = detector.draw_box(img, bbox, label, score) - img_out = img_draw[..., ::-1] - cv2.imwrite('python_version.jpg', img_out) - - -if __name__ == "__main__": - # main() - test_one() -- GitLab