From a6189b1213b53b21bd0c8d977398f50b71308b5d Mon Sep 17 00:00:00 2001
From: Guanghua Yu <742925032@qq.com>
Date: Mon, 18 Apr 2022 09:52:09 +0800
Subject: [PATCH] update picodet ncnn and mnn demo (#5721)

---
 configs/picodet/README.md                     |  15 +-
 configs/picodet/README_en.md                  |  14 +-
 deploy/third_engine/demo_mnn/CMakeLists.txt   |   7 +-
 deploy/third_engine/demo_mnn/README.md        | 108 +--
 deploy/third_engine/demo_mnn/main.cpp         | 499 ++++-------
 deploy/third_engine/demo_mnn/picodet_mnn.cpp  |  72 +-
 deploy/third_engine/demo_mnn/picodet_mnn.hpp  | 129 ++-
 .../third_engine/demo_mnn/python/demo_mnn.py  | 803 -----------------
 deploy/third_engine/demo_ncnn/CMakeLists.txt  |   8 +-
 deploy/third_engine/demo_ncnn/README.md       | 116 +--
 deploy/third_engine/demo_ncnn/main.cpp        | 505 ++++-------
 deploy/third_engine/demo_ncnn/picodet.cpp     |  85 +-
 deploy/third_engine/demo_ncnn/picodet.h       | 102 +--
 .../demo_ncnn/python/demo_ncnn.py             | 808 ------------------
 14 files changed, 714 insertions(+), 2557 deletions(-)
 delete mode 100644 deploy/third_engine/demo_mnn/python/demo_mnn.py
 delete mode 100644 deploy/third_engine/demo_ncnn/python/demo_ncnn.py

diff --git a/configs/picodet/README.md b/configs/picodet/README.md
index fde657de2..7ccf4c16e 100644
--- a/configs/picodet/README.md
+++ b/configs/picodet/README.md
@@ -226,11 +226,16 @@ paddle2onnx --model_dir output_inference/picodet_s_320_coco_lcnet/ \
 
 ### 部署
 
-- OpenVINO demo [Python](../../deploy/third_engine/demo_openvino/python)
-- [PaddleLite C++ demo](../../deploy/lite)
-- [Android demo(Paddle Lite)](https://github.com/PaddlePaddle/Paddle-Lite-Demo/tree/develop/object_detection/android/app/cxx/picodet_detection_demo)
-- ONNXRuntime demo [Python](../../deploy/third_engine/demo_onnxruntime)
-- PaddleInference demo [Python](../../deploy/python) & [C++](../../deploy/cpp)
+| 预测库     | Python | C++  | 带后处理预测 |
+| :-------- | :--------: | :---------------------: | :----------------: |
+| OpenVINO | [Python](../../deploy/third_engine/demo_openvino/python) | [C++](../../deploy/third_engine/demo_openvino)（带后处理开发中） |  ✔︎ |
+| Paddle Lite |  -    |  [C++](../../deploy/lite) | ✔︎ |
+| Android Demo |  -  |  [Paddle Lite](https://github.com/PaddlePaddle/Paddle-Lite-Demo/tree/develop/object_detection/android/app/cxx/picodet_detection_demo) | ✔︎ |
+| PaddleInference | [Python](../../deploy/python) |  [C++](../../deploy/cpp) | ✔︎ |
+| ONNXRuntime  | [Python](../../deploy/third_engine/demo_onnxruntime) | Coming soon | ✔︎ |
+| NCNN |  Coming soon  | [C++](../../deploy/third_engine/demo_ncnn) | ✘ |
+| MNN  | Coming soon | [C++](../../deploy/third_engine/demo_mnn) |  ✘ |
+
 
 
 Android demo可视化：
diff --git a/configs/picodet/README_en.md b/configs/picodet/README_en.md
index 6ab4e42cf..a92ffe7e9 100644
--- a/configs/picodet/README_en.md
+++ b/configs/picodet/README_en.md
@@ -222,11 +222,15 @@ paddle2onnx --model_dir output_inference/picodet_s_320_coco_lcnet/ \
 
 ### Deploy
 
-- OpenVINO demo [Python](../../deploy/third_engine/demo_openvino/python)
-- [PaddleLite C++ demo](../../deploy/lite)
-- [Android demo(Paddle Lite)](https://github.com/PaddlePaddle/Paddle-Lite-Demo/tree/develop/object_detection/android/app/cxx/picodet_detection_demo)
-- ONNXRuntime demo [Python](../../deploy/third_engine/demo_onnxruntime)
-- PaddleInference demo [Python](../../deploy/python) & [C++](../../deploy/cpp)
+| Infer Engine     | Python | C++  | Predict With Postprocess |
+| :-------- | :--------: | :---------------------: | :----------------: |
+| OpenVINO | [Python](../../deploy/third_engine/demo_openvino/python) | [C++](../../deploy/third_engine/demo_openvino) (postprocess coming soon) |  ✔︎ |
+| Paddle Lite |  -    |  [C++](../../deploy/lite) | ✔︎ |
+| Android Demo |  -  |  [Paddle Lite](https://github.com/PaddlePaddle/Paddle-Lite-Demo/tree/develop/object_detection/android/app/cxx/picodet_detection_demo) | ✔︎ |
+| PaddleInference | [Python](../../deploy/python) |  [C++](../../deploy/cpp) | ✔︎ |
+| ONNXRuntime  | [Python](../../deploy/third_engine/demo_onnxruntime) | Coming soon | ✔︎ |
+| NCNN |  Coming soon  | [C++](../../deploy/third_engine/demo_ncnn) | ✘ |
+| MNN  | Coming soon | [C++](../../deploy/third_engine/demo_mnn) |  ✘ |
 
 
 Android demo visualization:
diff --git a/deploy/third_engine/demo_mnn/CMakeLists.txt b/deploy/third_engine/demo_mnn/CMakeLists.txt
index 07d9b7f86..9afa8cfc0 100644
--- a/deploy/third_engine/demo_mnn/CMakeLists.txt
+++ b/deploy/third_engine/demo_mnn/CMakeLists.txt
@@ -2,13 +2,14 @@ cmake_minimum_required(VERSION 3.9)
 project(picodet-mnn)
 
 set(CMAKE_CXX_STANDARD 17)
+set(MNN_DIR "${CMAKE_CURRENT_SOURCE_DIR}/mnn") # MNN root; headers are taken from ${MNN_DIR}/include
 
 # find_package(OpenCV REQUIRED PATHS "/work/dependence/opencv/opencv-3.4.3/build")
 find_package(OpenCV REQUIRED)
 include_directories(
-        /path/to/MNN/include/MNN
-        /path/to/MNN/include
-        .
+        ${MNN_DIR}/include
+        ${MNN_DIR}/include/MNN
+        ${CMAKE_CURRENT_SOURCE_DIR}  # this demo's own directory, also when built as a subproject
 )
 link_directories(mnn/lib)
 
diff --git a/deploy/third_engine/demo_mnn/README.md b/deploy/third_engine/demo_mnn/README.md
index 78a0f3a79..ac11a8e18 100644
--- a/deploy/third_engine/demo_mnn/README.md
+++ b/deploy/third_engine/demo_mnn/README.md
@@ -1,105 +1,89 @@
 # PicoDet MNN Demo
 
-This fold provides PicoDet inference code using
-[Alibaba's MNN framework](https://github.com/alibaba/MNN). Most of the implements in
-this fold are same as *demo_ncnn*.
+本Demo提供的预测代码是根据[Alibaba's MNN framework](https://github.com/alibaba/MNN) 推理库预测的。
 
-## Install MNN
+## C++ Demo
 
-### Python library
-
-Just run:
-
-``` shell
-pip install MNN
+- 第一步：根据[MNN官方编译文档](https://www.yuque.com/mnn/en/build_linux) 编译生成预测库.
+- 第二步：编译或下载得到OpenCV库，可参考OpenCV官网，为了方便如果环境是gcc8.2 x86环境，可直接下载以下库：
+```shell
+wget https://paddledet.bj.bcebos.com/data/opencv-3.4.16_gcc8.2_ffmpeg.tar.gz
+tar -xf opencv-3.4.16_gcc8.2_ffmpeg.tar.gz
 ```
 
-### C++ library
-
-Please follow the [official document](https://www.yuque.com/mnn/en/build_linux) to build MNN engine.
-- Create picodet_m_416_coco.onnx
+- 第三步：准备模型
     ```shell
-    modelName=picodet_m_416_coco
-    # export model
+    modelName=picodet_s_320_coco_lcnet
+    # 导出Inference model
     python tools/export_model.py \
             -c configs/picodet/${modelName}.yml \
             -o weights=${modelName}.pdparams \
             --output_dir=inference_model
-    # convert to onnx
+    # 转换到ONNX
     paddle2onnx --model_dir inference_model/${modelName} \
             --model_filename model.pdmodel  \
             --params_filename model.pdiparams \
             --opset_version 11 \
             --save_file ${modelName}.onnx
-    # onnxsim
+    # 简化模型
     python -m onnxsim ${modelName}.onnx ${modelName}_processed.onnx
+    # 将模型转换至MNN格式
+    python -m MNN.tools.mnnconvert -f ONNX --modelFile ${modelName}_processed.onnx --MNNModel picodet_s_320_lcnet.mnn
     ```
+为了快速测试，可直接下载：[picodet_s_320_lcnet.mnn](https://paddledet.bj.bcebos.com/deploy/third_engine/picodet_s_320_lcnet.mnn)（不带后处理）。
 
-- Convert model
-   ``` shell
-   python -m MNN.tools.mnnconvert -f ONNX --modelFile picodet-416.onnx --MNNModel picodet-416.mnn
-   ```
-Here are converted model [download link](https://paddledet.bj.bcebos.com/deploy/third_engine/picodet_m_416.mnn).
+**注意：**由于MNN里，Matmul算子的输入shape如果不一致计算有问题，带后处理的Demo正在升级中，很快发布。
 
-## Build
-
-The python code *demo_mnn.py* can run directly and independently without main PicoDet repo.
-`PicoDetONNX` and `PicoDetTorch` are two classes used to check the similarity of MNN inference results
-with ONNX model and Pytorch model. They can be remove with no side effects.
-
-For C++ code, replace `libMNN.so` under *./mnn/lib* with the one you just compiled, modify OpenCV path and MNN path at CMake file,
-and run
+## 编译可执行程序
 
+- 第一步：导入lib包
+```
+mkdir mnn && cd mnn && mkdir lib
+cp /path/to/MNN/build/libMNN.so lib/
+cp -r /path/to/MNN/include .
+cd ..
+```
+- 第二步：修改CMakeLists.txt中OpenCV和MNN的路径
+- 第三步：开始编译
 ``` shell
 mkdir build && cd build
 cmake ..
 make
 ```
+如果在build目录下生成`picodet-mnn`可执行文件，就证明成功了。
 
-Note that a flag at `main.cpp` is used to control whether to show the detection result or save it into a fold.
-
-``` c++
-#define __SAVE_RESULT__ // if defined save drawed results to ../results, else show it in windows
-```
-
-## Run
-
-### Python
-
-`demo_mnn.py` provide an inference class `PicoDetMNN` that combines preprocess, post process, visualization.
-Besides it can be used in command line with the form:
+## 开始运行
 
+首先新建预测结果存放目录：
 ```shell
-demo_mnn.py [-h] [--model_path MODEL_PATH] [--cfg_path CFG_PATH]
-    [--img_fold IMG_FOLD] [--result_fold RESULT_FOLD]
-    [--input_shape INPUT_SHAPE INPUT_SHAPE]
-    [--backend {MNN,ONNX,torch}]
+cp -r ../demo_onnxruntime/imgs .
+cd build
+mkdir ../results
 ```
 
-For example:
-
+- 预测一张图片
 ``` shell
-# run MNN 416 model
-python ./demo_mnn.py --model_path ../model/picodet-416.mnn --img_fold ../imgs --result_fold ../results
-# run MNN 320 model
-python ./demo_mnn.py --model_path ../model/picodet-320.mnn --input_shape 320 320 --backend MNN
-# run onnx model
-python ./demo_mnn.py --model_path ../model/sim.onnx --backend ONNX
+./picodet-mnn 0 ../picodet_s_320_lcnet.mnn 320 320 ../imgs/dog.jpg
 ```
 
-### C++
-
-C++ inference interface is same with NCNN code, to detect images in a fold, run:
+- 测试速度Benchmark
 
 ``` shell
-./picodet-mnn "1" "../imgs/test.jpg"
+./picodet-mnn 1 ../picodet_s_320_lcnet.mnn 320 320
 ```
 
-For speed benchmark
+## FAQ
 
-``` shell
-./picodet-mnn "3" "0"
+- 预测结果精度不对：
+请先确认模型输入shape是否对齐，并且模型输出name是否对齐，不带后处理的PicoDet增强版模型输出name如下：
+```shell
+# 分类分支  |  检测分支
+{"transpose_0.tmp_0", "transpose_1.tmp_0"},
+{"transpose_2.tmp_0", "transpose_3.tmp_0"},
+{"transpose_4.tmp_0", "transpose_5.tmp_0"},
+{"transpose_6.tmp_0", "transpose_7.tmp_0"},
 ```
+可使用[netron](https://netron.app)查看具体name，并修改`picodet_mnn.hpp`中相应`non_postprocess_heads_info`数组。
 
 ## Reference
 [MNN](https://github.com/alibaba/MNN)
diff --git a/deploy/third_engine/demo_mnn/main.cpp b/deploy/third_engine/demo_mnn/main.cpp
index 52c977343..5737368d5 100644
--- a/deploy/third_engine/demo_mnn/main.cpp
+++ b/deploy/third_engine/demo_mnn/main.cpp
@@ -11,7 +11,6 @@
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
-// reference from https://github.com/RangiLyu/nanodet/tree/main/demo_mnn
 
 #include "picodet_mnn.hpp"
 #include <iostream>
@@ -19,354 +18,186 @@
 #include <opencv2/highgui/highgui.hpp>
 #include <opencv2/imgproc/imgproc.hpp>
 
-#define __SAVE_RESULT__ // if defined save drawed results to ../results, else show it in windows
+#define __SAVE_RESULT__ // if defined, save drawn results to ../results, else
+                        // show them in a window
 
 struct object_rect {
-    int x;
-    int y;
-    int width;
-    int height;
+  int x;      // horizontal offset of the rectangle
+  int y;      // vertical offset of the rectangle
+  int width;  // rectangle width
+  int height; // rectangle height; NOTE(review): struct looks unused after this change
 };
 
-int resize_uniform(cv::Mat& src, cv::Mat& dst, cv::Size dst_size, object_rect& effect_area)
-{
-    int w = src.cols;
-    int h = src.rows;
-    int dst_w = dst_size.width;
-    int dst_h = dst_size.height;
-    dst = cv::Mat(cv::Size(dst_w, dst_h), CV_8UC3, cv::Scalar(0));
-
-    float ratio_src = w * 1.0 / h;
-    float ratio_dst = dst_w * 1.0 / dst_h;
-
-    int tmp_w = 0;
-    int tmp_h = 0;
-    if (ratio_src > ratio_dst) {
-        tmp_w = dst_w;
-        tmp_h = floor((dst_w * 1.0 / w) * h);
-    }
-    else if (ratio_src < ratio_dst) {
-        tmp_h = dst_h;
-        tmp_w = floor((dst_h * 1.0 / h) * w);
-    }
-    else {
-        cv::resize(src, dst, dst_size);
-        effect_area.x = 0;
-        effect_area.y = 0;
-        effect_area.width = dst_w;
-        effect_area.height = dst_h;
-        return 0;
-    }
-    cv::Mat tmp;
-    cv::resize(src, tmp, cv::Size(tmp_w, tmp_h));
-
-    if (tmp_w != dst_w) {
-        int index_w = floor((dst_w - tmp_w) / 2.0);
-        for (int i = 0; i < dst_h; i++) {
-            memcpy(dst.data + i * dst_w * 3 + index_w * 3, tmp.data + i * tmp_w * 3, tmp_w * 3);
-        }
-        effect_area.x = index_w;
-        effect_area.y = 0;
-        effect_area.width = tmp_w;
-        effect_area.height = tmp_h;
-    }
-    else if (tmp_h != dst_h) {
-        int index_h = floor((dst_h - tmp_h) / 2.0);
-        memcpy(dst.data + index_h * dst_w * 3, tmp.data, tmp_w * tmp_h * 3);
-        effect_area.x = 0;
-        effect_area.y = index_h;
-        effect_area.width = tmp_w;
-        effect_area.height = tmp_h;
-    }
-    else {
-        printf("error\n");
+std::vector<int> GenerateColorMap(int num_class) {
+  // Returns a flat table holding three channel values per class index.
+  std::vector<int> palette(3 * num_class, 0);
+  for (int cls = 0; cls < num_class; ++cls) {
+    int *channels = &palette[cls * 3];
+    // Consume the index three bits at a time: bit k of each triple is OR-ed
+    // into channel k, starting at bit 7 and moving one bit lower per round.
+    for (int rest = cls, bit = 7; rest != 0; rest >>= 3, --bit) {
+      for (int k = 0; k < 3; ++k) {
+        channels[k] |= (((rest >> k) & 1) << bit);
+      }
     }
-    return 0;
+  }
+  return palette;
 }
 
-const int color_list[80][3] =
-{
-    {216 , 82 , 24},
-    {236 ,176 , 31},
-    {125 , 46 ,141},
-    {118 ,171 , 47},
-    { 76 ,189 ,237},
-    {238 , 19 , 46},
-    { 76 , 76 , 76},
-    {153 ,153 ,153},
-    {255 ,  0 ,  0},
-    {255 ,127 ,  0},
-    {190 ,190 ,  0},
-    {  0 ,255 ,  0},
-    {  0 ,  0 ,255},
-    {170 ,  0 ,255},
-    { 84 , 84 ,  0},
-    { 84 ,170 ,  0},
-    { 84 ,255 ,  0},
-    {170 , 84 ,  0},
-    {170 ,170 ,  0},
-    {170 ,255 ,  0},
-    {255 , 84 ,  0},
-    {255 ,170 ,  0},
-    {255 ,255 ,  0},
-    {  0 , 84 ,127},
-    {  0 ,170 ,127},
-    {  0 ,255 ,127},
-    { 84 ,  0 ,127},
-    { 84 , 84 ,127},
-    { 84 ,170 ,127},
-    { 84 ,255 ,127},
-    {170 ,  0 ,127},
-    {170 , 84 ,127},
-    {170 ,170 ,127},
-    {170 ,255 ,127},
-    {255 ,  0 ,127},
-    {255 , 84 ,127},
-    {255 ,170 ,127},
-    {255 ,255 ,127},
-    {  0 , 84 ,255},
-    {  0 ,170 ,255},
-    {  0 ,255 ,255},
-    { 84 ,  0 ,255},
-    { 84 , 84 ,255},
-    { 84 ,170 ,255},
-    { 84 ,255 ,255},
-    {170 ,  0 ,255},
-    {170 , 84 ,255},
-    {170 ,170 ,255},
-    {170 ,255 ,255},
-    {255 ,  0 ,255},
-    {255 , 84 ,255},
-    {255 ,170 ,255},
-    { 42 ,  0 ,  0},
-    { 84 ,  0 ,  0},
-    {127 ,  0 ,  0},
-    {170 ,  0 ,  0},
-    {212 ,  0 ,  0},
-    {255 ,  0 ,  0},
-    {  0 , 42 ,  0},
-    {  0 , 84 ,  0},
-    {  0 ,127 ,  0},
-    {  0 ,170 ,  0},
-    {  0 ,212 ,  0},
-    {  0 ,255 ,  0},
-    {  0 ,  0 , 42},
-    {  0 ,  0 , 84},
-    {  0 ,  0 ,127},
-    {  0 ,  0 ,170},
-    {  0 ,  0 ,212},
-    {  0 ,  0 ,255},
-    {  0 ,  0 ,  0},
-    { 36 , 36 , 36},
-    { 72 , 72 , 72},
-    {109 ,109 ,109},
-    {145 ,145 ,145},
-    {182 ,182 ,182},
-    {218 ,218 ,218},
-    {  0 ,113 ,188},
-    { 80 ,182 ,188},
-    {127 ,127 ,  0},
-};
-
-void draw_bboxes(const cv::Mat& bgr, const std::vector<BoxInfo>& bboxes, object_rect effect_roi, std::string save_path="None")
-{
-    static const char* class_names[] = { "person", "bicycle", "car", "motorcycle", "airplane", "bus",
-                                        "train", "truck", "boat", "traffic light", "fire hydrant",
-                                        "stop sign", "parking meter", "bench", "bird", "cat", "dog",
-                                        "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe",
-                                        "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
-                                        "skis", "snowboard", "sports ball", "kite", "baseball bat",
-                                        "baseball glove", "skateboard", "surfboard", "tennis racket",
-                                        "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl",
-                                        "banana", "apple", "sandwich", "orange", "broccoli", "carrot",
-                                        "hot dog", "pizza", "donut", "cake", "chair", "couch",
-                                        "potted plant", "bed", "dining table", "toilet", "tv", "laptop",
-                                        "mouse", "remote", "keyboard", "cell phone", "microwave", "oven",
-                                        "toaster", "sink", "refrigerator", "book", "clock", "vase",
-                                        "scissors", "teddy bear", "hair drier", "toothbrush"
-    };
-
-    cv::Mat image = bgr.clone();
-    int src_w = image.cols;
-    int src_h = image.rows;
-    int dst_w = effect_roi.width;
-    int dst_h = effect_roi.height;
-    float width_ratio = (float)src_w / (float)dst_w;
-    float height_ratio = (float)src_h / (float)dst_h;
-
-
-    for (size_t i = 0; i < bboxes.size(); i++)
-    {
-        const BoxInfo& bbox = bboxes[i];
-        cv::Scalar color = cv::Scalar(color_list[bbox.label][0], color_list[bbox.label][1], color_list[bbox.label][2]);
-        cv::rectangle(image, cv::Rect(cv::Point((bbox.x1 - effect_roi.x) * width_ratio, (bbox.y1 - effect_roi.y) * height_ratio),
-                                      cv::Point((bbox.x2 - effect_roi.x) * width_ratio, (bbox.y2 - effect_roi.y) * height_ratio)), color);
-
-        char text[256];
-        sprintf(text, "%s %.1f%%", class_names[bbox.label], bbox.score * 100);
-
-        int baseLine = 0;
-        cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.4, 1, &baseLine);
-
-        int x = (bbox.x1 - effect_roi.x) * width_ratio;
-        int y = (bbox.y1 - effect_roi.y) * height_ratio - label_size.height - baseLine;
-        if (y < 0)
-            y = 0;
-        if (x + label_size.width > image.cols)
-            x = image.cols - label_size.width;
-
-        cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)),
-            color, -1);
-
-        cv::putText(image, text, cv::Point(x, y + label_size.height),
-            cv::FONT_HERSHEY_SIMPLEX, 0.4, cv::Scalar(255, 255, 255));
-    }
-
-    if (save_path == "None")
-    {
-        cv::imshow("image", image);
-    }
-    else
-    {
-        cv::imwrite(save_path, image);
-        std::cout << save_path << std::endl;
-    }
-}
-
-
-int image_demo(PicoDet &detector, const char* imagepath)
-{
-    std::vector<cv::String> filenames;
-    cv::glob(imagepath, filenames, false);
-
-    for (auto img_name : filenames)
-    {
-        cv::Mat image = cv::imread(img_name);
-        if (image.empty())
-        {
-            fprintf(stderr, "cv::imread %s failed\n", img_name.c_str());
-            return -1;
-        }
-        object_rect effect_roi;
-        cv::Mat resized_img;
-        resize_uniform(image, resized_img, cv::Size(320, 320), effect_roi);
-        std::vector<BoxInfo> results;
-        detector.detect(resized_img, results);
-
-        #ifdef __SAVE_RESULT__
-            std::string save_path = img_name;
-            draw_bboxes(image, results, effect_roi, save_path.replace(3, 4, "results"));
-        #else
-            draw_bboxes(image, results, effect_roi);
-            cv::waitKey(0);
-        #endif
-
-    }
-    return 0;
+void draw_bboxes(const cv::Mat &im, const std::vector<BoxInfo> &bboxes,
+                 std::string save_path = "None") {
+  // Draws each box with its class name and score on a copy of im, then shows
+  // the copy in a window (save_path == "None") or writes it to save_path.
+  static const char *class_names[] = {
+      "person",        "bicycle",      "car",
+      "motorcycle",    "airplane",     "bus",
+      "train",         "truck",        "boat",
+      "traffic light", "fire hydrant", "stop sign",
+      "parking meter", "bench",        "bird",
+      "cat",           "dog",          "horse",
+      "sheep",         "cow",          "elephant",
+      "bear",          "zebra",        "giraffe",
+      "backpack",      "umbrella",     "handbag",
+      "tie",           "suitcase",     "frisbee",
+      "skis",          "snowboard",    "sports ball",
+      "kite",          "baseball bat", "baseball glove",
+      "skateboard",    "surfboard",    "tennis racket",
+      "bottle",        "wine glass",   "cup",
+      "fork",          "knife",        "spoon",
+      "bowl",          "banana",       "apple",
+      "sandwich",      "orange",       "broccoli",
+      "carrot",        "hot dog",      "pizza",
+      "donut",         "cake",         "chair",
+      "couch",         "potted plant", "bed",
+      "dining table",  "toilet",       "tv",
+      "laptop",        "mouse",        "remote",
+      "keyboard",      "cell phone",   "microwave",
+      "oven",          "toaster",      "sink",
+      "refrigerator",  "book",         "clock",
+      "vase",          "scissors",     "teddy bear",
+      "hair drier",    "toothbrush"};
+  // Element count, not sizeof(class_names), which is the array size in bytes.
+  const int num_class = sizeof(class_names) / sizeof(class_names[0]);
+  cv::Mat image = im.clone();
+  auto colormap = GenerateColorMap(num_class);
+
+  for (size_t i = 0; i < bboxes.size(); i++) {
+    const BoxInfo &bbox = bboxes[i];
+    // A label outside the table would index past class_names and colormap.
+    if (bbox.label < 0 || bbox.label >= num_class)
+      continue;
+    std::cout << bbox.x1 << ". " << bbox.y1 << ". " << bbox.x2 << ". "
+              << bbox.y2 << ". " << std::endl;
+    cv::Scalar color(colormap[3 * bbox.label + 0], colormap[3 * bbox.label + 1],
+                     colormap[3 * bbox.label + 2]);
+    cv::rectangle(image, cv::Rect(cv::Point(bbox.x1, bbox.y1),
+                                  cv::Point(bbox.x2, bbox.y2)),
+                  color, 1, cv::LINE_AA);
+
+    char text[256];
+    snprintf(text, sizeof(text), "%s %.1f%%", class_names[bbox.label],
+             bbox.score * 100);
+    int baseLine = 0;
+    cv::Size label_size =
+        cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.4, 1, &baseLine);
+
+    // Place the label above the box, clamped at the top and right image edges.
+    int x = bbox.x1;
+    int y = bbox.y1 - label_size.height - baseLine;
+    if (y < 0)
+      y = 0;
+    if (x + label_size.width > image.cols)
+      x = image.cols - label_size.width;
+    cv::rectangle(image, cv::Rect(cv::Point(x, y),
+                                  cv::Size(label_size.width,
+                                           label_size.height + baseLine)),
+                  color, -1);
+
+    cv::putText(image, text, cv::Point(x, y + label_size.height),
+                cv::FONT_HERSHEY_SIMPLEX, 0.4, cv::Scalar(255, 255, 255), 1,
+                cv::LINE_AA);
+  }
+
+  if (save_path == "None") {
+    cv::imshow("image", image);
+  } else {
+    cv::imwrite(save_path, image);
+    std::cout << save_path << std::endl;
+  }
 }
 
-int webcam_demo(PicoDet& detector, int cam_id)
-{
-    cv::Mat image;
-    cv::VideoCapture cap(cam_id);
+int image_demo(PicoDet &detector, const char *imagepath) {
+  std::vector<cv::String> filenames; // every file matching the imagepath glob
+  cv::glob(imagepath, filenames, false);
 
-    while (true)
-    {
-        cap >> image;
-        object_rect effect_roi;
-        cv::Mat resized_img;
-        resize_uniform(image, resized_img, cv::Size(320, 320), effect_roi);
-        std::vector<BoxInfo> results;
-        detector.detect(resized_img, results);
-        draw_bboxes(image, results, effect_roi);
-        cv::waitKey(1);
+  for (const auto &img_name : filenames) {
+    cv::Mat image = cv::imread(img_name, cv::IMREAD_COLOR);
+    if (image.empty()) {
+      fprintf(stderr, "cv::imread %s failed\n", img_name.c_str());
+      return -1;
     }
-    return 0;
+    std::vector<BoxInfo> results;
+    detector.detect(image, results, false);
+    std::cout << "detect done." << std::endl;
+#ifdef __SAVE_RESULT__
+    std::string name = img_name; // saved as ../results/<basename of the input>
+    name = "../results/" + name.substr(name.find_last_of("/\\") + 1); // npos+1 == 0
+    draw_bboxes(image, results, name);
+#else
+    draw_bboxes(image, results);
+    cv::waitKey(0);
+#endif
+  }
+  return 0;
 }
 
-int video_demo(PicoDet& detector, const char* path)
-{
-    cv::Mat image;
-    cv::VideoCapture cap(path);
-
-    while (true)
-    {
-        cap >> image;
-        object_rect effect_roi;
-        cv::Mat resized_img;
-        resize_uniform(image, resized_img, cv::Size(320, 320), effect_roi);
-        std::vector<BoxInfo> results;
-        detector.detect(resized_img, results);
-        draw_bboxes(image, results, effect_roi);
-        cv::waitKey(1);
+int benchmark(PicoDet &detector, int width, int height) {
+  // Times detect() on a synthetic image; warm-up iterations are not counted.
+  int loop_num = 100;
+  int warm_up = 8;
+  double time_min = DBL_MAX;
+  double time_max = -DBL_MAX;
+  double time_avg = 0;
+  // cv::Mat takes (rows, cols), i.e. (height, width).
+  cv::Mat image(height, width, CV_8UC3, cv::Scalar(1, 1, 1));
+  for (int i = 0; i < warm_up + loop_num; i++) {
+    auto start = std::chrono::steady_clock::now();
+    std::vector<BoxInfo> results;
+    detector.detect(image, results, false);
+    auto end = std::chrono::steady_clock::now();
+    std::chrono::duration<double> elapsed = end - start;
+    double time = elapsed.count(); // seconds (default period of duration<double>)
+    if (i >= warm_up) {
+      time_min = (std::min)(time_min, time);
+      time_max = (std::max)(time_max, time);
+      time_avg += time;
     }
-    return 0;
+  }
+  time_avg /= loop_num;
+  fprintf(stderr, "%20s  min = %7.2f  max = %7.2f  avg = %7.2f\n", "picodet",
+          time_min, time_max, time_avg);
+  return 0;
+}
 
-int benchmark(PicoDet& detector)
-{
-    int loop_num = 100;
-    int warm_up = 8;
-
-    double time_min = DBL_MAX;
-    double time_max = -DBL_MAX;
-    double time_avg = 0;
-    cv::Mat image(320, 320, CV_8UC3, cv::Scalar(1, 1, 1));
-    for (int i = 0; i < warm_up + loop_num; i++)
-    {
-        auto start = std::chrono::steady_clock::now();
-        std::vector<BoxInfo> results;
-        detector.detect(image, results);
-        auto end = std::chrono::steady_clock::now();
-
-        std::chrono::duration<double> elapsed = end - start;
-        double time = elapsed.count();
-        if (i >= warm_up)
-        {
-            time_min = (std::min)(time_min, time);
-            time_max = (std::max)(time_max, time);
-            time_avg += time;
-        }
-    }
-    time_avg /= loop_num;
-    fprintf(stderr, "%20s  min = %7.2f  max = %7.2f  avg = %7.2f\n", "picodet", time_min, time_max, time_avg);
-    return 0;
-}
-
-
-int main(int argc, char** argv)
-{
-    if (argc != 3)
-    {
-        fprintf(stderr, "usage: %s [mode] [path]. \n For webcam mode=0, path is cam id; \n For image demo, mode=1, path=xxx/xxx/*.jpg; \n For video, mode=2; \n For benchmark, mode=3 path=0.\n", argv[0]);
-        return -1;
-    }
-    PicoDet detector = PicoDet("../weight/picodet-416.mnn", 416, 416, 4, 0.45, 0.3);
-    int mode = atoi(argv[1]);
-    switch (mode)
-    {
-    case 0:{
-        int cam_id = atoi(argv[2]);
-        webcam_demo(detector, cam_id);
-        break;
-        }
-    case 1:{
-        const char* images = argv[2];
-        image_demo(detector, images);
-        break;
-        }
-    case 2:{
-        const char* path = argv[2];
-        video_demo(detector, path);
-        break;
-        }
-    case 3:{
-        benchmark(detector);
-        break;
-        }
-    default:{
-        fprintf(stderr, "usage: %s [mode] [path]. \n For webcam mode=0, path is cam id; \n For image demo, mode=1, path=xxx/xxx/*.jpg; \n For video, mode=2; \n For benchmark, mode=3 path=0.\n", argv[0]);
-        break;
-        }
+int main(int argc, char **argv) {
+  // Usage: picodet-mnn <mode> <model> [<height> <width> [<image glob>]]
+  if (argc < 3) {
+    std::cout << "Usage: ./picodet-mnn mode model_path [height width [image_path]]" << std::endl;
+    return -1;
+  }
+  int mode = atoi(argv[1]);
+  int height = argc >= 5 ? atoi(argv[3]) : 320; // size is read only when both
+  int width = argc >= 5 ? atoi(argv[4]) : 320;  // height and width are given
+  PicoDet detector = PicoDet(argv[2], width, height, 4, 0.45, 0.3);
+  if (mode == 1) {
+    return benchmark(detector, width, height);
+  } else {
+    if (argc < 6) {
+      std::cout << "Must set image file, such as ./picodet-mnn 0 "
+                   "../picodet_s_320_lcnet.mnn 320 320 img.jpg" << std::endl;
+      return -1;
     }
+    // argc >= 6 here, so argv[5] (the image glob) is valid.
+    return image_demo(detector, argv[5]);
+  }
 }
diff --git a/deploy/third_engine/demo_mnn/picodet_mnn.cpp b/deploy/third_engine/demo_mnn/picodet_mnn.cpp
index d6cb9c9fd..a315f14a9 100644
--- a/deploy/third_engine/demo_mnn/picodet_mnn.cpp
+++ b/deploy/third_engine/demo_mnn/picodet_mnn.cpp
@@ -44,7 +44,8 @@ PicoDet::~PicoDet() {
   PicoDet_interpreter->releaseSession(PicoDet_session);
 }
 
-int PicoDet::detect(cv::Mat &raw_image, std::vector<BoxInfo> &result_list) {
+int PicoDet::detect(cv::Mat &raw_image, std::vector<BoxInfo> &result_list,
+                    bool has_postprocess) {
   if (raw_image.empty()) {
     std::cout << "image is empty ,please check!" << std::endl;
     return -1;
@@ -70,22 +71,57 @@ int PicoDet::detect(cv::Mat &raw_image, std::vector<BoxInfo> &result_list) {
   std::vector<std::vector<BoxInfo>> results;
   results.resize(num_class);
 
-  for (const auto &head_info : heads_info) {
-    MNN::Tensor *tensor_scores = PicoDet_interpreter->getSessionOutput(
-        PicoDet_session, head_info.cls_layer.c_str());
-    MNN::Tensor *tensor_boxes = PicoDet_interpreter->getSessionOutput(
-        PicoDet_session, head_info.dis_layer.c_str());
-
-    MNN::Tensor tensor_scores_host(tensor_scores,
-                                   tensor_scores->getDimensionType());
-    tensor_scores->copyToHostTensor(&tensor_scores_host);
-
-    MNN::Tensor tensor_boxes_host(tensor_boxes,
-                                  tensor_boxes->getDimensionType());
-    tensor_boxes->copyToHostTensor(&tensor_boxes_host);
-
-    decode_infer(&tensor_scores_host, &tensor_boxes_host, head_info.stride,
-                 score_threshold, results);
+  if (has_postprocess) {
+    auto bbox_out_tensor = PicoDet_interpreter->getSessionOutput(
+        PicoDet_session, nms_heads_info[0].c_str());
+    auto class_out_tensor = PicoDet_interpreter->getSessionOutput(
+        PicoDet_session, nms_heads_info[1].c_str());
+    // bbox branch
+    auto tensor_bbox_host =
+        new MNN::Tensor(bbox_out_tensor, MNN::Tensor::CAFFE);
+    bbox_out_tensor->copyToHostTensor(tensor_bbox_host);
+    auto bbox_output_shape = tensor_bbox_host->shape();
+    int output_size = 1;
+    for (int j = 0; j < bbox_output_shape.size(); ++j) {
+      output_size *= bbox_output_shape[j];
+    }
+    std::cout << "output_size:" << output_size << std::endl;
+    bbox_output_data_.resize(output_size);
+    std::copy_n(tensor_bbox_host->host<float>(), output_size,
+                bbox_output_data_.data());
+    delete tensor_bbox_host;
+    // class branch
+    auto tensor_class_host =
+        new MNN::Tensor(class_out_tensor, MNN::Tensor::CAFFE);
+    class_out_tensor->copyToHostTensor(tensor_class_host);
+    auto class_output_shape = tensor_class_host->shape();
+    output_size = 1;
+    for (int j = 0; j < class_output_shape.size(); ++j) {
+      output_size *= class_output_shape[j];
+    }
+    std::cout << "output_size:" << output_size << std::endl;
+    class_output_data_.resize(output_size);
+    std::copy_n(tensor_class_host->host<float>(), output_size,
+                class_output_data_.data());
+    delete tensor_class_host;
+  } else {
+    for (const auto &head_info : non_postprocess_heads_info) {
+      MNN::Tensor *tensor_scores = PicoDet_interpreter->getSessionOutput(
+          PicoDet_session, head_info.cls_layer.c_str());
+      MNN::Tensor *tensor_boxes = PicoDet_interpreter->getSessionOutput(
+          PicoDet_session, head_info.dis_layer.c_str());
+
+      MNN::Tensor tensor_scores_host(tensor_scores,
+                                     tensor_scores->getDimensionType());
+      tensor_scores->copyToHostTensor(&tensor_scores_host);
+
+      MNN::Tensor tensor_boxes_host(tensor_boxes,
+                                    tensor_boxes->getDimensionType());
+      tensor_boxes->copyToHostTensor(&tensor_boxes_host);
+
+      decode_infer(&tensor_scores_host, &tensor_boxes_host, head_info.stride,
+                   score_threshold, results);
+    }
   }
 
   auto end = chrono::steady_clock::now();
@@ -188,8 +224,6 @@ void PicoDet::nms(std::vector<BoxInfo> &input_boxes, float NMS_THRESH) {
   }
 }
 
-string PicoDet::get_label_str(int label) { return labels[label]; }
-
 inline float fast_exp(float x) {
   union {
     uint32_t i;
diff --git a/deploy/third_engine/demo_mnn/picodet_mnn.hpp b/deploy/third_engine/demo_mnn/picodet_mnn.hpp
index ecece8b17..4744040e2 100644
--- a/deploy/third_engine/demo_mnn/picodet_mnn.hpp
+++ b/deploy/third_engine/demo_mnn/picodet_mnn.hpp
@@ -11,7 +11,6 @@
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
-// reference from https://github.com/RangiLyu/nanodet/tree/main/demo_mnn
 
 #ifndef __PicoDet_H__
 #define __PicoDet_H__
@@ -20,90 +19,84 @@
 
 #include "Interpreter.hpp"
 
+#include "ImageProcess.hpp"
 #include "MNNDefine.h"
 #include "Tensor.hpp"
-#include "ImageProcess.hpp"
-#include <opencv2/opencv.hpp>
 #include <algorithm>
+#include <chrono>
 #include <iostream>
+#include <memory>
+#include <opencv2/opencv.hpp>
 #include <string>
 #include <vector>
-#include <memory>
-#include <chrono>
 
-
-typedef struct HeadInfo_
-{
-    std::string cls_layer;
-    std::string dis_layer;
-    int stride;
-} HeadInfo;
-
-typedef struct BoxInfo_
-{
-    float x1;
-    float y1;
-    float x2;
-    float y2;
-    float score;
-    int label;
+typedef struct NonPostProcessHeadInfo_ {
+  std::string cls_layer;
+  std::string dis_layer;
+  int stride;
+} NonPostProcessHeadInfo;
+
+typedef struct BoxInfo_ {
+  float x1;
+  float y1;
+  float x2;
+  float y2;
+  float score;
+  int label;
 } BoxInfo;
 
 class PicoDet {
 public:
-    PicoDet(const std::string &mnn_path,
-            int input_width, int input_length, int num_thread_ = 4, float score_threshold_ = 0.5, float nms_threshold_ = 0.3);
+  PicoDet(const std::string &mnn_path, int input_width, int input_length,
+          int num_thread_ = 4, float score_threshold_ = 0.5,
+          float nms_threshold_ = 0.3);
 
-    ~PicoDet();
+  ~PicoDet();
 
-    int detect(cv::Mat &img, std::vector<BoxInfo> &result_list);
-    std::string get_label_str(int label);
+  int detect(cv::Mat &img, std::vector<BoxInfo> &result_list,
+             bool has_postprocess);
 
 private:
-    void decode_infer(MNN::Tensor *cls_pred, MNN::Tensor *dis_pred, int stride, float threshold, std::vector<std::vector<BoxInfo>> &results);
-    BoxInfo disPred2Bbox(const float *&dfl_det, int label, float score, int x, int y, int stride);
-    void nms(std::vector<BoxInfo> &input_boxes, float NMS_THRESH);
+  void decode_infer(MNN::Tensor *cls_pred, MNN::Tensor *dis_pred, int stride,
+                    float threshold,
+                    std::vector<std::vector<BoxInfo>> &results);
+  BoxInfo disPred2Bbox(const float *&dfl_det, int label, float score, int x,
+                       int y, int stride);
+  void nms(std::vector<BoxInfo> &input_boxes, float NMS_THRESH);
 
 private:
-
-    std::shared_ptr<MNN::Interpreter> PicoDet_interpreter;
-    MNN::Session *PicoDet_session = nullptr;
-    MNN::Tensor *input_tensor = nullptr;
-
-    int num_thread;
-    int image_w;
-    int image_h;
-
-    int in_w = 320;
-    int in_h = 320;
-
-    float score_threshold;
-    float nms_threshold;
-
-    const float mean_vals[3] = { 103.53f, 116.28f, 123.675f };
-    const float norm_vals[3] = { 0.017429f, 0.017507f, 0.017125f };
-
-    const int num_class = 80;
-    const int reg_max = 7;
-
-    std::vector<HeadInfo> heads_info{
-        // cls_pred|dis_pred|stride
-        {"save_infer_model/scale_0.tmp_1", "save_infer_model/scale_4.tmp_1", 8},
-        {"save_infer_model/scale_1.tmp_1", "save_infer_model/scale_5.tmp_1", 16},
-        {"save_infer_model/scale_2.tmp_1", "save_infer_model/scale_6.tmp_1", 32},
-        {"save_infer_model/scale_3.tmp_1", "save_infer_model/scale_7.tmp_1", 64},
-    };
-
-    std::vector<std::string>
-    labels{"person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
-           "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
-           "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
-           "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
-           "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
-           "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
-           "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
-           "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
-           "hair drier", "toothbrush"};
+  std::shared_ptr<MNN::Interpreter> PicoDet_interpreter;
+  MNN::Session *PicoDet_session = nullptr;
+  MNN::Tensor *input_tensor = nullptr;
+
+  int num_thread;
+  int image_w;
+  int image_h;
+
+  int in_w = 320;
+  int in_h = 320;
+
+  float score_threshold;
+  float nms_threshold;
+
+  const float mean_vals[3] = {103.53f, 116.28f, 123.675f};
+  const float norm_vals[3] = {0.017429f, 0.017507f, 0.017125f};
+
+  const int num_class = 80;
+  const int reg_max = 7;
+
+  std::vector<float> bbox_output_data_;
+  std::vector<float> class_output_data_;
+
+  std::vector<std::string> nms_heads_info{"tmp_16", "concat_4.tmp_0"};
+  // non_postprocess_heads_info is used when the model is exported without post-processing
+  std::vector<NonPostProcessHeadInfo> non_postprocess_heads_info{
+      // cls_pred|dis_pred|stride
+      {"transpose_0.tmp_0", "transpose_1.tmp_0", 8},
+      {"transpose_2.tmp_0", "transpose_3.tmp_0", 16},
+      {"transpose_4.tmp_0", "transpose_5.tmp_0", 32},
+      {"transpose_6.tmp_0", "transpose_7.tmp_0", 64},
+  };
 };
 
 template <typename _Tp>
diff --git a/deploy/third_engine/demo_mnn/python/demo_mnn.py b/deploy/third_engine/demo_mnn/python/demo_mnn.py
deleted file mode 100644
index c5f880938..000000000
--- a/deploy/third_engine/demo_mnn/python/demo_mnn.py
+++ /dev/null
@@ -1,803 +0,0 @@
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-
-#     http://www.apache.org/licenses/LICENSE-2.0
-
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# reference from https://github.com/RangiLyu/nanodet/tree/main/demo_mnn
-
-# -*- coding: utf-8 -*-
-import argparse
-from abc import ABCMeta, abstractmethod
-from pathlib import Path
-
-import cv2
-import matplotlib.pyplot as plt
-import numpy as np
-from scipy.special import softmax
-from tqdm import tqdm
-
-_COLORS = (np.array([
-    0.000,
-    0.447,
-    0.741,
-    0.850,
-    0.325,
-    0.098,
-    0.929,
-    0.694,
-    0.125,
-    0.494,
-    0.184,
-    0.556,
-    0.466,
-    0.674,
-    0.188,
-    0.301,
-    0.745,
-    0.933,
-    0.635,
-    0.078,
-    0.184,
-    0.300,
-    0.300,
-    0.300,
-    0.600,
-    0.600,
-    0.600,
-    1.000,
-    0.000,
-    0.000,
-    1.000,
-    0.500,
-    0.000,
-    0.749,
-    0.749,
-    0.000,
-    0.000,
-    1.000,
-    0.000,
-    0.000,
-    0.000,
-    1.000,
-    0.667,
-    0.000,
-    1.000,
-    0.333,
-    0.333,
-    0.000,
-    0.333,
-    0.667,
-    0.000,
-    0.333,
-    1.000,
-    0.000,
-    0.667,
-    0.333,
-    0.000,
-    0.667,
-    0.667,
-    0.000,
-    0.667,
-    1.000,
-    0.000,
-    1.000,
-    0.333,
-    0.000,
-    1.000,
-    0.667,
-    0.000,
-    1.000,
-    1.000,
-    0.000,
-    0.000,
-    0.333,
-    0.500,
-    0.000,
-    0.667,
-    0.500,
-    0.000,
-    1.000,
-    0.500,
-    0.333,
-    0.000,
-    0.500,
-    0.333,
-    0.333,
-    0.500,
-    0.333,
-    0.667,
-    0.500,
-    0.333,
-    1.000,
-    0.500,
-    0.667,
-    0.000,
-    0.500,
-    0.667,
-    0.333,
-    0.500,
-    0.667,
-    0.667,
-    0.500,
-    0.667,
-    1.000,
-    0.500,
-    1.000,
-    0.000,
-    0.500,
-    1.000,
-    0.333,
-    0.500,
-    1.000,
-    0.667,
-    0.500,
-    1.000,
-    1.000,
-    0.500,
-    0.000,
-    0.333,
-    1.000,
-    0.000,
-    0.667,
-    1.000,
-    0.000,
-    1.000,
-    1.000,
-    0.333,
-    0.000,
-    1.000,
-    0.333,
-    0.333,
-    1.000,
-    0.333,
-    0.667,
-    1.000,
-    0.333,
-    1.000,
-    1.000,
-    0.667,
-    0.000,
-    1.000,
-    0.667,
-    0.333,
-    1.000,
-    0.667,
-    0.667,
-    1.000,
-    0.667,
-    1.000,
-    1.000,
-    1.000,
-    0.000,
-    1.000,
-    1.000,
-    0.333,
-    1.000,
-    1.000,
-    0.667,
-    1.000,
-    0.333,
-    0.000,
-    0.000,
-    0.500,
-    0.000,
-    0.000,
-    0.667,
-    0.000,
-    0.000,
-    0.833,
-    0.000,
-    0.000,
-    1.000,
-    0.000,
-    0.000,
-    0.000,
-    0.167,
-    0.000,
-    0.000,
-    0.333,
-    0.000,
-    0.000,
-    0.500,
-    0.000,
-    0.000,
-    0.667,
-    0.000,
-    0.000,
-    0.833,
-    0.000,
-    0.000,
-    1.000,
-    0.000,
-    0.000,
-    0.000,
-    0.167,
-    0.000,
-    0.000,
-    0.333,
-    0.000,
-    0.000,
-    0.500,
-    0.000,
-    0.000,
-    0.667,
-    0.000,
-    0.000,
-    0.833,
-    0.000,
-    0.000,
-    1.000,
-    0.000,
-    0.000,
-    0.000,
-    0.143,
-    0.143,
-    0.143,
-    0.286,
-    0.286,
-    0.286,
-    0.429,
-    0.429,
-    0.429,
-    0.571,
-    0.571,
-    0.571,
-    0.714,
-    0.714,
-    0.714,
-    0.857,
-    0.857,
-    0.857,
-    0.000,
-    0.447,
-    0.741,
-    0.314,
-    0.717,
-    0.741,
-    0.50,
-    0.5,
-    0,
-]).astype(np.float32).reshape(-1, 3))
-
-
-def get_resize_matrix(raw_shape, dst_shape, keep_ratio):
-    """
-    Get resize matrix for resizing raw img to input size
-    :param raw_shape: (width, height) of raw image
-    :param dst_shape: (width, height) of input image
-    :param keep_ratio: whether keep original ratio
-    :return: 3x3 Matrix
-    """
-    r_w, r_h = raw_shape
-    d_w, d_h = dst_shape
-    Rs = np.eye(3)
-    if keep_ratio:
-        C = np.eye(3)
-        C[0, 2] = -r_w / 2
-        C[1, 2] = -r_h / 2
-
-        if r_w / r_h < d_w / d_h:
-            ratio = d_h / r_h
-        else:
-            ratio = d_w / r_w
-        Rs[0, 0] *= ratio
-        Rs[1, 1] *= ratio
-
-        T = np.eye(3)
-        T[0, 2] = 0.5 * d_w
-        T[1, 2] = 0.5 * d_h
-        return T @Rs @C
-    else:
-        Rs[0, 0] *= d_w / r_w
-        Rs[1, 1] *= d_h / r_h
-        return Rs
-
-
-def warp_boxes(boxes, M, width, height):
-    """Apply transform to boxes
-    Copy from picodet/data/transform/warp.py
-    """
-    n = len(boxes)
-    if n:
-        # warp points
-        xy = np.ones((n * 4, 3))
-        xy[:, :2] = boxes[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(
-            n * 4, 2)  # x1y1, x2y2, x1y2, x2y1
-        xy = xy @M.T  # transform
-        xy = (xy[:, :2] / xy[:, 2:3]).reshape(n, 8)  # rescale
-        # create new boxes
-        x = xy[:, [0, 2, 4, 6]]
-        y = xy[:, [1, 3, 5, 7]]
-        xy = np.concatenate(
-            (x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T
-        # clip boxes
-        xy[:, [0, 2]] = xy[:, [0, 2]].clip(0, width)
-        xy[:, [1, 3]] = xy[:, [1, 3]].clip(0, height)
-        return xy.astype(np.float32)
-    else:
-        return boxes
-
-
-def overlay_bbox_cv(img, all_box, class_names):
-    """Draw result boxes
-    Copy from picodet/util/visualization.py
-    """
-    # all_box array of [label, x0, y0, x1, y1, score]
-    all_box.sort(key=lambda v: v[5])
-    for box in all_box:
-        label, x0, y0, x1, y1, score = box
-        color = (_COLORS[label] * 255).astype(np.uint8).tolist()
-        text = "{}:{:.1f}%".format(class_names[label], score * 100)
-        txt_color = (0, 0, 0) if np.mean(_COLORS[label]) > 0.5 else (255, 255,
-                                                                     255)
-        font = cv2.FONT_HERSHEY_SIMPLEX
-        txt_size = cv2.getTextSize(text, font, 0.5, 2)[0]
-        cv2.rectangle(img, (x0, y0), (x1, y1), color, 2)
-
-        cv2.rectangle(
-            img,
-            (x0, y0 - txt_size[1] - 1),
-            (x0 + txt_size[0] + txt_size[1], y0 - 1),
-            color,
-            -1, )
-        cv2.putText(img, text, (x0, y0 - 1), font, 0.5, txt_color, thickness=1)
-    return img
-
-
-def hard_nms(box_scores, iou_threshold, top_k=-1, candidate_size=200):
-    """
-
-    Args:
-        box_scores (N, 5): boxes in corner-form and probabilities.
-        iou_threshold: intersection over union threshold.
-        top_k: keep top_k results. If k <= 0, keep all the results.
-        candidate_size: only consider the candidates with the highest scores.
-    Returns:
-         picked: a list of indexes of the kept boxes
-    """
-    scores = box_scores[:, -1]
-    boxes = box_scores[:, :-1]
-    picked = []
-    indexes = np.argsort(scores)
-    indexes = indexes[-candidate_size:]
-    while len(indexes) > 0:
-        current = indexes[-1]
-        picked.append(current)
-        if 0 < top_k == len(picked) or len(indexes) == 1:
-            break
-        current_box = boxes[current, :]
-        indexes = indexes[:-1]
-        rest_boxes = boxes[indexes, :]
-        iou = iou_of(
-            rest_boxes,
-            np.expand_dims(
-                current_box, axis=0), )
-        indexes = indexes[iou <= iou_threshold]
-
-    return box_scores[picked, :]
-
-
-def iou_of(boxes0, boxes1, eps=1e-5):
-    """Return intersection-over-union (Jaccard index) of boxes.
-
-    Args:
-        boxes0 (N, 4): ground truth boxes.
-        boxes1 (N or 1, 4): predicted boxes.
-        eps: a small number to avoid 0 as denominator.
-    Returns:
-        iou (N): IoU values.
-    """
-    overlap_left_top = np.maximum(boxes0[..., :2], boxes1[..., :2])
-    overlap_right_bottom = np.minimum(boxes0[..., 2:], boxes1[..., 2:])
-
-    overlap_area = area_of(overlap_left_top, overlap_right_bottom)
-    area0 = area_of(boxes0[..., :2], boxes0[..., 2:])
-    area1 = area_of(boxes1[..., :2], boxes1[..., 2:])
-    return overlap_area / (area0 + area1 - overlap_area + eps)
-
-
-def area_of(left_top, right_bottom):
-    """Compute the areas of rectangles given two corners.
-
-    Args:
-        left_top (N, 2): left top corner.
-        right_bottom (N, 2): right bottom corner.
-
-    Returns:
-        area (N): return the area.
-    """
-    hw = np.clip(right_bottom - left_top, 0.0, None)
-    return hw[..., 0] * hw[..., 1]
-
-
-class PicoDetABC(metaclass=ABCMeta):
-    def __init__(
-            self,
-            input_shape=[416, 416],
-            reg_max=7,
-            strides=[8, 16, 32, 64],
-            prob_threshold=0.4,
-            iou_threshold=0.3,
-            num_candidate=1000,
-            top_k=-1, ):
-        self.strides = strides
-        self.input_shape = input_shape
-        self.reg_max = reg_max
-        self.prob_threshold = prob_threshold
-        self.iou_threshold = iou_threshold
-        self.num_candidate = num_candidate
-        self.top_k = top_k
-        self.img_mean = [103.53, 116.28, 123.675]
-        self.img_std = [57.375, 57.12, 58.395]
-        self.input_size = (self.input_shape[1], self.input_shape[0])
-        self.class_names = [
-            "person",
-            "bicycle",
-            "car",
-            "motorcycle",
-            "airplane",
-            "bus",
-            "train",
-            "truck",
-            "boat",
-            "traffic_light",
-            "fire_hydrant",
-            "stop_sign",
-            "parking_meter",
-            "bench",
-            "bird",
-            "cat",
-            "dog",
-            "horse",
-            "sheep",
-            "cow",
-            "elephant",
-            "bear",
-            "zebra",
-            "giraffe",
-            "backpack",
-            "umbrella",
-            "handbag",
-            "tie",
-            "suitcase",
-            "frisbee",
-            "skis",
-            "snowboard",
-            "sports_ball",
-            "kite",
-            "baseball_bat",
-            "baseball_glove",
-            "skateboard",
-            "surfboard",
-            "tennis_racket",
-            "bottle",
-            "wine_glass",
-            "cup",
-            "fork",
-            "knife",
-            "spoon",
-            "bowl",
-            "banana",
-            "apple",
-            "sandwich",
-            "orange",
-            "broccoli",
-            "carrot",
-            "hot_dog",
-            "pizza",
-            "donut",
-            "cake",
-            "chair",
-            "couch",
-            "potted_plant",
-            "bed",
-            "dining_table",
-            "toilet",
-            "tv",
-            "laptop",
-            "mouse",
-            "remote",
-            "keyboard",
-            "cell_phone",
-            "microwave",
-            "oven",
-            "toaster",
-            "sink",
-            "refrigerator",
-            "book",
-            "clock",
-            "vase",
-            "scissors",
-            "teddy_bear",
-            "hair_drier",
-            "toothbrush",
-        ]
-
-    def preprocess(self, img):
-        # resize image
-        ResizeM = get_resize_matrix((img.shape[1], img.shape[0]),
-                                    self.input_size, True)
-        img_resize = cv2.warpPerspective(img, ResizeM, dsize=self.input_size)
-        # normalize image
-        img_input = img_resize.astype(np.float32) / 255
-        img_mean = np.array(
-            self.img_mean, dtype=np.float32).reshape(1, 1, 3) / 255
-        img_std = np.array(
-            self.img_std, dtype=np.float32).reshape(1, 1, 3) / 255
-        img_input = (img_input - img_mean) / img_std
-        # expand dims
-        img_input = np.transpose(img_input, [2, 0, 1])
-        img_input = np.expand_dims(img_input, axis=0)
-        return img_input, ResizeM
-
-    def postprocess(self, scores, raw_boxes, ResizeM, raw_shape):
-        # generate centers
-        decode_boxes = []
-        select_scores = []
-        for stride, box_distribute, score in zip(self.strides, raw_boxes,
-                                                 scores):
-            # centers
-            fm_h = self.input_shape[0] / stride
-            fm_w = self.input_shape[1] / stride
-            h_range = np.arange(fm_h)
-            w_range = np.arange(fm_w)
-            ww, hh = np.meshgrid(w_range, h_range)
-            ct_row = (hh.flatten() + 0.5) * stride
-            ct_col = (ww.flatten() + 0.5) * stride
-            center = np.stack((ct_col, ct_row, ct_col, ct_row), axis=1)
-
-            # box distribution to distance
-            reg_range = np.arange(self.reg_max + 1)
-            box_distance = box_distribute.reshape((-1, self.reg_max + 1))
-            box_distance = softmax(box_distance, axis=1)
-            box_distance = box_distance * np.expand_dims(reg_range, axis=0)
-            box_distance = np.sum(box_distance, axis=1).reshape((-1, 4))
-            box_distance = box_distance * stride
-
-            # top K candidate
-            topk_idx = np.argsort(score.max(axis=1))[::-1]
-            topk_idx = topk_idx[:C]
-            center = center[topk_idx]
-            score = score[topk_idx]
-            box_distance = box_distance[topk_idx]
-
-            # decode box
-            decode_box = center + [-1, -1, 1, 1] * box_distance
-
-            select_scores.append(score)
-            decode_boxes.append(decode_box)
-
-        # nms
-        bboxes = np.concatenate(decode_boxes, axis=0)
-        confidences = np.concatenate(select_scores, axis=0)
-        picked_box_probs = []
-        picked_labels = []
-        for class_index in range(0, confidences.shape[1]):
-            probs = confidences[:, class_index]
-            mask = probs > self.prob_threshold
-            probs = probs[mask]
-            if probs.shape[0] == 0:
-                continue
-            subset_boxes = bboxes[mask, :]
-            box_probs = np.concatenate(
-                [subset_boxes, probs.reshape(-1, 1)], axis=1)
-            box_probs = hard_nms(
-                box_probs,
-                iou_threshold=self.iou_threshold,
-                top_k=self.top_k, )
-            picked_box_probs.append(box_probs)
-            picked_labels.extend([class_index] * box_probs.shape[0])
-        if not picked_box_probs:
-            return np.array([]), np.array([]), np.array([])
-        picked_box_probs = np.concatenate(picked_box_probs)
-
-        # resize output boxes
-        picked_box_probs[:, :4] = warp_boxes(picked_box_probs[:, :4],
-                                             np.linalg.inv(ResizeM),
-                                             raw_shape[1], raw_shape[0])
-        return (
-            picked_box_probs[:, :4].astype(np.int32),
-            np.array(picked_labels),
-            picked_box_probs[:, 4], )
-
-    @abstractmethod
-    def infer_image(self, img_input):
-        pass
-
-    def detect(self, img):
-        raw_shape = img.shape
-        img_input, ResizeM = self.preprocess(img)
-        scores, raw_boxes = self.infer_image(img_input)
-        if scores[0].ndim == 1:  # handling num_classes=1 case
-            scores = [x[:, None] for x in scores]
-        bbox, label, score = self.postprocess(scores, raw_boxes, ResizeM,
-                                              raw_shape)
-
-        print(bbox, score)
-        return bbox, label, score
-
-    def draw_box(self, raw_img, bbox, label, score):
-        img = raw_img.copy()
-        all_box = [[x, ] + y + [z, ]
-                   for x, y, z in zip(label, bbox.tolist(), score)]
-        img_draw = overlay_bbox_cv(img, all_box, self.class_names)
-        return img_draw
-
-    def detect_folder(self, img_fold, result_path):
-        img_fold = Path(img_fold)
-        result_path = Path(result_path)
-        result_path.mkdir(parents=True, exist_ok=True)
-
-        img_name_list = filter(
-            lambda x: str(x).endswith(".png") or str(x).endswith(".jpg"),
-            img_fold.iterdir(), )
-        img_name_list = list(img_name_list)
-        print(f"find {len(img_name_list)} images")
-
-        for img_path in tqdm(img_name_list):
-            img = cv2.imread(str(img_path))
-            bbox, label, score = self.detect(img)
-            img_draw = self.draw_box(img, bbox, label, score)
-            save_path = str(result_path / img_path.name.replace(".png", ".jpg"))
-            cv2.imwrite(save_path, img_draw)
-
-
-class PicoDetMNN(PicoDetABC):
-    import MNN as MNNlib
-
-    def __init__(self, model_path, *args, **kwargs):
-        super(PicoDetMNN, self).__init__(*args, **kwargs)
-        print("Using MNN as inference backend")
-        print(f"Using weight: {model_path}")
-
-        # load model
-        self.model_path = model_path
-        self.interpreter = self.MNNlib.Interpreter(self.model_path)
-        self.session = self.interpreter.createSession()
-        self.input_tensor = self.interpreter.getSessionInput(self.session)
-
-    def infer_image(self, img_input):
-        tmp_input = self.MNNlib.Tensor(
-            (1, 3, self.input_size[1], self.input_size[0]),
-            self.MNNlib.Halide_Type_Float,
-            img_input,
-            self.MNNlib.Tensor_DimensionType_Caffe, )
-        self.input_tensor.copyFrom(tmp_input)
-        self.interpreter.runSession(self.session)
-        score_out_name = [
-            "save_infer_model/scale_0.tmp_1", "save_infer_model/scale_1.tmp_1",
-            "save_infer_model/scale_2.tmp_1", "save_infer_model/scale_3.tmp_1"
-        ]
-        scores = [
-            self.interpreter.getSessionOutput(self.session, x).getData()
-            for x in score_out_name
-        ]
-        scores = [np.reshape(x, (-1, 80)) for x in scores]
-        boxes_out_name = [
-            "save_infer_model/scale_4.tmp_1", "save_infer_model/scale_5.tmp_1",
-            "save_infer_model/scale_6.tmp_1", "save_infer_model/scale_7.tmp_1"
-        ]
-        raw_boxes = [
-            self.interpreter.getSessionOutput(self.session, x).getData()
-            for x in boxes_out_name
-        ]
-        raw_boxes = [np.reshape(x, (-1, 32)) for x in raw_boxes]
-        return scores, raw_boxes
-
-
-class PicoDetONNX(PicoDetABC):
-    import onnxruntime as ort
-
-    def __init__(self, model_path, *args, **kwargs):
-        super(PicoDetONNX, self).__init__(*args, **kwargs)
-        print("Using ONNX as inference backend")
-        print(f"Using weight: {model_path}")
-
-        # load model
-        self.model_path = model_path
-        self.ort_session = self.ort.InferenceSession(self.model_path)
-        self.input_name = self.ort_session.get_inputs()[0].name
-
-    def infer_image(self, img_input):
-        inference_results = self.ort_session.run(None,
-                                                 {self.input_name: img_input})
-        scores = [np.squeeze(x) for x in inference_results[:3]]
-        raw_boxes = [np.squeeze(x) for x in inference_results[3:]]
-        return scores, raw_boxes
-
-
-class PicoDetTorch(PicoDetABC):
-    import torch
-
-    def __init__(self, model_path, cfg_path, *args, **kwargs):
-        from picodet.model.arch import build_model
-        from picodet.util import Logger, cfg, load_config, load_model_weight
-
-        super(PicoDetTorch, self).__init__(*args, **kwargs)
-        print("Using PyTorch as inference backend")
-        print(f"Using weight: {model_path}")
-
-        # load model
-        self.model_path = model_path
-        self.cfg_path = cfg_path
-        load_config(cfg, cfg_path)
-        self.logger = Logger(-1, cfg.save_dir, False)
-        self.model = build_model(cfg.model)
-        checkpoint = self.torch.load(
-            model_path, map_location=lambda storage, loc: storage)
-        load_model_weight(self.model, checkpoint, self.logger)
-
-    def infer_image(self, img_input):
-        self.model.train(False)
-        with self.torch.no_grad():
-            inference_results = self.model(self.torch.from_numpy(img_input))
-        scores = [
-            x.permute(0, 2, 3, 1).reshape((-1, 80)).sigmoid().detach().numpy()
-            for x in inference_results[0]
-        ]
-        raw_boxes = [
-            x.permute(0, 2, 3, 1).reshape((-1, 32)).detach().numpy()
-            for x in inference_results[1]
-        ]
-        return scores, raw_boxes
-
-
-def main():
-    parser = argparse.ArgumentParser()
-    parser.add_argument(
-        "--model_path",
-        dest="model_path",
-        type=str,
-        default="../model/picodet-320.mnn")
-    parser.add_argument(
-        "--cfg_path", dest="cfg_path", type=str, default="config/picodet-m.yml")
-    parser.add_argument(
-        "--img_fold", dest="img_fold", type=str, default="../imgs")
-    parser.add_argument(
-        "--result_fold", dest="result_fold", type=str, default="../results")
-    parser.add_argument(
-        "--input_shape",
-        dest="input_shape",
-        nargs=2,
-        type=int,
-        default=[320, 320])
-    parser.add_argument(
-        "--backend", choices=["MNN", "ONNX", "torch"], default="MNN")
-    args = parser.parse_args()
-
-    print(f"Detecting {args.img_fold}")
-
-    # load detector
-    if args.backend == "MNN":
-        detector = PicoDetMNN(args.model_path, input_shape=args.input_shape)
-    elif args.backend == "ONNX":
-        detector = PicoDetONNX(args.model_path, input_shape=args.input_shape)
-    elif args.backend == "torch":
-        detector = PicoDetTorch(
-            args.model_path, args.cfg_path, input_shape=args.input_shape)
-    else:
-        raise ValueError
-
-    # detect folder
-    detector.detect_folder(args.img_fold, args.result_fold)
-
-
-def test_one():
-    detector = PicoDetMNN("../weight/picodet-416.mnn")
-    img = cv2.imread("../imgs/000252.jpg")
-    bbox, label, score = detector.detect(img)
-    img_draw = detector.draw_box(img, bbox, label, score)
-    cv2.imwrite('picodet_infer.jpg', img_draw)
-
-
-if __name__ == "__main__":
-    # main()
-    test_one()
diff --git a/deploy/third_engine/demo_ncnn/CMakeLists.txt b/deploy/third_engine/demo_ncnn/CMakeLists.txt
index 4f5cc65fc..0d4344c69 100644
--- a/deploy/third_engine/demo_ncnn/CMakeLists.txt
+++ b/deploy/third_engine/demo_ncnn/CMakeLists.txt
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.4.1)
+cmake_minimum_required(VERSION 3.9)
 set(CMAKE_CXX_STANDARD 17)
 
 project(picodet_demo)
@@ -11,9 +11,11 @@ if(OPENMP_FOUND)
     set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}")
 endif()
 
-find_package(OpenCV REQUIRED)
+# find_package(OpenCV REQUIRED)
+find_package(OpenCV REQUIRED PATHS "/path/to/opencv-3.4.16_gcc8.2_ffmpeg")
 
-find_package(ncnn REQUIRED)
+# find_package(ncnn REQUIRED)
+find_package(ncnn REQUIRED PATHS "/path/to/ncnn/build/install/lib/cmake/ncnn")
 if(NOT TARGET ncnn)
     message(WARNING "ncnn NOT FOUND!  Please set ncnn_DIR environment variable")
 else()
diff --git a/deploy/third_engine/demo_ncnn/README.md b/deploy/third_engine/demo_ncnn/README.md
index b15052d98..f9867b8ac 100644
--- a/deploy/third_engine/demo_ncnn/README.md
+++ b/deploy/third_engine/demo_ncnn/README.md
@@ -1,10 +1,8 @@
 # PicoDet NCNN Demo
 
-This project provides PicoDet image inference, webcam inference and benchmark using
-[Tencent's NCNN framework](https://github.com/Tencent/ncnn).
-
-# How to build
+本 Demo 的预测代码基于 [Tencent's NCNN framework](https://github.com/Tencent/ncnn) 推理库实现。
 
+# 第一步：编译
 ## Windows
 ### Step1.
 Download and Install Visual Studio from https://visualstudio.microsoft.com/vs/community/
@@ -12,11 +10,16 @@ Download and Install Visual Studio from https://visualstudio.microsoft.com/vs/co
 ### Step2.
 Download and install OpenCV from https://github.com/opencv/opencv/releases
 
-### Step3(Optional).
+为了方便，如果环境是gcc8.2 x86环境，可直接下载以下库：
+```shell
+wget https://paddledet.bj.bcebos.com/data/opencv-3.4.16_gcc8.2_ffmpeg.tar.gz
+tar -xf opencv-3.4.16_gcc8.2_ffmpeg.tar.gz
+```
+
+### Step3(可选).
 Download and install Vulkan SDK from https://vulkan.lunarg.com/sdk/home
 
-### Step4.
-Clone NCNN repository
+### Step4：编译NCNN
 
 ``` shell script
 git clone --recursive https://github.com/Tencent/ncnn.git
@@ -25,7 +28,7 @@ Build NCNN following this tutorial: [Build for Windows x64 using VS2017](https:/
 
 ### Step5.
 
-Add `ncnn_DIR` = `YOUR_NCNN_PATH/build/install/lib/cmake/ncnn` to system environment variables.
+将 `ncnn_DIR` = `YOUR_NCNN_PATH/build/install/lib/cmake/ncnn` 添加到系统环境变量中。
 
 Build project: Open x64 Native Tools Command Prompt for VS 2019 or 2017
 
@@ -42,10 +45,10 @@ msbuild picodet_demo.vcxproj /p:configuration=release /p:platform=x64
 ### Step1.
 Build and install OpenCV from https://github.com/opencv/opencv
 
-### Step2(Optional).
+### Step2(可选).
 Download Vulkan SDK from https://vulkan.lunarg.com/sdk/home
 
-### Step3.
+### Step3：编译NCNN
 Clone NCNN repository
 
 ``` shell script
@@ -54,15 +57,7 @@ git clone --recursive https://github.com/Tencent/ncnn.git
 
 Build NCNN following this tutorial: [Build for Linux / NVIDIA Jetson / Raspberry Pi](https://github.com/Tencent/ncnn/wiki/how-to-build#build-for-linux)
 
-### Step4.
-
-Set environment variables. Run:
-
-``` shell script
-export ncnn_DIR=YOUR_NCNN_PATH/build/install/lib/cmake/ncnn
-```
-
-Build project
+### Step4：编译可执行文件
 
 ``` shell script
 cd <this-folder>
@@ -71,47 +66,64 @@ cd build
 cmake ..
 make
 ```
-
 # Run demo
 
-Download PicoDet ncnn model.
-* [PicoDet ncnn model download link](https://paddledet.bj.bcebos.com/deploy/third_engine/picodet_m_416_ncnn.zip)
-
-
-## Webcam
-
-```shell script
-picodet_demo 0 0
+- 准备模型
+    ```shell
+    modelName=picodet_s_320_coco_lcnet
+    # 导出Inference model
+    python tools/export_model.py \
+            -c configs/picodet/${modelName}.yml \
+            -o weights=${modelName}.pdparams \
+            --output_dir=inference_model
+    # 转换到ONNX
+    paddle2onnx --model_dir inference_model/${modelName} \
+            --model_filename model.pdmodel  \
+            --params_filename model.pdiparams \
+            --opset_version 11 \
+            --save_file ${modelName}.onnx
+    # 简化模型
+    python -m onnxsim ${modelName}.onnx ${modelName}_processed.onnx
+    # 将模型转换至NCNN格式
+    # Run onnx2ncnn in ncnn tools to generate ncnn .param and .bin file.
+    ```
+转NCNN模型可以利用在线转换工具 [https://convertmodel.com](https://convertmodel.com/)
+
+为了快速测试，可直接下载：[picodet_s_320_coco_lcnet-opt.bin](https://paddledet.bj.bcebos.com/deploy/third_engine/picodet_s_320_coco_lcnet-opt.bin)/ [picodet_s_320_coco_lcnet-opt.param](https://paddledet.bj.bcebos.com/deploy/third_engine/picodet_s_320_coco_lcnet-opt.param)（不带后处理）。
+
+**注意：** 由于带后处理的模型在NCNN预测时会出现NaN，暂时请使用不带后处理的Demo，带后处理的Demo正在升级中，很快发布。
+
+
+## 开始运行
+
+首先新建预测结果存放目录：
+```shell
+cp -r ../demo_onnxruntime/imgs .
+cd build
+mkdir ../results
 ```
 
-## Inference images
-
-```shell script
-picodet_demo 1 IMAGE_FOLDER/*.jpg
+- 预测一张图片
+``` shell
+./picodet_demo 0 ../picodet_s_320_coco_lcnet.bin ../picodet_s_320_coco_lcnet.param 320 320 ../imgs/dog.jpg 0
 ```
+具体参数解析可参考`main.cpp`。
 
-## Inference video
+- 测试速度Benchmark
 
-```shell script
-picodet_demo 2 VIDEO_PATH
+``` shell
+./picodet_demo 1 ../picodet_s_320_coco_lcnet.bin ../picodet_s_320_coco_lcnet.param 320 320 0
 ```
 
-## Benchmark
-
-```shell script
-picodet_demo 3 0
-
-result: picodet  min = 17.74  max = 22.71  avg = 18.16
-```
-
-****
-
-Notice:
-
-If benchmark speed is slow, try to limit omp thread num.
-
-Linux:
+## FAQ
 
-```shell script
-export OMP_THREAD_LIMIT=4
+- 预测结果精度不对：
+请先确认模型输入shape是否对齐，并且模型输出name是否对齐，不带后处理的PicoDet增强版模型输出name如下：
+```shell
+# 分类分支  |  检测分支
+{"transpose_0.tmp_0", "transpose_1.tmp_0"},
+{"transpose_2.tmp_0", "transpose_3.tmp_0"},
+{"transpose_4.tmp_0", "transpose_5.tmp_0"},
+{"transpose_6.tmp_0", "transpose_7.tmp_0"},
 ```
+可使用[netron](https://netron.app)查看具体name，并修改`picodet.h`中相应`non_postprocess_heads_info`数组。
diff --git a/deploy/third_engine/demo_ncnn/main.cpp b/deploy/third_engine/demo_ncnn/main.cpp
index 2f98d82ae..8f69af93b 100644
--- a/deploy/third_engine/demo_ncnn/main.cpp
+++ b/deploy/third_engine/demo_ncnn/main.cpp
@@ -13,353 +13,198 @@
 // limitations under the License.
 // reference from https://github.com/RangiLyu/nanodet/tree/main/demo_ncnn
 
+#include "picodet.h"
+#include <benchmark.h>
+#include <iostream>
+#include <net.h>
 #include <opencv2/core/core.hpp>
 #include <opencv2/highgui/highgui.hpp>
 #include <opencv2/imgproc/imgproc.hpp>
-#include <iostream>
-#include <net.h>
-#include "picodet.h"
-#include <benchmark.h>
 
+#define __SAVE_RESULT__ // if defined, save drawn results to ../results; else
+                        // show them in a window
 struct object_rect {
-    int x;
-    int y;
-    int width;
-    int height;
-};
-
-int resize_uniform(cv::Mat& src, cv::Mat& dst, cv::Size dst_size, object_rect& effect_area)
-{
-    int w = src.cols;
-    int h = src.rows;
-    int dst_w = dst_size.width;
-    int dst_h = dst_size.height;
-    dst = cv::Mat(cv::Size(dst_w, dst_h), CV_8UC3, cv::Scalar(0));
-
-    float ratio_src = w * 1.0 / h;
-    float ratio_dst = dst_w * 1.0 / dst_h;
-
-    int tmp_w = 0;
-    int tmp_h = 0;
-    if (ratio_src > ratio_dst) {
-        tmp_w = dst_w;
-        tmp_h = floor((dst_w * 1.0 / w) * h);
-    }
-    else if (ratio_src < ratio_dst) {
-        tmp_h = dst_h;
-        tmp_w = floor((dst_h * 1.0 / h) * w);
-    }
-    else {
-        cv::resize(src, dst, dst_size);
-        effect_area.x = 0;
-        effect_area.y = 0;
-        effect_area.width = dst_w;
-        effect_area.height = dst_h;
-        return 0;
-    }
-
-    cv::Mat tmp;
-    cv::resize(src, tmp, cv::Size(tmp_w, tmp_h));
-
-    if (tmp_w != dst_w) {
-        int index_w = floor((dst_w - tmp_w) / 2.0);
-        for (int i = 0; i < dst_h; i++) {
-            memcpy(dst.data + i * dst_w * 3 + index_w * 3, tmp.data + i * tmp_w * 3, tmp_w * 3);
-        }
-        effect_area.x = index_w;
-        effect_area.y = 0;
-        effect_area.width = tmp_w;
-        effect_area.height = tmp_h;
-    }
-    else if (tmp_h != dst_h) {
-        int index_h = floor((dst_h - tmp_h) / 2.0);
-        memcpy(dst.data + index_h * dst_w * 3, tmp.data, tmp_w * tmp_h * 3);
-        effect_area.x = 0;
-        effect_area.y = index_h;
-        effect_area.width = tmp_w;
-        effect_area.height = tmp_h;
-    }
-    else {
-        printf("error\n");
-    }
-    return 0;
-}
-
-const int color_list[80][3] =
-{
-    {216 , 82 , 24},
-    {236 ,176 , 31},
-    {125 , 46 ,141},
-    {118 ,171 , 47},
-    { 76 ,189 ,237},
-    {238 , 19 , 46},
-    { 76 , 76 , 76},
-    {153 ,153 ,153},
-    {255 ,  0 ,  0},
-    {255 ,127 ,  0},
-    {190 ,190 ,  0},
-    {  0 ,255 ,  0},
-    {  0 ,  0 ,255},
-    {170 ,  0 ,255},
-    { 84 , 84 ,  0},
-    { 84 ,170 ,  0},
-    { 84 ,255 ,  0},
-    {170 , 84 ,  0},
-    {170 ,170 ,  0},
-    {170 ,255 ,  0},
-    {255 , 84 ,  0},
-    {255 ,170 ,  0},
-    {255 ,255 ,  0},
-    {  0 , 84 ,127},
-    {  0 ,170 ,127},
-    {  0 ,255 ,127},
-    { 84 ,  0 ,127},
-    { 84 , 84 ,127},
-    { 84 ,170 ,127},
-    { 84 ,255 ,127},
-    {170 ,  0 ,127},
-    {170 , 84 ,127},
-    {170 ,170 ,127},
-    {170 ,255 ,127},
-    {255 ,  0 ,127},
-    {255 , 84 ,127},
-    {255 ,170 ,127},
-    {255 ,255 ,127},
-    {  0 , 84 ,255},
-    {  0 ,170 ,255},
-    {  0 ,255 ,255},
-    { 84 ,  0 ,255},
-    { 84 , 84 ,255},
-    { 84 ,170 ,255},
-    { 84 ,255 ,255},
-    {170 ,  0 ,255},
-    {170 , 84 ,255},
-    {170 ,170 ,255},
-    {170 ,255 ,255},
-    {255 ,  0 ,255},
-    {255 , 84 ,255},
-    {255 ,170 ,255},
-    { 42 ,  0 ,  0},
-    { 84 ,  0 ,  0},
-    {127 ,  0 ,  0},
-    {170 ,  0 ,  0},
-    {212 ,  0 ,  0},
-    {255 ,  0 ,  0},
-    {  0 , 42 ,  0},
-    {  0 , 84 ,  0},
-    {  0 ,127 ,  0},
-    {  0 ,170 ,  0},
-    {  0 ,212 ,  0},
-    {  0 ,255 ,  0},
-    {  0 ,  0 , 42},
-    {  0 ,  0 , 84},
-    {  0 ,  0 ,127},
-    {  0 ,  0 ,170},
-    {  0 ,  0 ,212},
-    {  0 ,  0 ,255},
-    {  0 ,  0 ,  0},
-    { 36 , 36 , 36},
-    { 72 , 72 , 72},
-    {109 ,109 ,109},
-    {145 ,145 ,145},
-    {182 ,182 ,182},
-    {218 ,218 ,218},
-    {  0 ,113 ,188},
-    { 80 ,182 ,188},
-    {127 ,127 ,  0},
+  int x;
+  int y;
+  int width;
+  int height;
 };
 
-void draw_bboxes(const cv::Mat& bgr, const std::vector<BoxInfo>& bboxes, object_rect effect_roi)
-{
-    static const char* class_names[] = { "person", "bicycle", "car", "motorcycle", "airplane", "bus",
-                                        "train", "truck", "boat", "traffic light", "fire hydrant",
-                                        "stop sign", "parking meter", "bench", "bird", "cat", "dog",
-                                        "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe",
-                                        "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
-                                        "skis", "snowboard", "sports ball", "kite", "baseball bat",
-                                        "baseball glove", "skateboard", "surfboard", "tennis racket",
-                                        "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl",
-                                        "banana", "apple", "sandwich", "orange", "broccoli", "carrot",
-                                        "hot dog", "pizza", "donut", "cake", "chair", "couch",
-                                        "potted plant", "bed", "dining table", "toilet", "tv", "laptop",
-                                        "mouse", "remote", "keyboard", "cell phone", "microwave", "oven",
-                                        "toaster", "sink", "refrigerator", "book", "clock", "vase",
-                                        "scissors", "teddy bear", "hair drier", "toothbrush"
-    };
-
-    cv::Mat image = bgr.clone();
-    int src_w = image.cols;
-    int src_h = image.rows;
-    int dst_w = effect_roi.width;
-    int dst_h = effect_roi.height;
-    float width_ratio = (float)src_w / (float)dst_w;
-    float height_ratio = (float)src_h / (float)dst_h;
-
-
-    for (size_t i = 0; i < bboxes.size(); i++)
-    {
-        const BoxInfo& bbox = bboxes[i];
-        cv::Scalar color = cv::Scalar(color_list[bbox.label][0], color_list[bbox.label][1], color_list[bbox.label][2]);
-
-        cv::rectangle(image, cv::Rect(cv::Point((bbox.x1 - effect_roi.x) * width_ratio, (bbox.y1 - effect_roi.y) * height_ratio),
-                                      cv::Point((bbox.x2 - effect_roi.x) * width_ratio, (bbox.y2 - effect_roi.y) * height_ratio)), color);
-
-        char text[256];
-        sprintf(text, "%s %.1f%%", class_names[bbox.label], bbox.score * 100);
-
-        int baseLine = 0;
-        cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.4, 1, &baseLine);
-
-        int x = (bbox.x1 - effect_roi.x) * width_ratio;
-        int y = (bbox.y1 - effect_roi.y) * height_ratio - label_size.height - baseLine;
-        if (y < 0)
-            y = 0;
-        if (x + label_size.width > image.cols)
-            x = image.cols - label_size.width;
-
-        cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)),
-            color, -1);
-
-        cv::putText(image, text, cv::Point(x, y + label_size.height),
-            cv::FONT_HERSHEY_SIMPLEX, 0.4, cv::Scalar(255, 255, 255));
-    }
-    cv::imwrite("../result/test_picodet.jpg", image);
-    printf("************infer image success!!!**********\n");
-}
-
-
-int image_demo(PicoDet &detector, const char* imagepath)
-{
-    std::vector<std::string> filenames;
-    cv::glob(imagepath, filenames, false);
-
-    for (auto img_name : filenames)
-    {
-        cv::Mat image = cv::imread(img_name);
-        if (image.empty())
-        {
-            fprintf(stderr, "cv::imread %s failed\n", img_name);
-            return -1;
-        }
-        object_rect effect_roi;
-        cv::Mat resized_img;
-        resize_uniform(image, resized_img, cv::Size(320, 320), effect_roi);
-        auto results = detector.detect(resized_img, 0.4, 0.5);
-        char imgName[20] = {};
-        draw_bboxes(image, results, effect_roi);
-        cv::waitKey(0);
-
+std::vector<int> GenerateColorMap(int num_class) {
+  auto colormap = std::vector<int>(3 * num_class, 0);
+  for (int i = 0; i < num_class; ++i) {
+    int j = 0;
+    int lab = i;
+    while (lab) {
+      colormap[i * 3] |= (((lab >> 0) & 1) << (7 - j));
+      colormap[i * 3 + 1] |= (((lab >> 1) & 1) << (7 - j));
+      colormap[i * 3 + 2] |= (((lab >> 2) & 1) << (7 - j));
+      ++j;
+      lab >>= 3;
     }
-    return 0;
+  }
+  return colormap;
 }
 
-int webcam_demo(PicoDet& detector, int cam_id)
-{
-    cv::Mat image;
-    cv::VideoCapture cap(cam_id);
-
-    while (true)
-    {
-        cap >> image;
-        object_rect effect_roi;
-        cv::Mat resized_img;
-        resize_uniform(image, resized_img, cv::Size(320, 320), effect_roi);
-        auto results = detector.detect(resized_img, 0.4, 0.5);
-        draw_bboxes(image, results, effect_roi);
-        cv::waitKey(1);
-    }
-    return 0;
+void draw_bboxes(const cv::Mat &im, const std::vector<BoxInfo> &bboxes,
+                 std::string save_path = "None") { // "None": show, don't save
+  static const char *class_names[] = {
+      "person",        "bicycle",      "car",
+      "motorcycle",    "airplane",     "bus",
+      "train",         "truck",        "boat",
+      "traffic light", "fire hydrant", "stop sign",
+      "parking meter", "bench",        "bird",
+      "cat",           "dog",          "horse",
+      "sheep",         "cow",          "elephant",
+      "bear",          "zebra",        "giraffe",
+      "backpack",      "umbrella",     "handbag",
+      "tie",           "suitcase",     "frisbee",
+      "skis",          "snowboard",    "sports ball",
+      "kite",          "baseball bat", "baseball glove",
+      "skateboard",    "surfboard",    "tennis racket",
+      "bottle",        "wine glass",   "cup",
+      "fork",          "knife",        "spoon",
+      "bowl",          "banana",       "apple",
+      "sandwich",      "orange",       "broccoli",
+      "carrot",        "hot dog",      "pizza",
+      "donut",         "cake",         "chair",
+      "couch",         "potted plant", "bed",
+      "dining table",  "toilet",       "tv",
+      "laptop",        "mouse",        "remote",
+      "keyboard",      "cell phone",   "microwave",
+      "oven",          "toaster",      "sink",
+      "refrigerator",  "book",         "clock",
+      "vase",          "scissors",     "teddy bear",
+      "hair drier",    "toothbrush"};
+
+  cv::Mat image = im.clone(); // draw on a copy so the caller's image is untouched
+  int src_w = image.cols;
+  int src_h = image.rows;
+  // sizeof(array) is a byte count; divide to get the number of class names.
+  auto colormap = GenerateColorMap(sizeof(class_names) / sizeof(class_names[0]));
+
+  for (size_t i = 0; i < bboxes.size(); i++) {
+    const BoxInfo &bbox = bboxes[i];
+    std::cout << bbox.x1 << ". " << bbox.y1 << ". " << bbox.x2 << ". "
+              << bbox.y2 << ". " << std::endl;
+    int c1 = colormap[3 * bbox.label + 0];
+    int c2 = colormap[3 * bbox.label + 1];
+    int c3 = colormap[3 * bbox.label + 2];
+    cv::Scalar color = cv::Scalar(c1, c2, c3);
+    // cv::Scalar color = cv::Scalar(0, 0, 255);
+    cv::rectangle(image, cv::Rect(cv::Point(bbox.x1, bbox.y1),
+                                  cv::Point(bbox.x2, bbox.y2)),
+                  color, 1);
+
+    char text[256];
+    sprintf(text, "%s %.1f%%", class_names[bbox.label], bbox.score * 100);
+
+    int baseLine = 0;
+    cv::Size label_size =
+        cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.4, 1, &baseLine);
+
+    int x = bbox.x1;
+    int y = bbox.y1 - label_size.height - baseLine; // label sits above the box
+    if (y < 0)
+      y = 0;
+    if (x + label_size.width > image.cols)
+      x = image.cols - label_size.width;
+
+    cv::rectangle(image, cv::Rect(cv::Point(x, y),
+                                  cv::Size(label_size.width,
+                                           label_size.height + baseLine)),
+                  color, -1);
+
+    cv::putText(image, text, cv::Point(x, y + label_size.height),
+                cv::FONT_HERSHEY_SIMPLEX, 0.4, cv::Scalar(255, 255, 255), 1);
+  }
+
+  if (save_path == "None") {
+    cv::imshow("image", image);
+  } else {
+    cv::imwrite(save_path, image);
+    std::cout << "Result save in: " << save_path << std::endl;
+  }
 }
 
-int video_demo(PicoDet& detector, const char* path)
-{
-    cv::Mat image;
-    cv::VideoCapture cap(path);
-
-    while (true)
-    {
-        cap >> image;
-        object_rect effect_roi;
-        cv::Mat resized_img;
-        resize_uniform(image, resized_img, cv::Size(320, 320), effect_roi);
-        auto results = detector.detect(resized_img, 0.4, 0.5);
-        draw_bboxes(image, results, effect_roi);
-        cv::waitKey(1);
+int image_demo(PicoDet &detector, const char *imagepath,
+               int has_postprocess = 0) {
+  std::vector<cv::String> filenames;
+  cv::glob(imagepath, filenames, false);
+  bool is_postprocess = has_postprocess > 0 ? true : false;
+  for (auto img_name : filenames) {
+    cv::Mat image = cv::imread(img_name, cv::IMREAD_COLOR);
+    if (image.empty()) {
+      fprintf(stderr, "cv::imread %s failed\n", img_name.c_str());
+      return -1;
     }
-    return 0;
+    std::vector<BoxInfo> results;
+    detector.detect(image, results, is_postprocess);
+    std::cout << "detect done." << std::endl;
+
+#ifdef __SAVE_RESULT__
+    std::string save_path = img_name;
+    draw_bboxes(image, results, save_path.replace(3, 4, "results"));
+#else
+    draw_bboxes(image, results);
+    cv::waitKey(0);
+#endif
+  }
+  return 0;
 }
 
-int benchmark(PicoDet& detector)
-{
-    int loop_num = 100;
-    int warm_up = 8;
-
-    double time_min = DBL_MAX;
-    double time_max = -DBL_MAX;
-    double time_avg = 0;
-    ncnn::Mat input = ncnn::Mat(320, 320, 3);
-    input.fill(0.01f);
-    for (int i = 0; i < warm_up + loop_num; i++)
-    {
-        double start = ncnn::get_current_time();
-        ncnn::Extractor ex = detector.Net->create_extractor();
-        ex.input("image", input); // picodet
-        for (const auto& head_info : detector.heads_info)
-        {
-            ncnn::Mat dis_pred;
-            ncnn::Mat cls_pred;
-            ex.extract(head_info.dis_layer.c_str(), dis_pred);
-            ex.extract(head_info.cls_layer.c_str(), cls_pred);
-        }
-        double end = ncnn::get_current_time();
-
-        double time = end - start;
-        if (i >= warm_up)
-        {
-            time_min = (std::min)(time_min, time);
-            time_max = (std::max)(time_max, time);
-            time_avg += time;
-        }
+int benchmark(PicoDet &detector, int width, int height,
+              int has_postprocess = 0) {
+  int loop_num = 100; // number of timed iterations
+  int warm_up = 8;    // leading iterations excluded from the statistics
+
+  double time_min = DBL_MAX;
+  double time_max = -DBL_MAX;
+  double time_avg = 0;
+  cv::Mat image(height, width, CV_8UC3, cv::Scalar(1, 1, 1)); // (rows, cols)
+  bool is_postprocess = has_postprocess > 0 ? true : false;
+  for (int i = 0; i < warm_up + loop_num; i++) {
+    double start = ncnn::get_current_time();
+    std::vector<BoxInfo> results;
+    detector.detect(image, results, is_postprocess);
+    double end = ncnn::get_current_time();
+
+    double time = end - start;
+    if (i >= warm_up) { // only iterations after the warm-up are measured
+      time_min = (std::min)(time_min, time);
+      time_max = (std::max)(time_max, time);
+      time_avg += time;
     }
-    time_avg /= loop_num;
-    fprintf(stderr, "%20s  min = %7.2f  max = %7.2f  avg = %7.2f\n", "picodet", time_min, time_max, time_avg);
-    return 0;
+  }
+  time_avg /= loop_num;
+  fprintf(stderr, "%20s  min = %7.2f  max = %7.2f  avg = %7.2f\n", "picodet",
+          time_min, time_max, time_avg);
+  return 0;
 }
 
-
-int main(int argc, char** argv)
-{
-    if (argc != 3)
-    {
-        fprintf(stderr, "usage: %s [mode] [path]. \n For webcam mode=0, path is cam id; \n For image demo, mode=1, path=xxx/xxx/*.jpg; \n For video, mode=2; \n For benchmark, mode=3 path=0.\n", argv[0]);
-        return -1;
-    }
-    PicoDet detector = PicoDet("../weight/picodet_m_416.param", "../weight/picodet_m_416.bin", true);
-    int mode = atoi(argv[1]);
-    switch (mode)
-    {
-    case 0:{
-        int cam_id = atoi(argv[2]);
-        webcam_demo(detector, cam_id);
-        break;
-        }
-    case 1:{
-        const char* images = argv[2];
-        image_demo(detector, images);
-        break;
-        }
-    case 2:{
-        const char* path = argv[2];
-        video_demo(detector, path);
-        break;
-        }
-    case 3:{
-        benchmark(detector);
-        break;
-        }
-    default:{
-        fprintf(stderr, "usage: %s [mode] [path]. \n For webcam mode=0, path is cam id; \n For image demo, mode=1, path=xxx/xxx/*.jpg; \n For video, mode=2; \n For benchmark, mode=3 path=0.\n", argv[0]);
-        break;
-        }
+int main(int argc, char **argv) {
+  // Usage: picodet_demo <mode> <bin> <param> <height> <width> [image] [post]
+  if (argc < 4) { // argv[1..3] (mode, bin, param) are read unconditionally
+    std::cout << "Must set mode, bin model and param model" << std::endl;
+    return -1;
+  }
+  int mode = atoi(argv[1]);
+  char *bin_model_path = argv[2];
+  char *param_model_path = argv[3];
+  int height = argc >= 6 ? atoi(argv[4]) : 320; // both sizes must be given
+  int width = argc >= 6 ? atoi(argv[5]) : 320;
+  PicoDet detector =
+      PicoDet(param_model_path, bin_model_path, width, height, true, 0.45, 0.3);
+  if (mode == 1) { // benchmark: argv[6], when present, is the post-process flag
+    return benchmark(detector, width, height, argc > 6 ? atoi(argv[6]) : 0);
+  }
+  if (argc < 7) { // image mode needs the image path in argv[6]
+    std::cout << "Must set image file, such as ./picodet_demo 0 "
+                 "../picodet_s_320_lcnet.bin ../picodet_s_320_lcnet.param "
+                 "320 320 img.jpg" << std::endl;
+    return -1;
     }
+  // The trailing post-process flag (argv[7]) is optional and defaults to 0.
+  const char *images = argv[6];
+  return image_demo(detector, images, argc > 7 ? atoi(argv[7]) : 0);
 }
diff --git a/deploy/third_engine/demo_ncnn/picodet.cpp b/deploy/third_engine/demo_ncnn/picodet.cpp
index c4dec46b2..d5f0ba3c7 100644
--- a/deploy/third_engine/demo_ncnn/picodet.cpp
+++ b/deploy/third_engine/demo_ncnn/picodet.cpp
@@ -48,7 +48,9 @@ int activation_function_softmax(const _Tp *src, _Tp *dst, int length) {
 bool PicoDet::hasGPU = false;
 PicoDet *PicoDet::detector = nullptr;
 
-PicoDet::PicoDet(const char *param, const char *bin, bool useGPU) {
+PicoDet::PicoDet(const char *param, const char *bin, int input_width,
+                 int input_hight, bool useGPU, float score_threshold_ = 0.5,
+                 float nms_threshold_ = 0.3) {
   this->Net = new ncnn::Net();
 #if NCNN_VULKAN
   this->hasGPU = ncnn::get_gpu_count() > 0;
@@ -57,21 +59,28 @@ PicoDet::PicoDet(const char *param, const char *bin, bool useGPU) {
   this->Net->opt.use_fp16_arithmetic = true;
   this->Net->load_param(param);
   this->Net->load_model(bin);
+  this->in_w = input_width;
+  this->in_h = input_hight;
+  this->score_threshold = score_threshold_;
+  this->nms_threshold = nms_threshold_;
 }
 
 PicoDet::~PicoDet() { delete this->Net; }
 
 void PicoDet::preprocess(cv::Mat &image, ncnn::Mat &in) {
+  // cv::resize(image, image, cv::Size(this->in_w, this->in_h), 0.f, 0.f);
   int img_w = image.cols;
   int img_h = image.rows;
-  in = ncnn::Mat::from_pixels(image.data, ncnn::Mat::PIXEL_BGR, img_w, img_h);
+  in = ncnn::Mat::from_pixels_resize(image.data, ncnn::Mat::PIXEL_BGR, img_w,
+                                     img_h, this->in_w, this->in_h);
   const float mean_vals[3] = {103.53f, 116.28f, 123.675f};
   const float norm_vals[3] = {0.017429f, 0.017507f, 0.017125f};
   in.substract_mean_normalize(mean_vals, norm_vals);
 }
 
-std::vector<BoxInfo> PicoDet::detect(cv::Mat image, float score_threshold,
-                                     float nms_threshold) {
+int PicoDet::detect(cv::Mat image, std::vector<BoxInfo> &result_list,
+                    bool has_postprocess) {
+
   ncnn::Mat input;
   preprocess(image, input);
   auto ex = this->Net->create_extractor();
@@ -82,34 +91,76 @@ std::vector<BoxInfo> PicoDet::detect(cv::Mat image, float score_threshold,
 #endif
   ex.input("image", input); // picodet
 
+  this->image_h = image.rows;
+  this->image_w = image.cols;
+
   std::vector<std::vector<BoxInfo>> results;
   results.resize(this->num_class);
 
-  for (const auto &head_info : this->heads_info) {
+  if (has_postprocess) {
     ncnn::Mat dis_pred;
     ncnn::Mat cls_pred;
-    ex.extract(head_info.dis_layer.c_str(), dis_pred);
-    ex.extract(head_info.cls_layer.c_str(), cls_pred);
-    this->decode_infer(cls_pred, dis_pred, head_info.stride, score_threshold,
-                       results);
+    ex.extract(this->nms_heads_info[0].c_str(), dis_pred);
+    ex.extract(this->nms_heads_info[1].c_str(), cls_pred);
+    std::cout << dis_pred.h << "  " << dis_pred.w << std::endl;
+    std::cout << cls_pred.h << "  " << cls_pred.w << std::endl;
+    this->nms_boxes(cls_pred, dis_pred, this->score_threshold, results);
+  } else {
+    for (const auto &head_info : this->non_postprocess_heads_info) {
+      ncnn::Mat dis_pred;
+      ncnn::Mat cls_pred;
+      ex.extract(head_info.dis_layer.c_str(), dis_pred);
+      ex.extract(head_info.cls_layer.c_str(), cls_pred);
+      this->decode_infer(cls_pred, dis_pred, head_info.stride,
+                         this->score_threshold, results);
+    }
   }
 
-  std::vector<BoxInfo> dets;
   for (int i = 0; i < (int)results.size(); i++) {
-    this->nms(results[i], nms_threshold);
+    this->nms(results[i], this->nms_threshold);
 
     for (auto box : results[i]) {
-      dets.push_back(box);
+      box.x1 = box.x1 / this->in_w * this->image_w;
+      box.x2 = box.x2 / this->in_w * this->image_w;
+      box.y1 = box.y1 / this->in_h * this->image_h;
+      box.y2 = box.y2 / this->in_h * this->image_h;
+      result_list.push_back(box);
+    }
+  }
+  return 0;
+}
+
+void PicoDet::nms_boxes(ncnn::Mat &cls_pred, ncnn::Mat &dis_pred,
+                        float score_threshold,
+                        std::vector<std::vector<BoxInfo>> &result_list) {
+  BoxInfo bbox;
+  int i, j;
+  for (i = 0; i < dis_pred.h; i++) {
+    bbox.x1 = dis_pred.row(i)[0];
+    bbox.y1 = dis_pred.row(i)[1];
+    bbox.x2 = dis_pred.row(i)[2];
+    bbox.y2 = dis_pred.row(i)[3];
+    const float *scores = cls_pred.row(i);
+    float score = 0;
+    int cur_label = 0;
+    for (int label = 0; label < this->num_class; label++) {
+      float score_ = cls_pred.row(label)[i];
+      if (score_ > score) {
+        score = score_;
+        cur_label = label;
+      }
     }
+    bbox.score = score;
+    bbox.label = cur_label;
+    result_list[cur_label].push_back(bbox);
   }
-  return dets;
 }
 
 void PicoDet::decode_infer(ncnn::Mat &cls_pred, ncnn::Mat &dis_pred, int stride,
                            float threshold,
                            std::vector<std::vector<BoxInfo>> &results) {
-  int feature_h = ceil((float)this->input_size[1] / stride);
-  int feature_w = ceil((float)this->input_size[0] / stride);
+  int feature_h = ceil((float)this->in_h / stride); // height drives the rows
+  int feature_w = ceil((float)this->in_w / stride); // width drives the columns
 
   for (int idx = 0; idx < feature_h * feature_w; idx++) {
     const float *scores = cls_pred.row(idx);
@@ -151,8 +202,8 @@ BoxInfo PicoDet::disPred2Bbox(const float *&dfl_det, int label, float score,
   }
   float xmin = (std::max)(ct_x - dis_pred[0], .0f);
   float ymin = (std::max)(ct_y - dis_pred[1], .0f);
-  float xmax = (std::min)(ct_x + dis_pred[2], (float)this->input_size[0]);
-  float ymax = (std::min)(ct_y + dis_pred[3], (float)this->input_size[1]);
+  float xmax = (std::min)(ct_x + dis_pred[2], (float)this->in_w);
+  float ymax = (std::min)(ct_y + dis_pred[3], (float)this->in_h); // clamp y to height
   return BoxInfo{xmin, ymin, xmax, ymax, score, label};
 }
 
diff --git a/deploy/third_engine/demo_ncnn/picodet.h b/deploy/third_engine/demo_ncnn/picodet.h
index dfb0967c9..dd8c8f5af 100644
--- a/deploy/third_engine/demo_ncnn/picodet.h
+++ b/deploy/third_engine/demo_ncnn/picodet.h
@@ -16,66 +16,72 @@
 #ifndef PICODET_H
 #define PICODET_H
 
-#include <opencv2/core/core.hpp>
 #include <net.h>
+#include <opencv2/core/core.hpp>
 
-typedef struct HeadInfo
-{
-    std::string cls_layer;
-    std::string dis_layer;
-    int stride;
-};
+typedef struct NonPostProcessHeadInfo {
+  std::string cls_layer;
+  std::string dis_layer;
+  int stride;
+} NonPostProcessHeadInfo;
 
-typedef struct BoxInfo
-{
-    float x1;
-    float y1;
-    float x2;
-    float y2;
-    float score;
-    int label;
+typedef struct BoxInfo {
+  float x1;
+  float y1;
+  float x2;
+  float y2;
+  float score;
+  int label;
 } BoxInfo;
 
-class PicoDet
-{
+class PicoDet {
 public:
-    PicoDet(const char* param, const char* bin, bool useGPU);
-
-    ~PicoDet();
+  PicoDet(const char *param, const char *bin, int input_width, int input_hight,
+          bool useGPU, float score_threshold_, float nms_threshold_);
 
-    static PicoDet* detector;
-    ncnn::Net* Net;
-    static bool hasGPU;
+  ~PicoDet();
 
-    std::vector<HeadInfo> heads_info{
-        // cls_pred|dis_pred|stride
-        {"save_infer_model/scale_0.tmp_1", "save_infer_model/scale_4.tmp_1", 8},
-        {"save_infer_model/scale_1.tmp_1", "save_infer_model/scale_5.tmp_1", 16},
-        {"save_infer_model/scale_2.tmp_1", "save_infer_model/scale_6.tmp_1", 32},
-        {"save_infer_model/scale_3.tmp_1", "save_infer_model/scale_7.tmp_1", 64},
-    };
+  static PicoDet *detector;
+  ncnn::Net *Net;
+  static bool hasGPU;
 
-    std::vector<BoxInfo> detect(cv::Mat image, float score_threshold, float nms_threshold);
+  int detect(cv::Mat image, std::vector<BoxInfo> &result_list,
+             bool has_postprocess);
 
-    std::vector<std::string> labels{ "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
-                                    "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
-                                    "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
-                                    "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
-                                    "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
-                                    "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
-                                    "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
-                                    "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
-                                    "hair drier", "toothbrush" };
 private:
-    void preprocess(cv::Mat& image, ncnn::Mat& in);
-    void decode_infer(ncnn::Mat& cls_pred, ncnn::Mat& dis_pred, int stride, float threshold, std::vector<std::vector<BoxInfo>>& results);
-    BoxInfo disPred2Bbox(const float*& dfl_det, int label, float score, int x, int y, int stride);
-    static void nms(std::vector<BoxInfo>& result, float nms_threshold);
-    int input_size[2] = {320, 320};
-    int num_class = 80;
-    int reg_max = 7;
+  void preprocess(cv::Mat &image, ncnn::Mat &in);
+  void decode_infer(ncnn::Mat &cls_pred, ncnn::Mat &dis_pred, int stride,
+                    float threshold,
+                    std::vector<std::vector<BoxInfo>> &results);
+  BoxInfo disPred2Bbox(const float *&dfl_det, int label, float score, int x,
+                       int y, int stride);
+  static void nms(std::vector<BoxInfo> &result, float nms_threshold);
+  void nms_boxes(ncnn::Mat &cls_pred, ncnn::Mat &dis_pred,
+                 float score_threshold,
+                 std::vector<std::vector<BoxInfo>> &result_list);
 
-};
+  int image_w;
+  int image_h;
+  int in_w = 320;
+  int in_h = 320;
+  int num_class = 80;
+  int reg_max = 7;
+
+  float score_threshold;
+  float nms_threshold;
 
+  std::vector<float> bbox_output_data_;
+  std::vector<float> class_output_data_;
+
+  std::vector<std::string> nms_heads_info{"tmp_16", "concat_4.tmp_0"};
+  // If not export post-process, will use non_postprocess_heads_info
+  std::vector<NonPostProcessHeadInfo> non_postprocess_heads_info{
+      // cls_pred|dis_pred|stride
+      {"transpose_0.tmp_0", "transpose_1.tmp_0", 8},
+      {"transpose_2.tmp_0", "transpose_3.tmp_0", 16},
+      {"transpose_4.tmp_0", "transpose_5.tmp_0", 32},
+      {"transpose_6.tmp_0", "transpose_7.tmp_0", 64},
+  };
+};
 
 #endif
diff --git a/deploy/third_engine/demo_ncnn/python/demo_ncnn.py b/deploy/third_engine/demo_ncnn/python/demo_ncnn.py
deleted file mode 100644
index 492eb1e0d..000000000
--- a/deploy/third_engine/demo_ncnn/python/demo_ncnn.py
+++ /dev/null
@@ -1,808 +0,0 @@
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-
-#     http://www.apache.org/licenses/LICENSE-2.0
-
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# reference from https://github.com/RangiLyu/nanodet/tree/main/demo_ncnn
-
-# -*- coding: utf-8 -*-
-import argparse
-from abc import ABCMeta, abstractmethod
-from pathlib import Path
-
-import cv2
-import matplotlib.pyplot as plt
-import numpy as np
-from scipy.special import softmax
-from tqdm import tqdm
-
-_COLORS = (np.array([
-    0.000,
-    0.447,
-    0.741,
-    0.850,
-    0.325,
-    0.098,
-    0.929,
-    0.694,
-    0.125,
-    0.494,
-    0.184,
-    0.556,
-    0.466,
-    0.674,
-    0.188,
-    0.301,
-    0.745,
-    0.933,
-    0.635,
-    0.078,
-    0.184,
-    0.300,
-    0.300,
-    0.300,
-    0.600,
-    0.600,
-    0.600,
-    1.000,
-    0.000,
-    0.000,
-    1.000,
-    0.500,
-    0.000,
-    0.749,
-    0.749,
-    0.000,
-    0.000,
-    1.000,
-    0.000,
-    0.000,
-    0.000,
-    1.000,
-    0.667,
-    0.000,
-    1.000,
-    0.333,
-    0.333,
-    0.000,
-    0.333,
-    0.667,
-    0.000,
-    0.333,
-    1.000,
-    0.000,
-    0.667,
-    0.333,
-    0.000,
-    0.667,
-    0.667,
-    0.000,
-    0.667,
-    1.000,
-    0.000,
-    1.000,
-    0.333,
-    0.000,
-    1.000,
-    0.667,
-    0.000,
-    1.000,
-    1.000,
-    0.000,
-    0.000,
-    0.333,
-    0.500,
-    0.000,
-    0.667,
-    0.500,
-    0.000,
-    1.000,
-    0.500,
-    0.333,
-    0.000,
-    0.500,
-    0.333,
-    0.333,
-    0.500,
-    0.333,
-    0.667,
-    0.500,
-    0.333,
-    1.000,
-    0.500,
-    0.667,
-    0.000,
-    0.500,
-    0.667,
-    0.333,
-    0.500,
-    0.667,
-    0.667,
-    0.500,
-    0.667,
-    1.000,
-    0.500,
-    1.000,
-    0.000,
-    0.500,
-    1.000,
-    0.333,
-    0.500,
-    1.000,
-    0.667,
-    0.500,
-    1.000,
-    1.000,
-    0.500,
-    0.000,
-    0.333,
-    1.000,
-    0.000,
-    0.667,
-    1.000,
-    0.000,
-    1.000,
-    1.000,
-    0.333,
-    0.000,
-    1.000,
-    0.333,
-    0.333,
-    1.000,
-    0.333,
-    0.667,
-    1.000,
-    0.333,
-    1.000,
-    1.000,
-    0.667,
-    0.000,
-    1.000,
-    0.667,
-    0.333,
-    1.000,
-    0.667,
-    0.667,
-    1.000,
-    0.667,
-    1.000,
-    1.000,
-    1.000,
-    0.000,
-    1.000,
-    1.000,
-    0.333,
-    1.000,
-    1.000,
-    0.667,
-    1.000,
-    0.333,
-    0.000,
-    0.000,
-    0.500,
-    0.000,
-    0.000,
-    0.667,
-    0.000,
-    0.000,
-    0.833,
-    0.000,
-    0.000,
-    1.000,
-    0.000,
-    0.000,
-    0.000,
-    0.167,
-    0.000,
-    0.000,
-    0.333,
-    0.000,
-    0.000,
-    0.500,
-    0.000,
-    0.000,
-    0.667,
-    0.000,
-    0.000,
-    0.833,
-    0.000,
-    0.000,
-    1.000,
-    0.000,
-    0.000,
-    0.000,
-    0.167,
-    0.000,
-    0.000,
-    0.333,
-    0.000,
-    0.000,
-    0.500,
-    0.000,
-    0.000,
-    0.667,
-    0.000,
-    0.000,
-    0.833,
-    0.000,
-    0.000,
-    1.000,
-    0.000,
-    0.000,
-    0.000,
-    0.143,
-    0.143,
-    0.143,
-    0.286,
-    0.286,
-    0.286,
-    0.429,
-    0.429,
-    0.429,
-    0.571,
-    0.571,
-    0.571,
-    0.714,
-    0.714,
-    0.714,
-    0.857,
-    0.857,
-    0.857,
-    0.000,
-    0.447,
-    0.741,
-    0.314,
-    0.717,
-    0.741,
-    0.50,
-    0.5,
-    0,
-]).astype(np.float32).reshape(-1, 3))
-
-
-def get_resize_matrix(raw_shape, dst_shape, keep_ratio):
-    """
-    Get resize matrix for resizing raw img to input size
-    :param raw_shape: (width, height) of raw image
-    :param dst_shape: (width, height) of input image
-    :param keep_ratio: whether keep original ratio
-    :return: 3x3 Matrix
-    """
-    r_w, r_h = raw_shape
-    d_w, d_h = dst_shape
-    Rs = np.eye(3)
-    if keep_ratio:
-        C = np.eye(3)
-        C[0, 2] = -r_w / 2
-        C[1, 2] = -r_h / 2
-
-        if r_w / r_h < d_w / d_h:
-            ratio = d_h / r_h
-        else:
-            ratio = d_w / r_w
-        Rs[0, 0] *= ratio
-        Rs[1, 1] *= ratio
-
-        T = np.eye(3)
-        T[0, 2] = 0.5 * d_w
-        T[1, 2] = 0.5 * d_h
-        return T @Rs @C
-    else:
-        Rs[0, 0] *= d_w / r_w
-        Rs[1, 1] *= d_h / r_h
-        return Rs
-
-
-def warp_boxes(boxes, M, width, height):
-    """Apply transform to boxes
-    Copy from picodet/data/transform/warp.py
-    """
-    n = len(boxes)
-    if n:
-        # warp points
-        xy = np.ones((n * 4, 3))
-        xy[:, :2] = boxes[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(
-            n * 4, 2)  # x1y1, x2y2, x1y2, x2y1
-        xy = xy @M.T  # transform
-        xy = (xy[:, :2] / xy[:, 2:3]).reshape(n, 8)  # rescale
-        # create new boxes
-        x = xy[:, [0, 2, 4, 6]]
-        y = xy[:, [1, 3, 5, 7]]
-        xy = np.concatenate(
-            (x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T
-        # clip boxes
-        xy[:, [0, 2]] = xy[:, [0, 2]].clip(0, width)
-        xy[:, [1, 3]] = xy[:, [1, 3]].clip(0, height)
-        return xy.astype(np.float32)
-    else:
-        return boxes
-
-
-def overlay_bbox_cv(img, all_box, class_names):
-    """Draw result boxes
-    Copy from picodet/util/visualization.py
-    """
-    all_box.sort(key=lambda v: v[5])
-    for box in all_box:
-        label, x0, y0, x1, y1, score = box
-        color = (_COLORS[label] * 255).astype(np.uint8).tolist()
-        text = "{}:{:.1f}%".format(class_names[label], score * 100)
-        txt_color = (0, 0, 0) if np.mean(_COLORS[label]) > 0.5 else (255, 255,
-                                                                     255)
-        font = cv2.FONT_HERSHEY_SIMPLEX
-        txt_size = cv2.getTextSize(text, font, 0.5, 2)[0]
-        cv2.rectangle(img, (x0, y0), (x1, y1), color, 2)
-
-        cv2.rectangle(
-            img,
-            (x0, y0 - txt_size[1] - 1),
-            (x0 + txt_size[0] + txt_size[1], y0 - 1),
-            color,
-            -1, )
-        cv2.putText(img, text, (x0, y0 - 1), font, 0.5, txt_color, thickness=1)
-    return img
-
-
-def hard_nms(box_scores, iou_threshold, top_k=-1, candidate_size=200):
-    """
-
-    Args:
-        box_scores (N, 5): boxes in corner-form and probabilities.
-        iou_threshold: intersection over union threshold.
-        top_k: keep top_k results. If k <= 0, keep all the results.
-        candidate_size: only consider the candidates with the highest scores.
-    Returns:
-         picked: a list of indexes of the kept boxes
-    """
-    scores = box_scores[:, -1]
-    boxes = box_scores[:, :-1]
-    picked = []
-    indexes = np.argsort(scores)
-    indexes = indexes[-candidate_size:]
-    while len(indexes) > 0:
-        current = indexes[-1]
-        picked.append(current)
-        if 0 < top_k == len(picked) or len(indexes) == 1:
-            break
-        current_box = boxes[current, :]
-        indexes = indexes[:-1]
-        rest_boxes = boxes[indexes, :]
-        iou = iou_of(
-            rest_boxes,
-            np.expand_dims(
-                current_box, axis=0), )
-        indexes = indexes[iou <= iou_threshold]
-
-    return box_scores[picked, :]
-
-
-def iou_of(boxes0, boxes1, eps=1e-5):
-    """Return intersection-over-union (Jaccard index) of boxes.
-
-    Args:
-        boxes0 (N, 4): ground truth boxes.
-        boxes1 (N or 1, 4): predicted boxes.
-        eps: a small number to avoid 0 as denominator.
-    Returns:
-        iou (N): IoU values.
-    """
-    overlap_left_top = np.maximum(boxes0[..., :2], boxes1[..., :2])
-    overlap_right_bottom = np.minimum(boxes0[..., 2:], boxes1[..., 2:])
-
-    overlap_area = area_of(overlap_left_top, overlap_right_bottom)
-    area0 = area_of(boxes0[..., :2], boxes0[..., 2:])
-    area1 = area_of(boxes1[..., :2], boxes1[..., 2:])
-    return overlap_area / (area0 + area1 - overlap_area + eps)
-
-
-def area_of(left_top, right_bottom):
-    """Compute the areas of rectangles given two corners.
-
-    Args:
-        left_top (N, 2): left top corner.
-        right_bottom (N, 2): right bottom corner.
-
-    Returns:
-        area (N): return the area.
-    """
-    hw = np.clip(right_bottom - left_top, 0.0, None)
-    return hw[..., 0] * hw[..., 1]
-
-
-class picodetABC(metaclass=ABCMeta):
-    def __init__(
-            self,
-            input_shape=[320, 320],
-            reg_max=7,
-            strides=[8, 16, 32],
-            prob_threshold=0.4,
-            iou_threshold=0.3,
-            num_candidate=1000,
-            top_k=-1, ):
-        self.strides = strides
-        self.input_shape = input_shape
-        self.reg_max = reg_max
-        self.prob_threshold = prob_threshold
-        self.iou_threshold = iou_threshold
-        self.num_candidate = num_candidate
-        self.top_k = top_k
-        self.img_mean = [103.53, 116.28, 123.675]
-        self.img_std = [57.375, 57.12, 58.395]
-        self.input_size = (self.input_shape[1], self.input_shape[0])
-        self.class_names = [
-            "person",
-            "bicycle",
-            "car",
-            "motorcycle",
-            "airplane",
-            "bus",
-            "train",
-            "truck",
-            "boat",
-            "traffic_light",
-            "fire_hydrant",
-            "stop_sign",
-            "parking_meter",
-            "bench",
-            "bird",
-            "cat",
-            "dog",
-            "horse",
-            "sheep",
-            "cow",
-            "elephant",
-            "bear",
-            "zebra",
-            "giraffe",
-            "backpack",
-            "umbrella",
-            "handbag",
-            "tie",
-            "suitcase",
-            "frisbee",
-            "skis",
-            "snowboard",
-            "sports_ball",
-            "kite",
-            "baseball_bat",
-            "baseball_glove",
-            "skateboard",
-            "surfboard",
-            "tennis_racket",
-            "bottle",
-            "wine_glass",
-            "cup",
-            "fork",
-            "knife",
-            "spoon",
-            "bowl",
-            "banana",
-            "apple",
-            "sandwich",
-            "orange",
-            "broccoli",
-            "carrot",
-            "hot_dog",
-            "pizza",
-            "donut",
-            "cake",
-            "chair",
-            "couch",
-            "potted_plant",
-            "bed",
-            "dining_table",
-            "toilet",
-            "tv",
-            "laptop",
-            "mouse",
-            "remote",
-            "keyboard",
-            "cell_phone",
-            "microwave",
-            "oven",
-            "toaster",
-            "sink",
-            "refrigerator",
-            "book",
-            "clock",
-            "vase",
-            "scissors",
-            "teddy_bear",
-            "hair_drier",
-            "toothbrush",
-        ]
-
-    def preprocess(self, img):
-        # resize image
-        ResizeM = get_resize_matrix((img.shape[1], img.shape[0]),
-                                    self.input_size, True)
-        img_resize = cv2.warpPerspective(img, ResizeM, dsize=self.input_size)
-        # normalize image
-        img_input = img_resize.astype(np.float32) / 255
-        img_mean = np.array(
-            self.img_mean, dtype=np.float32).reshape(1, 1, 3) / 255
-        img_std = np.array(
-            self.img_std, dtype=np.float32).reshape(1, 1, 3) / 255
-        img_input = (img_input - img_mean) / img_std
-        # expand dims
-        img_input = np.transpose(img_input, [2, 0, 1])
-        img_input = np.expand_dims(img_input, axis=0)
-        return img_input, ResizeM
-
-    def postprocess(self, scores, raw_boxes, ResizeM, raw_shape):
-        # generate centers
-        decode_boxes = []
-        select_scores = []
-        for stride, box_distribute, score in zip(self.strides, raw_boxes,
-                                                 scores):
-            # centers
-            fm_h = self.input_shape[0] / stride
-            fm_w = self.input_shape[1] / stride
-            h_range = np.arange(fm_h)
-            w_range = np.arange(fm_w)
-            ww, hh = np.meshgrid(w_range, h_range)
-            ct_row = (hh.flatten() + 0.5) * stride
-            ct_col = (ww.flatten() + 0.5) * stride
-            center = np.stack((ct_col, ct_row, ct_col, ct_row), axis=1)
-
-            # box distribution to distance
-            reg_range = np.arange(self.reg_max + 1)
-            box_distance = box_distribute.reshape((-1, self.reg_max + 1))
-            box_distance = softmax(box_distance, axis=1)
-            box_distance = box_distance * np.expand_dims(reg_range, axis=0)
-            box_distance = np.sum(box_distance, axis=1).reshape((-1, 4))
-            box_distance = box_distance * stride
-
-            # top K candidate
-            topk_idx = np.argsort(score.max(axis=1))[::-1]
-            topk_idx = topk_idx[:self.num_candidate]
-            center = center[topk_idx]
-            score = score[topk_idx]
-            box_distance = box_distance[topk_idx]
-
-            # decode box
-            decode_box = center + [-1, -1, 1, 1] * box_distance
-
-            select_scores.append(score)
-            decode_boxes.append(decode_box)
-
-        # nms
-        bboxes = np.concatenate(decode_boxes, axis=0)
-        confidences = np.concatenate(select_scores, axis=0)
-        picked_box_probs = []
-        picked_labels = []
-        for class_index in range(0, confidences.shape[1]):
-            probs = confidences[:, class_index]
-            mask = probs > self.prob_threshold
-            probs = probs[mask]
-            if probs.shape[0] == 0:
-                continue
-            subset_boxes = bboxes[mask, :]
-            box_probs = np.concatenate(
-                [subset_boxes, probs.reshape(-1, 1)], axis=1)
-            box_probs = hard_nms(
-                box_probs,
-                iou_threshold=self.iou_threshold,
-                top_k=self.top_k, )
-            picked_box_probs.append(box_probs)
-            picked_labels.extend([class_index] * box_probs.shape[0])
-        if not picked_box_probs:
-            return np.array([]), np.array([]), np.array([])
-        picked_box_probs = np.concatenate(picked_box_probs)
-
-        # resize output boxes
-        picked_box_probs[:, :4] = warp_boxes(picked_box_probs[:, :4],
-                                             np.linalg.inv(ResizeM),
-                                             raw_shape[1], raw_shape[0])
-        return (
-            picked_box_probs[:, :4].astype(np.int32),
-            np.array(picked_labels),
-            picked_box_probs[:, 4], )
-
-    @abstractmethod
-    def infer_image(self, img_input):
-        pass
-
-    def detect(self, img):
-        raw_shape = img.shape
-        img_input, ResizeM = self.preprocess(img)
-        scores, raw_boxes = self.infer_image(img_input)
-        if scores[0].ndim == 1:  # handling num_classes=1 case
-            scores = [x[:, None] for x in scores]
-        bbox, label, score = self.postprocess(scores, raw_boxes, ResizeM,
-                                              raw_shape)
-        return bbox, label, score
-
-    def draw_box(self, raw_img, bbox, label, score):
-        img = raw_img.copy()
-        all_box = [[x, ] + y + [z, ]
-                   for x, y, z in zip(label, bbox.tolist(), score)]
-        img_draw = overlay_bbox_cv(img, all_box, self.class_names)
-        return img_draw
-
-    def detect_folder(self, img_fold, result_path):
-        img_fold = Path(img_fold)
-        result_path = Path(result_path)
-        result_path.mkdir(parents=True, exist_ok=True)
-
-        img_name_list = filter(
-            lambda x: str(x).endswith(".png") or str(x).endswith(".jpg"),
-            img_fold.iterdir(), )
-        img_name_list = list(img_name_list)
-        print(f"find {len(img_name_list)} images")
-
-        for img_path in tqdm(img_name_list):
-            img = cv2.imread(str(img_path))
-            bbox, label, score = self.detect(img)
-            img_draw = self.draw_box(img, bbox, label, score)
-            save_path = str(result_path / img_path.name.replace(".png", ".jpg"))
-            cv2.imwrite(save_path, img_draw)
-
-
-class picodetONNX(picodetABC):
-    def __init__(self, model_path, *args, **kwargs):
-        import onnxruntime as ort
-
-        super(picodetONNX, self).__init__(*args, **kwargs)
-        print("Using ONNX as inference backend")
-        print(f"Using weight: {model_path}")
-
-        # load model
-        self.model_path = model_path
-        self.ort_session = ort.InferenceSession(self.model_path)
-        self.input_name = self.ort_session.get_inputs()[0].name
-
-    def infer_image(self, img_input):
-        inference_results = self.ort_session.run(None,
-                                                 {self.input_name: img_input})
-        scores = [np.squeeze(x) for x in inference_results[:3]]
-        raw_boxes = [np.squeeze(x) for x in inference_results[3:]]
-        return scores, raw_boxes
-
-
-class picodetTorch(picodetABC):
-    def __init__(self, model_path, cfg_path, *args, **kwargs):
-        import torch
-
-        from picodet.model.arch import build_model
-        from picodet.util import Logger, cfg, load_config, load_model_weight
-
-        super(picodetTorch, self).__init__(*args, **kwargs)
-        print("Using PyTorch as inference backend")
-        print(f"Using weight: {model_path}")
-
-        # load model
-        self.model_path = model_path
-        self.cfg_path = cfg_path
-        load_config(cfg, cfg_path)
-        self.logger = Logger(-1, cfg.save_dir, False)
-        self.model = build_model(cfg.model)
-        checkpoint = torch.load(
-            model_path, map_location=lambda storage, loc: storage)
-        load_model_weight(self.model, checkpoint, self.logger)
-
-    def infer_image(self, img_input):
-        import torch
-
-        self.model.train(False)
-        with torch.no_grad():
-            inference_results = self.model(torch.from_numpy(img_input))
-        scores = [
-            x.permute(0, 2, 3, 1).reshape((-1, 80)).sigmoid().detach().numpy()
-            for x in inference_results[0]
-        ]
-        raw_boxes = [
-            x.permute(0, 2, 3, 1).reshape((-1, 32)).detach().numpy()
-            for x in inference_results[1]
-        ]
-        return scores, raw_boxes
-
-
-class picodetNCNN(picodetABC):
-    def __init__(self, model_param, model_bin, *args, **kwargs):
-        import ncnn
-
-        super(picodetNCNN, self).__init__(*args, **kwargs)
-        print("Using ncnn as inference backend")
-        print(f"Using param: {model_param}, bin: {model_bin}")
-
-        # load model
-        self.model_param = model_param
-        self.model_bin = model_bin
-
-        self.net = ncnn.Net()
-        self.net.load_param(model_param)
-        self.net.load_model(model_bin)
-        self.input_name = "input.1"
-
-    def infer_image(self, img_input):
-        import ncnn
-
-        mat_in = ncnn.Mat(img_input.squeeze())
-        ex = self.net.create_extractor()
-        ex.input(self.input_name, mat_in)
-
-        score_out_name = [
-            "save_infer_model/scale_0.tmp_1", "save_infer_model/scale_1.tmp_1",
-            "save_infer_model/scale_2.tmp_1", "save_infer_model/scale_3.tmp_1"
-        ]
-        scores = [np.array(ex.extract(x)[1]) for x in score_out_name]
-        scores = [np.reshape(x, (-1, 80)) for x in scores]
-
-        boxes_out_name = [
-            "save_infer_model/scale_4.tmp_1", "save_infer_model/scale_5.tmp_1",
-            "save_infer_model/scale_6.tmp_1", "save_infer_model/scale_7.tmp_1"
-        ]
-        raw_boxes = [np.array(ex.extract(x)[1]) for x in boxes_out_name]
-        raw_boxes = [np.reshape(x, (-1, 32)) for x in raw_boxes]
-
-        return scores, raw_boxes
-
-
-def main():
-    parser = argparse.ArgumentParser()
-    parser.add_argument(
-        "--model_path",
-        dest="model_path",
-        type=str,
-        default="../model/picodet.param")
-    parser.add_argument(
-        "--model_bin",
-        dest="model_bin",
-        type=str,
-        default="../model/picodet.bin")
-    parser.add_argument(
-        "--cfg_path", dest="cfg_path", type=str, default="config/picodet.yml")
-    parser.add_argument(
-        "--img_fold", dest="img_fold", type=str, default="../imgs")
-    parser.add_argument(
-        "--result_fold", dest="result_fold", type=str, default="../results")
-    parser.add_argument(
-        "--input_shape",
-        dest="input_shape",
-        nargs=2,
-        type=int,
-        default=[320, 320])
-    parser.add_argument(
-        "--backend", choices=["ncnn", "ONNX", "torch"], default="ncnn")
-    args = parser.parse_args()
-
-    print(f"Detecting {args.img_fold}")
-
-    # load detector
-    if args.backend == "ncnn":
-        detector = picodetNCNN(
-            args.model_path, args.model_bin, input_shape=args.input_shape)
-    elif args.backend == "ONNX":
-        detector = picodetONNX(args.model_path, input_shape=args.input_shape)
-    elif args.backend == "torch":
-        detector = picodetTorch(
-            args.model_path, args.cfg_path, input_shape=args.input_shape)
-    else:
-        raise ValueError
-
-    # detect folder
-    detector.detect_folder(args.img_fold, args.result_fold)
-
-
-def test_one():
-    detector = picodetNCNN("../weight/picodet_m_416.param",
-                           "../weight/picodet_m_416.bin")
-    img = cv2.imread("../000000000102.jpg")
-    bbox, label, score = detector.detect(img)
-    img_draw = detector.draw_box(img, bbox, label, score)
-    img_out = img_draw[..., ::-1]
-    cv2.imwrite('python_version.jpg', img_out)
-
-
-if __name__ == "__main__":
-    # main()
-    test_one()
-- 
GitLab