add 3rd infer engine (#4336)

* add 3rd infer engine

add 3rd infer engine (#4336)
* add 3rd infer engine
34654225 · qq_30618961 · GitHub · 3ee7bde2 · 34654225 · 34654225
17 changed file
--- a/deploy/third_engine/demo_mnn/CMakeLists.txt
+++ b/deploy/third_engine/demo_mnn/CMakeLists.txt
+cmake_minimum_required(VERSION 3.9)
+project(picodet-mnn)
+set(CMAKE_CXX_STANDARD 17)
+# Fail at configure time instead of silently falling back to an older standard.
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+# To use a specific OpenCV build, point find_package at it, e.g.:
+# find_package(OpenCV REQUIRED PATHS "/work/dependence/opencv/opencv-3.4.3/build")
+find_package(OpenCV REQUIRED)
+# Header search paths. Replace /path/to/MNN with the root of your MNN checkout/install.
+include_directories(
+        /path/to/MNN/include/MNN
+        /path/to/MNN/include
+        ${OpenCV_INCLUDE_DIRS}
+        .
+)
+# Prebuilt MNN shared library, expected at <source dir>/mnn/lib/libMNN.so.
+add_library(libMNN SHARED IMPORTED)
+set_target_properties(
+        libMNN
+        PROPERTIES IMPORTED_LOCATION
+        ${CMAKE_SOURCE_DIR}/mnn/lib/libMNN.so
+)
+add_executable(picodet-mnn main.cpp picodet_mnn.cpp)
+# Link through the imported target so the library is referenced by its full path
+# rather than looked up by name in a relative link directory.
+target_link_libraries(picodet-mnn libMNN ${OpenCV_LIBS})
--- a/deploy/third_engine/demo_mnn/README.md
+++ b/deploy/third_engine/demo_mnn/README.md
+# PicoDet MNN Demo
+This folder provides PicoDet inference code using
+[Alibaba's MNN framework](https://github.com/alibaba/MNN). Most of the implementation in
+this folder is the same as in *demo_ncnn*.
+## Install MNN
+### Python library
+Just run:
+``` shell
+pip install MNN
+```
+### C++ library
+Please follow the [official document](https://www.yuque.com/mnn/en/build_linux) to build MNN engine.
+- Create picodet_m_416_coco.onnx
+    ```shell
+    modelName=picodet_m_416_coco
+    # export model
+    python tools/export_model.py \
+            -c configs/picodet/${modelName}.yml \
+            -o weights=${modelName}.pdparams \
+            --output_dir=inference_model
+    # convert to onnx
+    paddle2onnx --model_dir inference_model/${modelName} \
+            --model_filename model.pdmodel  \
+            --params_filename model.pdiparams \
+            --opset_version 11 \
+            --save_file ${modelName}.onnx
+    # onnxsim
+    python -m onnxsim ${modelName}.onnx ${modelName}_processed.onnx
+    ```
+- Convert model
+   ``` shell
+   python -m MNN.tools.mnnconvert -f ONNX --modelFile picodet-416.onnx --MNNModel picodet-416.mnn
+   ```
+Here is the converted model: [download link](https://paddledet.bj.bcebos.com/deploy/third_engine/picodet_m_416.mnn).
+## Build
+The Python code *demo_mnn.py* can be run directly and independently, without the main PicoDet repo.
+`PicoDetONNX` and `PicoDetTorch` are two classes used to check the similarity of MNN inference results
+with the ONNX model and the PyTorch model. They can be removed with no side effects.
+For the C++ code, replace `libMNN.so` under *./mnn/lib* with the one you just compiled, modify the OpenCV path and the MNN path in the CMake file,
+and run
+``` shell
+mkdir build && cd build
+cmake ..
+make
+```
+Note that a flag in `main.cpp` controls whether the detection result is shown in a window or saved into a folder.
+``` c++
+#define __SAVE_RESULT__ // if defined save drawed results to ../results, else show it in windows
+```
+## Run
+### Python
+`demo_mnn.py` provides an inference class `PicoDetMNN` that combines preprocessing, postprocessing and visualization.
+It can also be used from the command line in the form:
+```shell
+demo_mnn.py [-h] [--model_path MODEL_PATH] [--cfg_path CFG_PATH]
+    [--img_fold IMG_FOLD] [--result_fold RESULT_FOLD]
+    [--input_shape INPUT_SHAPE INPUT_SHAPE]
+    [--backend {MNN,ONNX,torch}]
+```
+For example:
+``` shell
+# run MNN 416 model
+python ./demo_mnn.py --model_path ../model/picodet-416.mnn --img_fold ../imgs --result_fold ../results
+# run MNN 320 model
+python ./demo_mnn.py --model_path ../model/picodet-320.mnn --input_shape 320 320 --backend MNN
+# run onnx model
+python ./demo_mnn.py --model_path ../model/sim.onnx --backend ONNX
+```
+### C++
+The C++ inference interface is the same as in the NCNN code. To detect images in a folder, run:
+``` shell
+./picodet-mnn "1" "../imgs/test.jpg"
+```
+For speed benchmark
+``` shell
+./picodet-mnn "3" "0"
+```
+## Reference
+[MNN](https://github.com/alibaba/MNN)
--- a/deploy/third_engine/demo_mnn/main.cpp
+++ b/deploy/third_engine/demo_mnn/main.cpp
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// reference from https://github.com/RangiLyu/nanodet/tree/main/demo_mnn
+#include "picodet_mnn.hpp"
+#include <iostream>
+#include <opencv2/core/core.hpp>
+#include <opencv2/highgui/highgui.hpp>
+#include <opencv2/imgproc/imgproc.hpp>
+#define __SAVE_RESULT__ // if defined save drawed results to ../results, else show it in windows
+// Integer rectangle. resize_uniform() fills it with the offset (x, y) and the
+// size (width, height) of the resized picture inside the destination canvas.
+struct object_rect {
+    int x, y;
+    int width, height;
+};
+// Resizes `src` to fit inside a black `dst_size` canvas while keeping its aspect
+// ratio, and centres it along the axis that has spare room.
+//
+// src         - input picture; treated as 8-bit 3-channel, as the canvas is CV_8UC3.
+// dst         - receives the dst_size canvas containing the resized picture.
+// dst_size    - requested canvas size.
+// effect_area - receives the position and size of the resized picture inside `dst`.
+//
+// Returns 0 on success. Returns -1 (after printing "error") when `src` is empty or
+// `dst_size` is not positive; in that case `dst` is left untouched and
+// `effect_area` is zeroed.
+int resize_uniform(cv::Mat& src, cv::Mat& dst, cv::Size dst_size, object_rect& effect_area)
+{
+    const int w = src.cols;
+    const int h = src.rows;
+    const int dst_w = dst_size.width;
+    const int dst_h = dst_size.height;
+    // An empty frame (for instance the one read past the end of a video) or a
+    // degenerate target would otherwise reach cv::resize and raise an exception.
+    if (src.empty() || dst_w <= 0 || dst_h <= 0) {
+        effect_area.x = 0;
+        effect_area.y = 0;
+        effect_area.width = 0;
+        effect_area.height = 0;
+        printf("error\n");
+        return -1;
+    }
+    dst = cv::Mat(cv::Size(dst_w, dst_h), CV_8UC3, cv::Scalar(0));
+    // Compare w/h with dst_w/dst_h by cross-multiplication so the decision does
+    // not depend on floating-point rounding.
+    const long long src_cross = static_cast<long long>(w) * dst_h;
+    const long long dst_cross = static_cast<long long>(dst_w) * h;
+    int tmp_w = dst_w;
+    int tmp_h = dst_h;
+    if (src_cross > dst_cross) {
+        // Source is relatively wider: fill the width, shrink the height.
+        tmp_h = static_cast<int>(floor((dst_w * 1.0 / w) * h));
+    }
+    else if (src_cross < dst_cross) {
+        // Source is relatively taller: fill the height, shrink the width.
+        tmp_w = static_cast<int>(floor((dst_h * 1.0 / h) * w));
+    }
+    // Extreme aspect ratios can floor to 0, which cv::resize rejects.
+    if (tmp_w < 1) tmp_w = 1;
+    if (tmp_h < 1) tmp_h = 1;
+    if (tmp_w == dst_w && tmp_h == dst_h) {
+        // No padding needed: the resized picture covers the whole canvas.
+        cv::resize(src, dst, dst_size);
+        effect_area.x = 0;
+        effect_area.y = 0;
+        effect_area.width = dst_w;
+        effect_area.height = dst_h;
+        return 0;
+    }
+    cv::Mat tmp;
+    cv::resize(src, tmp, cv::Size(tmp_w, tmp_h));
+    const int index_w = (dst_w - tmp_w) / 2;
+    const int index_h = (dst_h - tmp_h) / 2;
+    // Copy through a region of interest instead of raw memcpy so that the row
+    // stride of either matrix does not matter.
+    tmp.copyTo(dst(cv::Rect(index_w, index_h, tmp_w, tmp_h)));
+    effect_area.x = index_w;
+    effect_area.y = index_h;
+    effect_area.width = tmp_w;
+    effect_area.height = tmp_h;
+    return 0;
+}
+// Fixed palette of 80 three-component colours. draw_bboxes() indexes it with a
+// box's class label and passes the three values, in order, to cv::Scalar.
+const int color_list[80][3] =
+{
+    {216 , 82 , 24},
+    {236 ,176 , 31},
+    {125 , 46 ,141},
+    {118 ,171 , 47},
+    { 76 ,189 ,237},
+    {238 , 19 , 46},
+    { 76 , 76 , 76},
+    {153 ,153 ,153},
+    {255 ,  0 ,  0},
+    {255 ,127 ,  0},
+    {190 ,190 ,  0},
+    {  0 ,255 ,  0},
+    {  0 ,  0 ,255},
+    {170 ,  0 ,255},
+    { 84 , 84 ,  0},
+    { 84 ,170 ,  0},
+    { 84 ,255 ,  0},
+    {170 , 84 ,  0},
+    {170 ,170 ,  0},
+    {170 ,255 ,  0},
+    {255 , 84 ,  0},
+    {255 ,170 ,  0},
+    {255 ,255 ,  0},
+    {  0 , 84 ,127},
+    {  0 ,170 ,127},
+    {  0 ,255 ,127},
+    { 84 ,  0 ,127},
+    { 84 , 84 ,127},
+    { 84 ,170 ,127},
+    { 84 ,255 ,127},
+    {170 ,  0 ,127},
+    {170 , 84 ,127},
+    {170 ,170 ,127},
+    {170 ,255 ,127},
+    {255 ,  0 ,127},
+    {255 , 84 ,127},
+    {255 ,170 ,127},
+    {255 ,255 ,127},
+    {  0 , 84 ,255},
+    {  0 ,170 ,255},
+    {  0 ,255 ,255},
+    { 84 ,  0 ,255},
+    { 84 , 84 ,255},
+    { 84 ,170 ,255},
+    { 84 ,255 ,255},
+    {170 ,  0 ,255},
+    {170 , 84 ,255},
+    {170 ,170 ,255},
+    {170 ,255 ,255},
+    {255 ,  0 ,255},
+    {255 , 84 ,255},
+    {255 ,170 ,255},
+    { 42 ,  0 ,  0},
+    { 84 ,  0 ,  0},
+    {127 ,  0 ,  0},
+    {170 ,  0 ,  0},
+    {212 ,  0 ,  0},
+    {255 ,  0 ,  0},
+    {  0 , 42 ,  0},
+    {  0 , 84 ,  0},
+    {  0 ,127 ,  0},
+    {  0 ,170 ,  0},
+    {  0 ,212 ,  0},
+    {  0 ,255 ,  0},
+    {  0 ,  0 , 42},
+    {  0 ,  0 , 84},
+    {  0 ,  0 ,127},
+    {  0 ,  0 ,170},
+    {  0 ,  0 ,212},
+    {  0 ,  0 ,255},
+    {  0 ,  0 ,  0},
+    { 36 , 36 , 36},
+    { 72 , 72 , 72},
+    {109 ,109 ,109},
+    {145 ,145 ,145},
+    {182 ,182 ,182},
+    {218 ,218 ,218},
+    {  0 ,113 ,188},
+    { 80 ,182 ,188},
+    {127 ,127 ,  0},
+};
+// Draws every box of `bboxes`, with a "<class name> <score>%" caption, on a copy
+// of `bgr`, then shows the copy in a window or writes it to disk.
+//
+// bgr        - picture to annotate; it is cloned, never modified.
+// bboxes     - detections whose coordinates live in the letterboxed picture.
+// effect_roi - where the original picture sits inside the letterboxed one; used
+//              to map box coordinates back onto `bgr`.
+// save_path  - "None" shows the result with cv::imshow, any other value is used
+//              as the output file name for cv::imwrite.
+//
+// Boxes whose label has no entry in the name or colour table are skipped, and
+// nothing is drawn when `effect_roi` has a non-positive size.
+void draw_bboxes(const cv::Mat& bgr, const std::vector<BoxInfo>& bboxes, object_rect effect_roi, std::string save_path="None")
+{
+    static const char* class_names[] = { "person", "bicycle", "car", "motorcycle", "airplane", "bus",
+                                        "train", "truck", "boat", "traffic light", "fire hydrant",
+                                        "stop sign", "parking meter", "bench", "bird", "cat", "dog",
+                                        "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe",
+                                        "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
+                                        "skis", "snowboard", "sports ball", "kite", "baseball bat",
+                                        "baseball glove", "skateboard", "surfboard", "tennis racket",
+                                        "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl",
+                                        "banana", "apple", "sandwich", "orange", "broccoli", "carrot",
+                                        "hot dog", "pizza", "donut", "cake", "chair", "couch",
+                                        "potted plant", "bed", "dining table", "toilet", "tv", "laptop",
+                                        "mouse", "remote", "keyboard", "cell phone", "microwave", "oven",
+                                        "toaster", "sink", "refrigerator", "book", "clock", "vase",
+                                        "scissors", "teddy bear", "hair drier", "toothbrush"
+    };
+    const int num_names = static_cast<int>(sizeof(class_names) / sizeof(class_names[0]));
+    const int num_colors = static_cast<int>(sizeof(color_list) / sizeof(color_list[0]));
+    // The ratios below divide by the ROI size.
+    if (effect_roi.width <= 0 || effect_roi.height <= 0)
+    {
+        fprintf(stderr, "draw_bboxes: invalid roi %dx%d\n", effect_roi.width, effect_roi.height);
+        return;
+    }
+    cv::Mat image = bgr.clone();
+    const float width_ratio = static_cast<float>(image.cols) / static_cast<float>(effect_roi.width);
+    const float height_ratio = static_cast<float>(image.rows) / static_cast<float>(effect_roi.height);
+    for (const BoxInfo& bbox : bboxes)
+    {
+        // Guard the table lookups against labels outside the known classes.
+        if (bbox.label < 0 || bbox.label >= num_names || bbox.label >= num_colors)
+            continue;
+        const int* rgb = color_list[bbox.label];
+        const cv::Scalar color(rgb[0], rgb[1], rgb[2]);
+        // Map the box from letterboxed coordinates back to the original picture.
+        const int left = static_cast<int>((bbox.x1 - effect_roi.x) * width_ratio);
+        const int top = static_cast<int>((bbox.y1 - effect_roi.y) * height_ratio);
+        const int right = static_cast<int>((bbox.x2 - effect_roi.x) * width_ratio);
+        const int bottom = static_cast<int>((bbox.y2 - effect_roi.y) * height_ratio);
+        cv::rectangle(image, cv::Rect(cv::Point(left, top), cv::Point(right, bottom)), color);
+        char text[256];
+        snprintf(text, sizeof(text), "%s %.1f%%", class_names[bbox.label], bbox.score * 100);
+        int baseLine = 0;
+        const cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.4, 1, &baseLine);
+        // Place the caption above the box, clamped so it stays inside the picture.
+        int x = left;
+        int y = top - label_size.height - baseLine;
+        if (y < 0)
+            y = 0;
+        if (x + label_size.width > image.cols)
+            x = image.cols - label_size.width;
+        if (x < 0)
+            x = 0;
+        cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)),
+            color, -1);
+        cv::putText(image, text, cv::Point(x, y + label_size.height),
+            cv::FONT_HERSHEY_SIMPLEX, 0.4, cv::Scalar(255, 255, 255));
+    }
+    if (save_path == "None")
+    {
+        cv::imshow("image", image);
+    }
+    else
+    {
+        cv::imwrite(save_path, image);
+        std::cout << save_path << std::endl;
+    }
+}
+// Runs the detector on every file matched by the glob pattern `imagepath`.
+//
+// With __SAVE_RESULT__ defined, each annotated picture is written to
+// "../results/<file name>"; otherwise it is shown in a window until a key is
+// pressed. Returns 0 on success, or -1 as soon as one file cannot be read.
+int image_demo(PicoDet &detector, const char* imagepath)
+{
+    std::vector<cv::String> filenames;
+    cv::glob(imagepath, filenames, false);
+    for (const auto& img_name : filenames)
+    {
+        cv::Mat image = cv::imread(img_name);
+        if (image.empty())
+        {
+            fprintf(stderr, "cv::imread %s failed\n", img_name.c_str());
+            return -1;
+        }
+        object_rect effect_roi;
+        cv::Mat resized_img;
+        resize_uniform(image, resized_img, cv::Size(320, 320), effect_roi);
+        std::vector<BoxInfo> results;
+        detector.detect(resized_img, results);
+        #ifdef __SAVE_RESULT__
+            // Build the output name from the file's base name rather than by
+            // overwriting fixed character positions, so any input directory works.
+            const std::string src_path = img_name;
+            const std::string::size_type sep = src_path.find_last_of("/\\");
+            const std::string file_name = (sep == std::string::npos) ? src_path : src_path.substr(sep + 1);
+            draw_bboxes(image, results, effect_roi, "../results/" + file_name);
+        #else
+            draw_bboxes(image, results, effect_roi);
+            cv::waitKey(0);
+        #endif
+    }
+    return 0;
+}
+// Grabs frames from camera `cam_id`, runs the detector on each one and shows
+// the annotated frame.
+//
+// Returns -1 when the camera cannot be opened, and 0 once the camera stops
+// delivering frames.
+int webcam_demo(PicoDet& detector, int cam_id)
+{
+    cv::Mat image;
+    cv::VideoCapture cap(cam_id);
+    if (!cap.isOpened())
+    {
+        fprintf(stderr, "failed to open camera %d\n", cam_id);
+        return -1;
+    }
+    while (true)
+    {
+        cap >> image;
+        // An empty frame means the capture has nothing more to deliver.
+        if (image.empty())
+            break;
+        object_rect effect_roi;
+        cv::Mat resized_img;
+        resize_uniform(image, resized_img, cv::Size(320, 320), effect_roi);
+        std::vector<BoxInfo> results;
+        detector.detect(resized_img, results);
+        draw_bboxes(image, results, effect_roi);
+        cv::waitKey(1);
+    }
+    return 0;
+}
+// Reads the video at `path` frame by frame, runs the detector on each frame and
+// shows the annotated frame.
+//
+// Returns -1 when the video cannot be opened, and 0 after the last frame.
+int video_demo(PicoDet& detector, const char* path)
+{
+    cv::Mat image;
+    cv::VideoCapture cap(path);
+    if (!cap.isOpened())
+    {
+        fprintf(stderr, "failed to open video %s\n", path);
+        return -1;
+    }
+    while (true)
+    {
+        cap >> image;
+        // Reading past the last frame yields an empty matrix: stop there.
+        if (image.empty())
+            break;
+        object_rect effect_roi;
+        cv::Mat resized_img;
+        resize_uniform(image, resized_img, cv::Size(320, 320), effect_roi);
+        std::vector<BoxInfo> results;
+        detector.detect(resized_img, results);
+        draw_bboxes(image, results, effect_roi);
+        cv::waitKey(1);
+    }
+    return 0;
+}
+// Times detector.detect() on a constant 320x320 picture.
+//
+// The first `warm_up` runs are executed but not measured; the following
+// `loop_num` runs contribute to the minimum, maximum and average, which are
+// printed to stderr in milliseconds. Always returns 0.
+int benchmark(PicoDet& detector)
+{
+    const int loop_num = 100;
+    const int warm_up = 8;
+    double time_min = DBL_MAX;
+    double time_max = -DBL_MAX;
+    double time_avg = 0;
+    cv::Mat image(320, 320, CV_8UC3, cv::Scalar(1, 1, 1));
+    for (int i = 0; i < warm_up + loop_num; i++)
+    {
+        const auto start = std::chrono::steady_clock::now();
+        std::vector<BoxInfo> results;
+        detector.detect(image, results);
+        const auto end = std::chrono::steady_clock::now();
+        // Measure in milliseconds: with two printed decimals, seconds would
+        // hide most of the difference between runs.
+        const std::chrono::duration<double, std::milli> elapsed = end - start;
+        const double time = elapsed.count();
+        if (i >= warm_up)
+        {
+            time_min = (std::min)(time_min, time);
+            time_max = (std::max)(time_max, time);
+            time_avg += time;
+        }
+    }
+    time_avg /= loop_num;
+    fprintf(stderr, "%20s  min = %7.2f  max = %7.2f  avg = %7.2f\n", "picodet", time_min, time_max, time_avg);
+    return 0;
+}
+// Entry point: `<program> [mode] [path]`.
+//
+// mode 0 = webcam (path is the camera id), 1 = images (path is a glob pattern),
+// 2 = video file, 3 = benchmark. The exit code is the selected demo's return
+// value, or -1 for wrong usage, an unknown mode or an uncaught exception.
+int main(int argc, char** argv)
+{
+    const auto print_usage = [&]() {
+        fprintf(stderr, "usage: %s [mode] [path]. \n For webcam mode=0, path is cam id; \n For image demo, mode=1, path=xxx/xxx/*.jpg; \n For video, mode=2; \n For benchmark, mode=3 path=0.\n", argv[0]);
+    };
+    if (argc != 3)
+    {
+        print_usage();
+        return -1;
+    }
+    try
+    {
+        PicoDet detector("../weight/picodet-416.mnn", 416, 416, 4, 0.45, 0.3);
+        const int mode = atoi(argv[1]);
+        switch (mode)
+        {
+        case 0:
+            return webcam_demo(detector, atoi(argv[2]));
+        case 1:
+            return image_demo(detector, argv[2]);
+        case 2:
+            return video_demo(detector, argv[2]);
+        case 3:
+            return benchmark(detector);
+        default:
+            print_usage();
+            return -1;
+        }
+    }
+    catch (const std::exception& e)
+    {
+        // Report the failure instead of letting the exception terminate the process.
+        fprintf(stderr, "error: %s\n", e.what());
+        return -1;
+    }
+}
--- a/deploy/third_engine/demo_mnn/picodet_mnn.cpp
+++ b/deploy/third_engine/demo_mnn/picodet_mnn.cpp
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// reference from https://github.com/RangiLyu/nanodet/tree/main/demo_mnn
+#include "picodet_mnn.hpp"
+using namespace std;
+// Loads the MNN model at `mnn_path` and prepares an inference session.
+//
+// mnn_path         - path of the .mnn model file.
+// input_width      - stored as the network input width (in_w).
+// input_length     - stored as the network input height (in_h).
+// num_thread_      - thread count handed to the MNN schedule config.
+// score_threshold_ - threshold later passed to decode_infer().
+// nms_threshold_   - threshold later passed to nms().
+//
+// Throws std::runtime_error when the model cannot be loaded, or when the
+// session or its input tensor cannot be obtained.
+PicoDet::PicoDet(const std::string &mnn_path,
+                     int input_width, int input_length, int num_thread_,
+                     float score_threshold_, float nms_threshold_)
+{
+    num_thread = num_thread_;
+    in_w = input_width;
+    in_h = input_length;
+    score_threshold = score_threshold_;
+    nms_threshold = nms_threshold_;
+    PicoDet_interpreter = std::shared_ptr<MNN::Interpreter>(MNN::Interpreter::createFromFile(mnn_path.c_str()));
+    if (!PicoDet_interpreter)
+    {
+        throw std::runtime_error("PicoDet: failed to load MNN model: " + mnn_path);
+    }
+    MNN::ScheduleConfig config;
+    config.numThread = num_thread;
+    // backendConfig only needs to outlive the createSession() call below.
+    MNN::BackendConfig backendConfig;
+    backendConfig.precision = MNN::BackendConfig::Precision_Low;
+    config.backendConfig = &backendConfig;
+    PicoDet_session = PicoDet_interpreter->createSession(config);
+    if (PicoDet_session == nullptr)
+    {
+        throw std::runtime_error("PicoDet: failed to create MNN session for: " + mnn_path);
+    }
+    input_tensor = PicoDet_interpreter->getSessionInput(PicoDet_session, nullptr);
+    if (input_tensor == nullptr)
+    {
+        throw std::runtime_error("PicoDet: model has no input tensor: " + mnn_path);
+    }
+}
+// Releases the model and then the session through the interpreter.
+PicoDet::~PicoDet()
+{
+    MNN::Interpreter *const net = PicoDet_interpreter.get();
+    net->releaseModel();
+    net->releaseSession(PicoDet_session);
+}
+// Runs the network on `raw_image` and appends the detections to `result_list`.
+//
+// raw_image   - input picture; it is resized to in_w x in_h before inference.
+// result_list - receives the boxes that survive NMS, scaled to the size of
+//               `raw_image`. Existing entries are kept.
+//
+// Returns 0 on success, and -1 when the picture is empty or when one of the
+// configured output layers is missing from the model. Timing and the number of
+// detections are printed to stdout.
+int PicoDet::detect(cv::Mat &raw_image, std::vector<BoxInfo> &result_list)
+{
+    if (raw_image.empty()) {
+        std::cout << "image is empty ,please check!" << std::endl;
+        return -1;
+    }
+    image_h = raw_image.rows;
+    image_w = raw_image.cols;
+    cv::Mat image;
+    cv::resize(raw_image, image, cv::Size(in_w, in_h));
+    PicoDet_interpreter->resizeTensor(input_tensor, {1, 3, in_h, in_w});
+    PicoDet_interpreter->resizeSession(PicoDet_session);
+    // Mean subtraction and scaling are applied while the pixels are copied
+    // into the input tensor.
+    std::shared_ptr<MNN::CV::ImageProcess> pretreat(
+        MNN::CV::ImageProcess::create(MNN::CV::BGR, MNN::CV::BGR, mean_vals, 3,
+                                        norm_vals, 3));
+    pretreat->convert(image.data, in_w, in_h, image.step[0], input_tensor);
+    const auto start = std::chrono::steady_clock::now();
+    // run network
+    PicoDet_interpreter->runSession(PicoDet_session);
+    // get output data, one bucket of candidate boxes per class
+    std::vector<std::vector<BoxInfo>> results;
+    results.resize(num_class);
+    for (const auto &head_info : heads_info)
+    {
+        MNN::Tensor *tensor_scores = PicoDet_interpreter->getSessionOutput(PicoDet_session, head_info.cls_layer.c_str());
+        MNN::Tensor *tensor_boxes = PicoDet_interpreter->getSessionOutput(PicoDet_session, head_info.dis_layer.c_str());
+        // A model exported with different layer names has no such outputs.
+        if (tensor_scores == nullptr || tensor_boxes == nullptr)
+        {
+            std::cout << "output tensor not found: " << head_info.cls_layer << " / " << head_info.dis_layer << std::endl;
+            return -1;
+        }
+        MNN::Tensor tensor_scores_host(tensor_scores, tensor_scores->getDimensionType());
+        tensor_scores->copyToHostTensor(&tensor_scores_host);
+        MNN::Tensor tensor_boxes_host(tensor_boxes, tensor_boxes->getDimensionType());
+        tensor_boxes->copyToHostTensor(&tensor_boxes_host);
+        decode_infer(&tensor_scores_host, &tensor_boxes_host, head_info.stride, score_threshold, results);
+    }
+    const auto end = std::chrono::steady_clock::now();
+    const std::chrono::duration<double> elapsed = end - start;
+    std::cout << "inference time:" << elapsed.count() << " s, ";
+    for (auto &class_boxes : results)
+    {
+        nms(class_boxes, nms_threshold);
+        for (BoxInfo box : class_boxes)
+        {
+            // Rescale from network input coordinates to the caller's picture.
+            box.x1 = box.x1 / in_w * image_w;
+            box.x2 = box.x2 / in_w * image_w;
+            box.y1 = box.y1 / in_h * image_h;
+            box.y2 = box.y2 / in_h * image_h;
+            result_list.push_back(box);
+        }
+    }
+    std::cout << "detect " << result_list.size() << " objects" << std::endl;
+    return 0;
+}
+// Decodes one detection head into candidate boxes.
+//
+// cls_pred  - host tensor read as num_class scores per grid point.
+// dis_pred  - host tensor read as 4 * (reg_max + 1) values per grid point.
+// stride    - stride of this head with respect to the network input.
+// threshold - a grid point yields a box only if its best score exceeds this.
+// results   - per-class buckets; the decoded box is appended to the bucket of
+//             its best-scoring class.
+void PicoDet::decode_infer(MNN::Tensor *cls_pred, MNN::Tensor *dis_pred, int stride, float threshold, std::vector<std::vector<BoxInfo>> &results)
+{
+    // Round up: 416 / 64 is 6.5, and floor division would both drop grid points
+    // and mis-map indices to rows/columns.
+    // NOTE(review): assumes the exported head uses ceil(in / stride) points per
+    // side (49 points for a 416 input at stride 64) - confirm against the model.
+    const int feature_h = (in_h + stride - 1) / stride;
+    const int feature_w = (in_w + stride - 1) / stride;
+    const int box_len = 4 * (reg_max + 1);
+    // Never read past what the tensors actually hold.
+    const int num_points = (std::min)(feature_h * feature_w,
+                                      (std::min)(cls_pred->elementSize() / num_class,
+                                                 dis_pred->elementSize() / box_len));
+    const float *all_scores = cls_pred->host<float>();
+    const float *all_boxes = dis_pred->host<float>();
+    for (int idx = 0; idx < num_points; idx++)
+    {
+        const float *scores = all_scores + (idx * num_class);
+        const int row = idx / feature_w;
+        const int col = idx % feature_w;
+        // Pick the best-scoring class of this grid point.
+        float score = 0;
+        int cur_label = 0;
+        for (int label = 0; label < num_class; label++)
+        {
+            if (scores[label] > score)
+            {
+                score = scores[label];
+                cur_label = label;
+            }
+        }
+        if (score > threshold)
+        {
+            const float *bbox_pred = all_boxes + (idx * box_len);
+            results[cur_label].push_back(disPred2Bbox(bbox_pred, cur_label, score, col, row, stride));
+        }
+    }
+}
+// Converts the distribution prediction of one grid point into a box.
+//
+// dfl_det - 4 consecutive groups of (reg_max + 1) values, one group per side.
+// label   - class index stored in the returned box.
+// score   - score stored in the returned box.
+// x, y    - column and row of the grid point.
+// stride  - stride of the head the point belongs to.
+//
+// Each group is passed through softmax and reduced to its expected bin index,
+// which is multiplied by `stride` to give the distance from the cell centre to
+// that side. The result is clipped to [0, in_w] x [0, in_h].
+BoxInfo PicoDet::disPred2Bbox(const float *&dfl_det, int label, float score, int x, int y, int stride)
+{
+    const float ct_x = (x + 0.5) * stride;
+    const float ct_y = (y + 0.5) * stride;
+    const int bins = reg_max + 1;
+    std::vector<float> dis_pred(4);
+    // One scratch buffer reused for all four sides; freed automatically.
+    std::vector<float> dis_after_sm(bins);
+    for (int i = 0; i < 4; i++)
+    {
+        activation_function_softmax(dfl_det + i * bins, dis_after_sm.data(), bins);
+        float dis = 0;
+        for (int j = 0; j < bins; j++)
+        {
+            dis += j * dis_after_sm[j];
+        }
+        dis_pred[i] = dis * stride;
+    }
+    const float xmin = (std::max)(ct_x - dis_pred[0], .0f);
+    const float ymin = (std::max)(ct_y - dis_pred[1], .0f);
+    const float xmax = (std::min)(ct_x + dis_pred[2], (float)in_w);
+    const float ymax = (std::min)(ct_y + dis_pred[3], (float)in_h);
+    return BoxInfo{xmin, ymin, xmax, ymax, score, label};
+}
+// Non-maximum suppression, in place.
+//
+// input_boxes - sorted by descending score; on return only the kept boxes
+//               remain, still in that order.
+// NMS_THRESH  - a box is dropped when its intersection-over-union with an
+//               already kept, higher-ranked box is >= this value.
+//
+// Widths and heights are computed as (max - min + 1).
+void PicoDet::nms(std::vector<BoxInfo> &input_boxes, float NMS_THRESH)
+{
+    std::sort(input_boxes.begin(), input_boxes.end(),
+              [](const BoxInfo &a, const BoxInfo &b) { return a.score > b.score; });
+    const size_t count = input_boxes.size();
+    std::vector<float> vArea(count);
+    for (size_t i = 0; i < count; ++i)
+    {
+        const BoxInfo &box = input_boxes[i];
+        vArea[i] = (box.x2 - box.x1 + 1) * (box.y2 - box.y1 + 1);
+    }
+    // Mark losers instead of erasing them one by one: erasing from the middle
+    // of a vector inside the nested loop shifts the tail on every removal.
+    std::vector<char> suppressed(count, 0);
+    for (size_t i = 0; i < count; ++i)
+    {
+        if (suppressed[i])
+            continue;
+        const BoxInfo &keep = input_boxes[i];
+        for (size_t j = i + 1; j < count; ++j)
+        {
+            if (suppressed[j])
+                continue;
+            const BoxInfo &cand = input_boxes[j];
+            const float xx1 = (std::max)(keep.x1, cand.x1);
+            const float yy1 = (std::max)(keep.y1, cand.y1);
+            const float xx2 = (std::min)(keep.x2, cand.x2);
+            const float yy2 = (std::min)(keep.y2, cand.y2);
+            const float w = (std::max)(float(0), xx2 - xx1 + 1);
+            const float h = (std::max)(float(0), yy2 - yy1 + 1);
+            const float inter = w * h;
+            const float ovr = inter / (vArea[i] + vArea[j] - inter);
+            if (ovr >= NMS_THRESH)
+                suppressed[j] = 1;
+        }
+    }
+    // Compact the survivors to the front, preserving their order.
+    size_t kept = 0;
+    for (size_t i = 0; i < count; ++i)
+    {
+        if (!suppressed[i])
+        {
+            if (kept != i)
+                input_boxes[kept] = input_boxes[i];
+            ++kept;
+        }
+    }
+    input_boxes.erase(input_boxes.begin() + kept, input_boxes.end());
+}
+// Returns the name stored in `labels` for class index `label`, or an empty
+// string when the index is outside the table.
+std::string PicoDet::get_label_str(int label)
+{
+    if (label < 0 || label >= static_cast<int>(labels.size()))
+    {
+        return std::string();
+    }
+    return labels[label];
+}
+// Fast approximation of exp(x): the bit pattern of the result is built directly
+// from a linear function of x and reinterpreted as a float through a union.
+//
+// Inputs below -87 return 0 and inputs above 88 are treated as 88, because
+// outside that range the intermediate value no longer fits the conversion to
+// uint32_t or no longer encodes a finite float.
+inline float fast_exp(float x)
+{
+    if (x < -87.0f)
+    {
+        return 0.0f;
+    }
+    if (x > 88.0f)
+    {
+        x = 88.0f;
+    }
+    union
+    {
+        uint32_t i;
+        float f;
+    } v{};
+    v.i = (1 << 23) * (1.4426950409 * x + 126.93490512f);
+    return v.f;
+}
+// Logistic function 1 / (1 + exp(-x)), evaluated with fast_exp().
+inline float sigmoid(float x)
+{
+    const float decay = fast_exp(-x);
+    return 1.0f / (1.0f + decay);
+}
+// Softmax over `length` values.
+//
+// src    - input values.
+// dst    - receives the normalised values; must hold `length` elements.
+// length - number of elements; nothing is done when it is not positive.
+//
+// The maximum is subtracted before exponentiation, and the exponential is
+// evaluated with fast_exp(). Always returns 0.
+template <typename T>
+int activation_function_softmax(const T *src, T *dst, int length)
+{
+    // std::max_element on an empty range returns the end pointer, which must
+    // not be dereferenced.
+    if (length <= 0)
+    {
+        return 0;
+    }
+    const T alpha = *std::max_element(src, src + length);
+    T denominator{0};
+    for (int i = 0; i < length; ++i)
+    {
+        dst[i] = fast_exp(src[i] - alpha);
+        denominator += dst[i];
+    }
+    for (int i = 0; i < length; ++i)
+    {
+        dst[i] /= denominator;
+    }
+    return 0;
+}
--- a/deploy/third_engine/demo_mnn/picodet_mnn.hpp
+++ b/deploy/third_engine/demo_mnn/picodet_mnn.hpp
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// reference from https://github.com/RangiLyu/nanodet/tree/main/demo_mnn
+#ifndef __PicoDet_H__
+#define __PicoDet_H__
+#pragma once
+#include "Interpreter.hpp"
+#include "MNNDefine.h"
+#include "Tensor.hpp"
+#include "ImageProcess.hpp"
+#include <opencv2/opencv.hpp>
+#include <algorithm>
+#include <cfloat>
+#include <chrono>
+#include <cmath>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <iostream>
+#include <memory>
+#include <stdexcept>
+#include <string>
+#include <vector>
+// One detection head: the names of its two output layers and its stride.
+struct HeadInfo_
+{
+    std::string cls_layer;  // output layer holding the class scores
+    std::string dis_layer;  // output layer holding the box distributions
+    int stride;
+};
+using HeadInfo = HeadInfo_;
+// One detection: corner coordinates (x1, y1) - (x2, y2), score and class index.
+struct BoxInfo_
+{
+    float x1, y1;
+    float x2, y2;
+    float score;
+    int label;
+};
+using BoxInfo = BoxInfo_;
+// PicoDet detector running on the MNN inference engine.
+//
+// The constructor loads the model and creates a session; detect() runs one
+// picture through the network and returns the boxes left after NMS.
+// NOTE(review): the destructor releases the session, yet copying is not
+// disabled - a copied object would release the same session twice.
+class PicoDet {
+public:
+    // mnn_path: model file. input_width / input_length: network input size.
+    // num_thread_: MNN thread count. score_threshold_ / nms_threshold_:
+    // thresholds used by detect().
+    PicoDet(const std::string &mnn_path,
+            int input_width, int input_length, int num_thread_ = 4, float score_threshold_ = 0.5, float nms_threshold_ = 0.3);
+    ~PicoDet();
+    // Detects objects in `img` and appends them to `result_list`;
+    // returns 0 on success and -1 when `img` is empty.
+    int detect(cv::Mat &img, std::vector<BoxInfo> &result_list);
+    // Returns the entry of `labels` at index `label`.
+    std::string get_label_str(int label);
+private:
+    // Turns the outputs of one head into per-class candidate boxes.
+    void decode_infer(MNN::Tensor *cls_pred, MNN::Tensor *dis_pred, int stride, float threshold, std::vector<std::vector<BoxInfo>> &results);
+    // Builds one box from the distribution prediction of a grid point.
+    BoxInfo disPred2Bbox(const float *&dfl_det, int label, float score, int x, int y, int stride);
+    // Non-maximum suppression on the boxes of one class, in place.
+    void nms(std::vector<BoxInfo> &input_boxes, float NMS_THRESH);
+private:
+    std::shared_ptr<MNN::Interpreter> PicoDet_interpreter;
+    MNN::Session *PicoDet_session = nullptr;
+    MNN::Tensor *input_tensor = nullptr;
+    int num_thread;
+    // Size of the picture passed to the latest detect() call.
+    int image_w;
+    int image_h;
+    // Network input size; overwritten by the constructor arguments.
+    int in_w = 320;
+    int in_h = 320;
+    float score_threshold;
+    float nms_threshold;
+    // Per-channel mean and scale handed to MNN's ImageProcess in detect().
+    const float mean_vals[3] = { 103.53f, 116.28f, 123.675f };
+    const float norm_vals[3] = { 0.017429f, 0.017507f, 0.017125f };
+    const int num_class = 80;
+    // Each box side is predicted over reg_max + 1 bins.
+    const int reg_max = 7;
+    std::vector<HeadInfo> heads_info{
+        // cls_pred|dis_pred|stride
+        {"save_infer_model/scale_0.tmp_1", "save_infer_model/scale_4.tmp_1", 8},
+        {"save_infer_model/scale_1.tmp_1", "save_infer_model/scale_5.tmp_1", 16},
+        {"save_infer_model/scale_2.tmp_1", "save_infer_model/scale_6.tmp_1", 32},
+        {"save_infer_model/scale_3.tmp_1", "save_infer_model/scale_7.tmp_1", 64},
+    };
+    // Class names, indexed by label.
+    std::vector<std::string>
+    labels{"person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
+           "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
+           "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
+           "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
+           "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
+           "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
+           "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
+           "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
+           "hair drier", "toothbrush"};
+};
+// Helpers defined in picodet_mnn.cpp.
+// Softmax of `length` values from `src` into `dst`.
+template <typename T>
+int activation_function_softmax(const T *src, T *dst, int length);
+// Approximate exponential.
+inline float fast_exp(float x);
+// Logistic function built on fast_exp().
+inline float sigmoid(float x);
+#endif
--- a/deploy/third_engine/demo_mnn/python/demo_mnn.py
+++ b/deploy/third_engine/demo_mnn/python/demo_mnn.py
--- a/deploy/third_engine/demo_ncnn/CMakeLists.txt
+++ b/deploy/third_engine/demo_ncnn/CMakeLists.txt
+# Build script for the PicoDet NCNN demo executable (picodet_demo).
+cmake_minimum_required(VERSION 3.4.1)
+set(CMAKE_CXX_STANDARD 17)
+project(picodet_demo)
+# OpenMP is mandatory; its flags are appended to the global compile/link flags.
+find_package(OpenMP REQUIRED)
+if(OPENMP_FOUND)
+    message("OPENMP FOUND")
+    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
+    set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}")
+endif()
+find_package(OpenCV REQUIRED)
+# ncnn is located through its CMake package (see ncnn_DIR in the README).
+find_package(ncnn REQUIRED)
+if(NOT TARGET ncnn)
+    message(WARNING "ncnn NOT FOUND!  Please set ncnn_DIR environment variable")
+else()
+    message("ncnn FOUND ")
+endif()
+# Header search paths: OpenCV plus the source and build directories.
+include_directories(
+    ${OpenCV_INCLUDE_DIRS}
+    ${CMAKE_CURRENT_SOURCE_DIR}
+    ${CMAKE_CURRENT_BINARY_DIR}
+)
+add_executable(picodet_demo main.cpp picodet.cpp)
+target_link_libraries(
+    picodet_demo
+    ncnn
+    ${OpenCV_LIBS}
+)
--- a/deploy/third_engine/demo_ncnn/README.md
+++ b/deploy/third_engine/demo_ncnn/README.md
+# PicoDet NCNN Demo
+This project provides PicoDet image inference, webcam inference and benchmark using
+[Tencent's NCNN framework](https://github.com/Tencent/ncnn).
+# How to build
+## Windows
+### Step1.
+Download and Install Visual Studio from https://visualstudio.microsoft.com/vs/community/
+### Step2.
+Download and install OpenCV from https://github.com/opencv/opencv/releases
+### Step3(Optional).
+Download and install Vulkan SDK from https://vulkan.lunarg.com/sdk/home
+### Step4.
+Clone NCNN repository
+``` shell script
+git clone --recursive https://github.com/Tencent/ncnn.git
+```
+Build NCNN following this tutorial: [Build for Windows x64 using VS2017](https://github.com/Tencent/ncnn/wiki/how-to-build#build-for-windows-x64-using-visual-studio-community-2017)
+### Step5.
+Add `ncnn_DIR` = `YOUR_NCNN_PATH/build/install/lib/cmake/ncnn` to system environment variables.
+Build project: Open x64 Native Tools Command Prompt for VS 2019 or 2017
+``` cmd
+cd <this-folder>
+mkdir build
+cd build
+cmake ..
+msbuild picodet_demo.vcxproj /p:configuration=release /p:platform=x64
+```
+## Linux
+### Step1.
+Build and install OpenCV from https://github.com/opencv/opencv
+### Step2(Optional).
+Download Vulkan SDK from https://vulkan.lunarg.com/sdk/home
+### Step3.
+Clone NCNN repository
+``` shell script
+git clone --recursive https://github.com/Tencent/ncnn.git
+```
+Build NCNN following this tutorial: [Build for Linux / NVIDIA Jetson / Raspberry Pi](https://github.com/Tencent/ncnn/wiki/how-to-build#build-for-linux)
+### Step4.
+Set environment variables. Run:
+``` shell script
+export ncnn_DIR=YOUR_NCNN_PATH/build/install/lib/cmake/ncnn
+```
+Build project
+``` shell script
+cd <this-folder>
+mkdir build
+cd build
+cmake ..
+make
+```
+# Run demo
+Download PicoDet ncnn model.
+* [PicoDet ncnn model download link](https://paddledet.bj.bcebos.com/deploy/third_engine/picodet_m_416_ncnn.zip)
+## Webcam
+```shell script
+picodet_demo 0 0
+```
+## Inference images
+```shell script
+picodet_demo 1 IMAGE_FOLDER/*.jpg
+```
+## Inference video
+```shell script
+picodet_demo 2 VIDEO_PATH
+```
+## Benchmark
+```shell script
+picodet_demo 3 0
+result: picodet  min = 17.74  max = 22.71  avg = 18.16
+```
+****
+Notice:
+If the benchmark is slow, try limiting the number of OpenMP threads.
+Linux:
+```shell script
+export OMP_THREAD_LIMIT=4
+```
--- a/deploy/third_engine/demo_ncnn/main.cpp
+++ b/deploy/third_engine/demo_ncnn/main.cpp
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// reference from https://github.com/RangiLyu/nanodet/tree/main/demo_ncnn
+#include <opencv2/core/core.hpp>
+#include <opencv2/highgui/highgui.hpp>
+#include <opencv2/imgproc/imgproc.hpp>
+#include <iostream>
+#include <net.h>
+#include "picodet.h"
+#include <benchmark.h>
+// Rectangle, in pixels of the resized network input, that resize_uniform()
+// reports as the area actually covered by image content (the rest is padding).
+// Members default to zero so a rect that was declared but never filled in
+// holds well-defined values instead of indeterminate ones.
+struct object_rect {
+    int x = 0;       // left edge of the content area
+    int y = 0;       // top edge of the content area
+    int width = 0;   // width of the content area
+    int height = 0;  // height of the content area
+};
+// Resizes `src` into a black `dst_size` canvas while keeping its aspect ratio
+// (letterboxing) and reports where the image content ended up.
+//
+//   src         - input image; the canvas is CV_8UC3, so a 3-channel 8-bit
+//                 image is expected. An empty image makes cv::resize throw.
+//   dst         - receives the padded result.
+//   dst_size    - size of the padded result.
+//   effect_area - receives the rectangle inside `dst` covered by the image.
+//
+// Always returns 0.
+int resize_uniform(cv::Mat& src, cv::Mat& dst, cv::Size dst_size, object_rect& effect_area)
+{
+    const int w = src.cols;
+    const int h = src.rows;
+    const int dst_w = dst_size.width;
+    const int dst_h = dst_size.height;
+    dst = cv::Mat(cv::Size(dst_w, dst_h), CV_8UC3, cv::Scalar(0));
+    const float ratio_src = w * 1.0 / h;
+    const float ratio_dst = dst_w * 1.0 / dst_h;
+    int tmp_w = dst_w;
+    int tmp_h = dst_h;
+    if (ratio_src > ratio_dst) {
+        // Source is relatively wider: fill the width, pad top and bottom.
+        tmp_h = floor((dst_w * 1.0 / w) * h);
+    }
+    else if (ratio_src < ratio_dst) {
+        // Source is relatively taller: fill the height, pad left and right.
+        tmp_w = floor((dst_h * 1.0 / h) * w);
+    }
+    else {
+        // Same aspect ratio: a plain resize fills the whole canvas.
+        cv::resize(src, dst, dst_size);
+        effect_area.x = 0;
+        effect_area.y = 0;
+        effect_area.width = dst_w;
+        effect_area.height = dst_h;
+        return 0;
+    }
+    // Extreme aspect ratios can round the scaled side down to 0 and rounding
+    // can push it up to the canvas size; keep it inside [1, canvas side] so the
+    // resize target and the ROI below are always valid.
+    if (tmp_w < 1) tmp_w = 1;
+    if (tmp_h < 1) tmp_h = 1;
+    if (tmp_w > dst_w) tmp_w = dst_w;
+    if (tmp_h > dst_h) tmp_h = dst_h;
+    // Centre the scaled image; the difference is never negative here, so the
+    // integer division matches the original floor((dst - tmp) / 2.0).
+    const int index_w = (dst_w - tmp_w) / 2;
+    const int index_h = (dst_h - tmp_h) / 2;
+    cv::Mat tmp;
+    cv::resize(src, tmp, cv::Size(tmp_w, tmp_h));
+    // Copy through an ROI instead of raw memcpy so OpenCV handles row strides.
+    cv::Mat roi = dst(cv::Rect(index_w, index_h, tmp_w, tmp_h));
+    tmp.copyTo(roi);
+    // Every path now reports a defined content rectangle.
+    effect_area.x = index_w;
+    effect_area.y = index_h;
+    effect_area.width = tmp_w;
+    effect_area.height = tmp_h;
+    return 0;
+}
+const int color_list[80][3] = // one colour triple per class label; draw_bboxes() indexes it with BoxInfo::label and passes the three values in order to cv::Scalar
+{
+    {216 , 82 , 24},
+    {236 ,176 , 31},
+    {125 , 46 ,141},
+    {118 ,171 , 47},
+    { 76 ,189 ,237},
+    {238 , 19 , 46},
+    { 76 , 76 , 76},
+    {153 ,153 ,153},
+    {255 ,  0 ,  0},
+    {255 ,127 ,  0},
+    {190 ,190 ,  0},
+    {  0 ,255 ,  0},
+    {  0 ,  0 ,255},
+    {170 ,  0 ,255},
+    { 84 , 84 ,  0},
+    { 84 ,170 ,  0},
+    { 84 ,255 ,  0},
+    {170 , 84 ,  0},
+    {170 ,170 ,  0},
+    {170 ,255 ,  0},
+    {255 , 84 ,  0},
+    {255 ,170 ,  0},
+    {255 ,255 ,  0},
+    {  0 , 84 ,127},
+    {  0 ,170 ,127},
+    {  0 ,255 ,127},
+    { 84 ,  0 ,127},
+    { 84 , 84 ,127},
+    { 84 ,170 ,127},
+    { 84 ,255 ,127},
+    {170 ,  0 ,127},
+    {170 , 84 ,127},
+    {170 ,170 ,127},
+    {170 ,255 ,127},
+    {255 ,  0 ,127},
+    {255 , 84 ,127},
+    {255 ,170 ,127},
+    {255 ,255 ,127},
+    {  0 , 84 ,255},
+    {  0 ,170 ,255},
+    {  0 ,255 ,255},
+    { 84 ,  0 ,255},
+    { 84 , 84 ,255},
+    { 84 ,170 ,255},
+    { 84 ,255 ,255},
+    {170 ,  0 ,255},
+    {170 , 84 ,255},
+    {170 ,170 ,255},
+    {170 ,255 ,255},
+    {255 ,  0 ,255},
+    {255 , 84 ,255},
+    {255 ,170 ,255},
+    { 42 ,  0 ,  0},
+    { 84 ,  0 ,  0},
+    {127 ,  0 ,  0},
+    {170 ,  0 ,  0},
+    {212 ,  0 ,  0},
+    {255 ,  0 ,  0},
+    {  0 , 42 ,  0},
+    {  0 , 84 ,  0},
+    {  0 ,127 ,  0},
+    {  0 ,170 ,  0},
+    {  0 ,212 ,  0},
+    {  0 ,255 ,  0},
+    {  0 ,  0 , 42},
+    {  0 ,  0 , 84},
+    {  0 ,  0 ,127},
+    {  0 ,  0 ,170},
+    {  0 ,  0 ,212},
+    {  0 ,  0 ,255},
+    {  0 ,  0 ,  0},
+    { 36 , 36 , 36},
+    { 72 , 72 , 72},
+    {109 ,109 ,109},
+    {145 ,145 ,145},
+    {182 ,182 ,182},
+    {218 ,218 ,218},
+    {  0 ,113 ,188},
+    { 80 ,182 ,188},
+    {127 ,127 ,  0},
+};
+// Draws every detection on a copy of `bgr` and writes the annotated image to
+// ../result/test_picodet.jpg.
+//
+//   bgr        - original (un-resized) image.
+//   bboxes     - detections in the coordinate system of the padded network
+//                input produced by resize_uniform().
+//   effect_roi - content rectangle reported by resize_uniform(); used to map
+//                box coordinates back onto `bgr`.
+//
+// Detections whose label falls outside the class/colour tables are skipped.
+// Nothing is written when `effect_roi` has a non-positive size.
+void draw_bboxes(const cv::Mat& bgr, const std::vector<BoxInfo>& bboxes, object_rect effect_roi)
+{
+    static const char* class_names[] = { "person", "bicycle", "car", "motorcycle", "airplane", "bus",
+                                        "train", "truck", "boat", "traffic light", "fire hydrant",
+                                        "stop sign", "parking meter", "bench", "bird", "cat", "dog",
+                                        "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe",
+                                        "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
+                                        "skis", "snowboard", "sports ball", "kite", "baseball bat",
+                                        "baseball glove", "skateboard", "surfboard", "tennis racket",
+                                        "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl",
+                                        "banana", "apple", "sandwich", "orange", "broccoli", "carrot",
+                                        "hot dog", "pizza", "donut", "cake", "chair", "couch",
+                                        "potted plant", "bed", "dining table", "toilet", "tv", "laptop",
+                                        "mouse", "remote", "keyboard", "cell phone", "microwave", "oven",
+                                        "toaster", "sink", "refrigerator", "book", "clock", "vase",
+                                        "scissors", "teddy bear", "hair drier", "toothbrush"
+    };
+    const int num_names = static_cast<int>(sizeof(class_names) / sizeof(class_names[0]));
+    const int num_colors = static_cast<int>(sizeof(color_list) / sizeof(color_list[0]));
+    // The ROI size is a divisor below; refuse to draw with a degenerate one.
+    if (effect_roi.width <= 0 || effect_roi.height <= 0)
+    {
+        fprintf(stderr, "draw_bboxes: invalid effect_roi size %dx%d\n", effect_roi.width, effect_roi.height);
+        return;
+    }
+    cv::Mat image = bgr.clone();
+    // Scale factors from the content area of the network input back to `bgr`.
+    const float width_ratio = (float)image.cols / (float)effect_roi.width;
+    const float height_ratio = (float)image.rows / (float)effect_roi.height;
+    for (const BoxInfo& bbox : bboxes)
+    {
+        // Both tables are indexed by label; skip anything they cannot describe.
+        if (bbox.label < 0 || bbox.label >= num_names || bbox.label >= num_colors)
+            continue;
+        const int* c = color_list[bbox.label];
+        const cv::Scalar color(c[0], c[1], c[2]);
+        // Remove the padding offset, then scale to source-image pixels.
+        const int left = (bbox.x1 - effect_roi.x) * width_ratio;
+        const int top = (bbox.y1 - effect_roi.y) * height_ratio;
+        const int right = (bbox.x2 - effect_roi.x) * width_ratio;
+        const int bottom = (bbox.y2 - effect_roi.y) * height_ratio;
+        cv::rectangle(image, cv::Rect(cv::Point(left, top), cv::Point(right, bottom)), color);
+        char text[256];
+        snprintf(text, sizeof(text), "%s %.1f%%", class_names[bbox.label], bbox.score * 100);
+        int baseLine = 0;
+        cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.4, 1, &baseLine);
+        // Place the caption just above the box, kept inside the image.
+        int x = left;
+        int y = (bbox.y1 - effect_roi.y) * height_ratio - label_size.height - baseLine;
+        if (y < 0)
+            y = 0;
+        if (x + label_size.width > image.cols)
+            x = image.cols - label_size.width;
+        cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)),
+            color, -1);
+        cv::putText(image, text, cv::Point(x, y + label_size.height),
+            cv::FONT_HERSHEY_SIMPLEX, 0.4, cv::Scalar(255, 255, 255));
+    }
+    // Only report success when the file was actually written (for example,
+    // the ../result directory may not exist).
+    if (!cv::imwrite("../result/test_picodet.jpg", image))
+    {
+        fprintf(stderr, "cv::imwrite ../result/test_picodet.jpg failed\n");
+        return;
+    }
+    printf("************infer image success!!!**********\n");
+}
+// Runs detection on every file matching the glob pattern `imagepath` and hands
+// each result to draw_bboxes().
+// Returns 0 when all images were processed, -1 as soon as one cannot be read.
+int image_demo(PicoDet &detector, const char* imagepath)
+{
+    std::vector<std::string> filenames;
+    cv::glob(imagepath, filenames, false);
+    for (const auto& img_name : filenames)
+    {
+        cv::Mat image = cv::imread(img_name);
+        if (image.empty())
+        {
+            // %s needs a C string; passing the std::string object itself is undefined.
+            fprintf(stderr, "cv::imread %s failed\n", img_name.c_str());
+            return -1;
+        }
+        object_rect effect_roi;
+        cv::Mat resized_img;
+        // Letterbox to the 320x320 network input; effect_roi records the padding.
+        resize_uniform(image, resized_img, cv::Size(320, 320), effect_roi);
+        // 0.4 is the score threshold, 0.5 the NMS threshold.
+        auto results = detector.detect(resized_img, 0.4, 0.5);
+        draw_bboxes(image, results, effect_roi);
+        cv::waitKey(0);
+    }
+    return 0;
+}
+// Runs detection on frames grabbed from camera `cam_id` until the camera stops
+// delivering frames.
+// Returns -1 when the camera cannot be opened or stops delivering frames.
+int webcam_demo(PicoDet& detector, int cam_id)
+{
+    cv::VideoCapture cap(cam_id);
+    if (!cap.isOpened())
+    {
+        fprintf(stderr, "cv::VideoCapture failed to open camera %d\n", cam_id);
+        return -1;
+    }
+    cv::Mat image;
+    while (true)
+    {
+        cap >> image;
+        // An empty frame would make cv::resize throw inside resize_uniform().
+        if (image.empty())
+        {
+            fprintf(stderr, "camera %d returned an empty frame\n", cam_id);
+            return -1;
+        }
+        object_rect effect_roi;
+        cv::Mat resized_img;
+        resize_uniform(image, resized_img, cv::Size(320, 320), effect_roi);
+        // 0.4 is the score threshold, 0.5 the NMS threshold.
+        auto results = detector.detect(resized_img, 0.4, 0.5);
+        draw_bboxes(image, results, effect_roi);
+        cv::waitKey(1);
+    }
+    return 0;
+}
+// Runs detection on every frame of the video at `path`.
+// Returns 0 once the stream is exhausted, -1 when the video cannot be opened.
+int video_demo(PicoDet& detector, const char* path)
+{
+    cv::VideoCapture cap(path);
+    if (!cap.isOpened())
+    {
+        fprintf(stderr, "cv::VideoCapture failed to open %s\n", path);
+        return -1;
+    }
+    cv::Mat image;
+    while (true)
+    {
+        cap >> image;
+        // An empty frame marks the end of the stream; stop instead of passing
+        // it to resize_uniform(), where cv::resize would throw.
+        if (image.empty())
+            break;
+        object_rect effect_roi;
+        cv::Mat resized_img;
+        resize_uniform(image, resized_img, cv::Size(320, 320), effect_roi);
+        // 0.4 is the score threshold, 0.5 the NMS threshold.
+        auto results = detector.detect(resized_img, 0.4, 0.5);
+        draw_bboxes(image, results, effect_roi);
+        cv::waitKey(1);
+    }
+    return 0;
+}
+// Times the network on a constant 320x320x3 input: 8 untimed warm-up runs are
+// followed by 100 measured runs, and the min / max / average duration is
+// printed to stderr in whatever unit ncnn::get_current_time() reports
+// (presumably milliseconds - confirm). Always returns 0.
+int benchmark(PicoDet& detector)
+{
+    const int warm_up = 8;
+    const int loop_num = 100;
+    const int total_runs = warm_up + loop_num;
+    double time_min = DBL_MAX;
+    double time_max = -DBL_MAX;
+    double time_avg = 0;
+    ncnn::Mat input = ncnn::Mat(320, 320, 3);
+    input.fill(0.01f);
+    int run = 0;
+    while (run < total_runs)
+    {
+        const double start = ncnn::get_current_time();
+        ncnn::Extractor ex = detector.Net->create_extractor();
+        ex.input("image", input); // picodet
+        // Pull both outputs of every head so the whole graph is evaluated.
+        for (size_t k = 0; k < detector.heads_info.size(); k++)
+        {
+            const auto& head = detector.heads_info[k];
+            ncnn::Mat dis_pred;
+            ncnn::Mat cls_pred;
+            ex.extract(head.dis_layer.c_str(), dis_pred);
+            ex.extract(head.cls_layer.c_str(), cls_pred);
+        }
+        const double elapsed = ncnn::get_current_time() - start;
+        const bool measured = run >= warm_up;
+        run++;
+        if (!measured)
+            continue;
+        if (elapsed < time_min)
+            time_min = elapsed;
+        if (time_max < elapsed)
+            time_max = elapsed;
+        time_avg += elapsed;
+    }
+    time_avg /= loop_num;
+    fprintf(stderr, "%20s  min = %7.2f  max = %7.2f  avg = %7.2f\n", "picodet", time_min, time_max, time_avg);
+    return 0;
+}
+// Prints the command-line help to stderr.
+static void print_usage(const char* prog)
+{
+    fprintf(stderr, "usage: %s [mode] [path]. \n For webcam mode=0, path is cam id; \n For image demo, mode=1, path=xxx/xxx/*.jpg; \n For video, mode=2; \n For benchmark, mode=3 path=0.\n", prog);
+}
+// Entry point: `argv[1]` selects the demo mode and `argv[2]` is its argument
+// (camera id, image glob, video path, or ignored for the benchmark).
+// The exit status is the selected demo's return value, or -1 for bad usage.
+int main(int argc, char** argv)
+{
+    if (argc != 3)
+    {
+        print_usage(argv[0]);
+        return -1;
+    }
+    PicoDet detector = PicoDet("../weight/picodet_m_416.param", "../weight/picodet_m_416.bin", true);
+    const int mode = atoi(argv[1]);
+    switch (mode)
+    {
+    case 0:
+        return webcam_demo(detector, atoi(argv[2]));
+    case 1:
+        return image_demo(detector, argv[2]);
+    case 2:
+        return video_demo(detector, argv[2]);
+    case 3:
+        return benchmark(detector);
+    default:
+        // Unknown mode: explain the options and signal failure to the caller.
+        print_usage(argv[0]);
+        return -1;
+    }
+}
--- a/deploy/third_engine/demo_ncnn/picodet.cpp
+++ b/deploy/third_engine/demo_ncnn/picodet.cpp
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// reference from https://github.com/RangiLyu/nanodet/tree/main/demo_ncnn
+#include "picodet.h"
+#include <benchmark.h>
+#include <iostream>
+// Fast approximation of exp(x): the scaled argument is written straight into
+// the bit pattern of an IEEE-754 float, trading accuracy (a few percent) for
+// speed.
+// Inputs whose result would underflow return 0, inputs whose result would
+// overflow return +infinity, and NaN is passed through. Previously these
+// inputs converted an out-of-range double to uint32_t, which is undefined.
+inline float fast_exp(float x)
+{
+    if (x != x)
+        return x; // NaN in, NaN out
+    union {
+        uint32_t i;
+        float f;
+    } v{};
+    double bits = (1 << 23) * (1.4426950409 * x + 126.93490512f);
+    if (bits < 0.0)
+        bits = 0.0;              // bit pattern of +0.0f
+    else if (bits > 2139095040.0)
+        bits = 2139095040.0;     // 0x7f800000, bit pattern of +infinity
+    v.i = static_cast<uint32_t>(bits);
+    return v.f;
+}
+// Logistic function 1 / (1 + e^-x), evaluated with the fast_exp() approximation.
+inline float sigmoid(float x)
+{
+    const float decay = fast_exp(-x);
+    return 1.0f / (1.0f + decay);
+}
+// Writes the softmax of src[0..length) into dst[0..length), using fast_exp()
+// for the exponentials. The maximum is subtracted first so the exponent
+// arguments are never positive.
+// Returns 0 on success and -1 (leaving dst untouched) when a pointer is null
+// or `length` is not positive.
+template<typename T>
+int activation_function_softmax(const T* src, T* dst, int length)
+{
+    // max_element on an empty range returns `src` itself, which must not be
+    // dereferenced.
+    if (src == nullptr || dst == nullptr || length <= 0) {
+        return -1;
+    }
+    const T alpha = *std::max_element(src, src + length);
+    T denominator{ 0 };
+    for (int i = 0; i < length; ++i) {
+        dst[i] = fast_exp(src[i] - alpha);
+        denominator += dst[i];
+    }
+    for (int i = 0; i < length; ++i) {
+        dst[i] /= denominator;
+    }
+    return 0;
+}
+bool PicoDet::hasGPU = false; // storage for the static flag; the constructor sets it from ncnn::get_gpu_count() when NCNN_VULKAN is enabled
+PicoDet* PicoDet::detector = nullptr; // storage for the static pointer; nothing else in this file assigns or reads it
+// Creates the ncnn network and loads its structure and weights.
+//
+//   param  - path of the ncnn .param file.
+//   bin    - path of the ncnn .bin file.
+//   useGPU - request Vulkan compute; only honoured when ncnn was built with
+//            NCNN_VULKAN and reports at least one GPU.
+//
+// Load failures are reported on stderr; the constructor has no other way to
+// signal them without changing its interface.
+PicoDet::PicoDet(const char* param, const char* bin, bool useGPU)
+{
+    this->Net = new ncnn::Net();
+#if NCNN_VULKAN
+    this->hasGPU = ncnn::get_gpu_count() > 0;
+#endif
+    this->Net->opt.use_vulkan_compute = this->hasGPU && useGPU;
+    this->Net->opt.use_fp16_arithmetic = true;
+    // ncnn's load functions return 0 on success.
+    if (this->Net->load_param(param) != 0)
+    {
+        std::cerr << "PicoDet: failed to load param file " << (param ? param : "(null)") << std::endl;
+    }
+    if (this->Net->load_model(bin) != 0)
+    {
+        std::cerr << "PicoDet: failed to load model file " << (bin ? bin : "(null)") << std::endl;
+    }
+}
+// Releases the network allocated by the constructor.
+PicoDet::~PicoDet()
+{
+    delete Net;
+}
+// Converts a BGR cv::Mat into an ncnn::Mat and normalises it per channel as
+// (pixel - mean) * norm. The pixel buffer is read through image.data with the
+// image's cols/rows, so `image` is presumably expected to be a continuous
+// 8-bit BGR image - confirm against callers.
+void PicoDet::preprocess(cv::Mat& image, ncnn::Mat& in)
+{
+    const float mean_vals[3] = { 103.53f, 116.28f, 123.675f };
+    const float norm_vals[3] = { 0.017429f, 0.017507f, 0.017125f };
+    const int width = image.cols;
+    const int height = image.rows;
+    in = ncnn::Mat::from_pixels(image.data, ncnn::Mat::PIXEL_BGR, width, height);
+    in.substract_mean_normalize(mean_vals, norm_vals);
+}
+// Runs the network on `image` and returns the surviving detections.
+//
+//   image           - input already resized to the network input size.
+//   score_threshold - minimum class score for a location to produce a box.
+//   nms_threshold   - IoU at or above which a lower-scored box of the same
+//                     class is suppressed.
+//
+// Boxes are returned grouped by class label, in the coordinate system of
+// `image`.
+std::vector<BoxInfo> PicoDet::detect(cv::Mat image, float score_threshold, float nms_threshold)
+{
+    ncnn::Mat input;
+    preprocess(image, input);
+    auto ex = this->Net->create_extractor();
+    ex.set_light_mode(false);
+    ex.set_num_threads(4);
+#if NCNN_VULKAN
+    ex.set_vulkan_compute(this->hasGPU);
+#endif
+    ex.input("image", input); //picodet
+    // One candidate list per class, filled by every detection head.
+    std::vector<std::vector<BoxInfo>> results(this->num_class);
+    for (const auto& head : this->heads_info)
+    {
+        ncnn::Mat dis_pred;
+        ncnn::Mat cls_pred;
+        ex.extract(head.dis_layer.c_str(), dis_pred);
+        ex.extract(head.cls_layer.c_str(), cls_pred);
+        this->decode_infer(cls_pred, dis_pred, head.stride, score_threshold, results);
+    }
+    // NMS runs per class; the survivors are concatenated in class order.
+    std::vector<BoxInfo> dets;
+    for (auto& per_class : results)
+    {
+        this->nms(per_class, nms_threshold);
+        dets.insert(dets.end(), per_class.begin(), per_class.end());
+    }
+    return dets;
+}
+// Decodes one detection head: for every cell of the feature map the
+// best-scoring class is picked and, if its score exceeds `threshold`, the
+// cell's distance prediction is turned into a box and appended to
+// results[label].
+//
+//   cls_pred - class scores, one row per cell.
+//   dis_pred - box distance predictions, one row per cell.
+//   stride   - stride of this head; the feature map is input_size / stride.
+void PicoDet::decode_infer(ncnn::Mat& cls_pred, ncnn::Mat& dis_pred, int stride, float threshold, std::vector<std::vector<BoxInfo>>& results)
+{
+    const int feature_h = this->input_size[1] / stride;
+    const int feature_w = this->input_size[0] / stride;
+    for (int row = 0; row < feature_h; row++)
+    {
+        for (int col = 0; col < feature_w; col++)
+        {
+            // Cells are stored row-major.
+            const int idx = row * feature_w + col;
+            const float* scores = cls_pred.row(idx);
+            // Arg-max over classes; scores that are not above 0 never win.
+            float best_score = 0;
+            int best_label = 0;
+            for (int label = 0; label < this->num_class; label++)
+            {
+                if (scores[label] > best_score)
+                {
+                    best_score = scores[label];
+                    best_label = label;
+                }
+            }
+            if (!(best_score > threshold))
+            {
+                continue;
+            }
+            const float* bbox_pred = dis_pred.row(idx);
+            results[best_label].push_back(this->disPred2Bbox(bbox_pred, best_label, best_score, col, row, stride));
+        }
+    }
+}
+// Converts the distance prediction of one feature-map cell into a box.
+//
+//   dfl_det - 4 * (reg_max + 1) values: one discrete distribution per side.
+//   label   - class label stored in the returned box.
+//   score   - score stored in the returned box.
+//   x, y    - column and row of the cell.
+//   stride  - stride of the head the cell belongs to.
+//
+// Each side distance is the expectation of its softmaxed distribution
+// multiplied by `stride`; sides 0..3 are subtracted from / added to the cell
+// centre to give x1, y1, x2, y2. The box is clipped to the network input.
+BoxInfo PicoDet::disPred2Bbox(const float*& dfl_det, int label, float score, int x, int y, int stride)
+{
+    const float ct_x = (x + 0.5) * stride;
+    const float ct_y = (y + 0.5) * stride;
+    const int bins = this->reg_max + 1;
+    // One scratch buffer reused for all four sides instead of a new[]/delete[]
+    // pair per side.
+    std::vector<float> dis_after_sm(bins);
+    std::vector<float> dis_pred;
+    dis_pred.resize(4);
+    for (int i = 0; i < 4; i++)
+    {
+        float dis = 0;
+        activation_function_softmax(dfl_det + i * bins, dis_after_sm.data(), bins);
+        for (int j = 0; j < bins; j++)
+        {
+            dis += j * dis_after_sm[j];
+        }
+        dis *= stride;
+        dis_pred[i] = dis;
+    }
+    float xmin = (std::max)(ct_x - dis_pred[0], .0f);
+    float ymin = (std::max)(ct_y - dis_pred[1], .0f);
+    float xmax = (std::min)(ct_x + dis_pred[2], (float)this->input_size[0]);
+    float ymax = (std::min)(ct_y + dis_pred[3], (float)this->input_size[1]);
+    return BoxInfo { xmin, ymin, xmax, ymax, score, label };
+}
+// Greedy non-maximum suppression, in place.
+// Boxes are sorted by descending score; every box that is still alive
+// suppresses each later box whose IoU with it is >= NMS_THRESH. On return
+// `input_boxes` holds only the survivors, in descending score order.
+// Areas use the inclusive-pixel convention (+1 on width and height).
+void PicoDet::nms(std::vector<BoxInfo>& input_boxes, float NMS_THRESH)
+{
+    std::sort(input_boxes.begin(), input_boxes.end(),
+              [](const BoxInfo& a, const BoxInfo& b) { return a.score > b.score; });
+    const size_t count = input_boxes.size();
+    std::vector<float> vArea(count);
+    for (size_t i = 0; i < count; ++i) {
+        vArea[i] = (input_boxes[i].x2 - input_boxes[i].x1 + 1)
+            * (input_boxes[i].y2 - input_boxes[i].y1 + 1);
+    }
+    // Mark suppressed boxes instead of erasing them one by one; the survivors
+    // are the same, without shifting the vector on every removal.
+    std::vector<char> suppressed(count, 0);
+    std::vector<BoxInfo> kept;
+    kept.reserve(count);
+    for (size_t i = 0; i < count; ++i) {
+        if (suppressed[i]) {
+            continue;
+        }
+        const BoxInfo& a = input_boxes[i];
+        kept.push_back(a);
+        for (size_t j = i + 1; j < count; ++j) {
+            if (suppressed[j]) {
+                continue;
+            }
+            const BoxInfo& b = input_boxes[j];
+            float xx1 = (std::max)(a.x1, b.x1);
+            float yy1 = (std::max)(a.y1, b.y1);
+            float xx2 = (std::min)(a.x2, b.x2);
+            float yy2 = (std::min)(a.y2, b.y2);
+            float w = (std::max)(float(0), xx2 - xx1 + 1);
+            float h = (std::max)(float(0), yy2 - yy1 + 1);
+            float inter = w * h;
+            float ovr = inter / (vArea[i] + vArea[j] - inter);
+            if (ovr >= NMS_THRESH) {
+                suppressed[j] = 1;
+            }
+        }
+    }
+    input_boxes.swap(kept);
+}
--- a/deploy/third_engine/demo_ncnn/picodet.h
+++ b/deploy/third_engine/demo_ncnn/picodet.h
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// reference from https://github.com/RangiLyu/nanodet/tree/main/demo_ncnn
+#ifndef PICODET_H
+#define PICODET_H
+#include <opencv2/core/core.hpp>
+#include <net.h>
+// Describes one detection head: the names of its two output blobs and its
+// stride. (The former `typedef` had no alias name and was ignored by the
+// compiler.)
+struct HeadInfo
+{
+    std::string cls_layer; // blob name extracted as the class-score output
+    std::string dis_layer; // blob name extracted as the box-distance output
+    int stride;            // stride of this head relative to the network input
+};
+// One detection: a box given by two corners, its score and its class label.
+struct BoxInfo
+{
+    float x1;    // left
+    float y1;    // top
+    float x2;    // right
+    float y2;    // bottom
+    float score; // score of `label` for this box
+    int label;   // class index
+};
+class PicoDet // ncnn-backed PicoDet detector. NOTE(review): owns Net through a raw pointer and declares no copy operations, so a copied instance would delete Net twice - confirm no caller copies it
+{
+public:
+    PicoDet(const char* param, const char* bin, bool useGPU); // loads the .param/.bin pair; useGPU requests Vulkan compute
+    ~PicoDet(); // deletes Net
+    static PicoDet* detector; // static pointer, initialised to nullptr in picodet.cpp
+    ncnn::Net* Net; // network created in the constructor and deleted in the destructor
+    static bool hasGPU; // set by the constructor when NCNN_VULKAN is enabled and a GPU is reported
+    std::vector<HeadInfo> heads_info{ // one entry per detection head, decoded in this order by detect()
+        // cls_pred|dis_pred|stride
+        {"save_infer_model/scale_0.tmp_1", "save_infer_model/scale_4.tmp_1", 8},
+        {"save_infer_model/scale_1.tmp_1", "save_infer_model/scale_5.tmp_1", 16},
+        {"save_infer_model/scale_2.tmp_1", "save_infer_model/scale_6.tmp_1", 32},
+        {"save_infer_model/scale_3.tmp_1", "save_infer_model/scale_7.tmp_1", 64},
+    };
+    std::vector<BoxInfo> detect(cv::Mat image, float score_threshold, float nms_threshold); // preprocess, run the net, decode every head, per-class NMS
+    std::vector<std::string> labels{ "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light", // class names listed by label index
+                                    "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
+                                    "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
+                                    "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
+                                    "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
+                                    "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
+                                    "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
+                                    "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
+                                    "hair drier", "toothbrush" };
+private:
+    void preprocess(cv::Mat& image, ncnn::Mat& in); // BGR cv::Mat -> mean/scale-normalised ncnn::Mat
+    void decode_infer(ncnn::Mat& cls_pred, ncnn::Mat& dis_pred, int stride, float threshold, std::vector<std::vector<BoxInfo>>& results); // appends the boxes of one head to results[label]
+    BoxInfo disPred2Bbox(const float*& dfl_det, int label, float score, int x, int y, int stride); // one cell's distance distributions -> clipped box
+    static void nms(std::vector<BoxInfo>& result, float nms_threshold); // in-place greedy NMS on one class's boxes
+    int input_size[2] = {320, 320}; // network input size; [0] is used as width and [1] as height
+    int num_class = 80; // number of classes scanned per cell
+    int reg_max = 7; // each box side is a distribution over reg_max + 1 bins
+};
+#endif
--- a/deploy/third_engine/demo_ncnn/python/demo_ncnn.py
+++ b/deploy/third_engine/demo_ncnn/python/demo_ncnn.py
--- a/deploy/third_engine/demo_openvino/CMakeLists.txt
+++ b/deploy/third_engine/demo_openvino/CMakeLists.txt
+# Build script for the PicoDet OpenVINO demo executable.
+cmake_minimum_required(VERSION 3.4.1)
+project(picodet_demo)
+# Build as C++14 and fail at configure time if the compiler cannot provide it,
+# instead of silently falling back to an older standard.
+set(CMAKE_CXX_STANDARD 14)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+# External dependencies; the OpenVINO ones are located through the variables
+# exported by OpenVINO's setupvars script.
+find_package(OpenCV REQUIRED)
+find_package(InferenceEngine REQUIRED)
+find_package(ngraph REQUIRED)
+include_directories(
+    ${OpenCV_INCLUDE_DIRS}
+    ${CMAKE_CURRENT_SOURCE_DIR}
+    ${CMAKE_CURRENT_BINARY_DIR}
+)
+add_executable(picodet_demo main.cpp picodet_openvino.cpp)
+target_link_libraries(
+    picodet_demo
+    ${InferenceEngine_LIBRARIES}
+    ${NGRAPH_LIBRARIES}
+    ${OpenCV_LIBS}
+)
--- a/deploy/third_engine/demo_openvino/README.md
+++ b/deploy/third_engine/demo_openvino/README.md
+# PicoDet OpenVINO Demo
+This folder provides PicoDet inference code using
+[Intel's OpenVINO Toolkit](https://software.intel.com/content/www/us/en/develop/tools/openvino-toolkit.html). Most of the implementation in this folder is the same as in *demo_ncnn*.  
+**Recommendation:** install OpenVINO from the xxx.tar.gz archive rather than via the GitHub method.
+## Install OpenVINO Toolkit
+Go to [OpenVINO HomePage](https://software.intel.com/content/www/us/en/develop/tools/openvino-toolkit.html)
+Download a suitable version and install.
+Follow the official Get Started Guides: https://docs.openvinotoolkit.org/latest/get_started_guides.html
+## Set the Environment Variables
+### Windows:
+Run this command in cmd. (Every time before using OpenVINO)
+```cmd
+<INSTALL_DIR>\openvino_2021\bin\setupvars.bat
+```
+Or set the system environment variables once for all:
+Name                  |Value
+:--------------------:|:--------:
+INTEL_OPENVINO_DIR | <INSTALL_DIR>\openvino_2021
+INTEL_CVSDK_DIR | %INTEL_OPENVINO_DIR%
+InferenceEngine_DIR | %INTEL_OPENVINO_DIR%\deployment_tools\inference_engine\share
+HDDL_INSTALL_DIR | %INTEL_OPENVINO_DIR%\deployment_tools\inference_engine\external\hddl
+ngraph_DIR | %INTEL_OPENVINO_DIR%\deployment_tools\ngraph\cmake
+And add this to ```Path```
+```
+%INTEL_OPENVINO_DIR%\deployment_tools\inference_engine\bin\intel64\Debug;%INTEL_OPENVINO_DIR%\deployment_tools\inference_engine\bin\intel64\Release;%HDDL_INSTALL_DIR%\bin;%INTEL_OPENVINO_DIR%\deployment_tools\inference_engine\external\tbb\bin;%INTEL_OPENVINO_DIR%\deployment_tools\ngraph\lib
+```
+### Linux
+Run this command in shell. (Every time before using OpenVINO)
+```shell
+source /opt/intel/openvino_2021/bin/setupvars.sh
+```
+Or edit .bashrc
+```shell
+vi ~/.bashrc
+```
+Add this line to the end of the file
+```shell
+source /opt/intel/openvino_2021/bin/setupvars.sh
+```
+## Convert model
+   Convert to OpenVINO
+   ``` shell
+   cd <INSTALL_DIR>/openvino_2021/deployment_tools/model_optimizer
+   ```
+   Install requirements for convert tool
+   ```shell
+   cd ./install_prerequisites
+   sudo ./install_prerequisites_onnx.sh
+   ```
+   Then convert the model. Note: `mean_values` and `scale_values` must match the training settings in your YAML config file.
+   ```shell
+   python3 mo_onnx.py --input_model <ONNX_MODEL> --mean_values [103.53,116.28,123.675] --scale_values [57.375,57.12,58.395]
+   ```
+## Build
+### Windows
+```cmd
+<OPENVINO_INSTALL_DIR>\openvino_2021\bin\setupvars.bat
+mkdir build
+cd build
+cmake ..
+msbuild picodet_demo.vcxproj /p:configuration=release /p:platform=x64
+```
+### Linux
+```shell
+source /opt/intel/openvino_2021/bin/setupvars.sh
+mkdir build
+cd build
+cmake ..
+make
+```
+## Run demo
+Download PicoDet openvino model [PicoDet openvino model download link](https://paddledet.bj.bcebos.com/deploy/third_engine/picodet_m_416_openvino.zip).
+Move the PicoDet OpenVINO model files to the demo's `weight` folder, then run these commands:
+### Webcam
+```shell
+picodet_demo 0 0
+```
+### Inference images
+```shell
+picodet_demo 1 IMAGE_FOLDER/*.jpg
+```
+### Inference video
+```shell
+picodet_demo 2 VIDEO_PATH
+```
+### Benchmark
+```shell
+picodet_demo 3 0
+```
--- a/deploy/third_engine/demo_openvino/main.cpp
+++ b/deploy/third_engine/demo_openvino/main.cpp
+// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// reference from https://github.com/RangiLyu/nanodet
+#include "picodet_openvino.h"
+#include <opencv2/core/core.hpp>
+#include <opencv2/highgui/highgui.hpp>
+#include <opencv2/imgproc/imgproc.hpp>
+#include <iostream>
+// Rectangle, in pixels of the resized network input, that resize_uniform()
+// reports as the area actually covered by image content (the rest is padding).
+// Members default to zero so a rect that was declared but never filled in
+// holds well-defined values instead of indeterminate ones.
+struct object_rect {
+    int x = 0;       // left edge of the content area
+    int y = 0;       // top edge of the content area
+    int width = 0;   // width of the content area
+    int height = 0;  // height of the content area
+};
+// Resizes `src` into a black `dst_size` canvas while keeping its aspect ratio
+// (letterboxing) and reports where the image content ended up.
+//
+//   src         - input image; the canvas is CV_8UC3, so a 3-channel 8-bit
+//                 image is expected. An empty image makes cv::resize throw.
+//   dst         - receives the padded result.
+//   dst_size    - size of the padded result.
+//   effect_area - receives the rectangle inside `dst` covered by the image.
+//
+// Always returns 0.
+int resize_uniform(cv::Mat& src, cv::Mat& dst, cv::Size dst_size, object_rect& effect_area)
+{
+    const int w = src.cols;
+    const int h = src.rows;
+    const int dst_w = dst_size.width;
+    const int dst_h = dst_size.height;
+    dst = cv::Mat(cv::Size(dst_w, dst_h), CV_8UC3, cv::Scalar(0));
+    const float ratio_src = w * 1.0 / h;
+    const float ratio_dst = dst_w * 1.0 / dst_h;
+    int tmp_w = dst_w;
+    int tmp_h = dst_h;
+    if (ratio_src > ratio_dst) {
+        // Source is relatively wider: fill the width, pad top and bottom.
+        tmp_h = floor((dst_w * 1.0 / w) * h);
+    }
+    else if (ratio_src < ratio_dst) {
+        // Source is relatively taller: fill the height, pad left and right.
+        tmp_w = floor((dst_h * 1.0 / h) * w);
+    }
+    else {
+        // Same aspect ratio: a plain resize fills the whole canvas.
+        cv::resize(src, dst, dst_size);
+        effect_area.x = 0;
+        effect_area.y = 0;
+        effect_area.width = dst_w;
+        effect_area.height = dst_h;
+        return 0;
+    }
+    // Extreme aspect ratios can round the scaled side down to 0 and rounding
+    // can push it up to the canvas size; keep it inside [1, canvas side] so the
+    // resize target and the ROI below are always valid.
+    if (tmp_w < 1) tmp_w = 1;
+    if (tmp_h < 1) tmp_h = 1;
+    if (tmp_w > dst_w) tmp_w = dst_w;
+    if (tmp_h > dst_h) tmp_h = dst_h;
+    // Centre the scaled image; the difference is never negative here, so the
+    // integer division matches the original floor((dst - tmp) / 2.0).
+    const int index_w = (dst_w - tmp_w) / 2;
+    const int index_h = (dst_h - tmp_h) / 2;
+    cv::Mat tmp;
+    cv::resize(src, tmp, cv::Size(tmp_w, tmp_h));
+    // Copy through an ROI instead of raw memcpy so OpenCV handles row strides.
+    cv::Mat roi = dst(cv::Rect(index_w, index_h, tmp_w, tmp_h));
+    tmp.copyTo(roi);
+    // Every path now reports a defined content rectangle.
+    effect_area.x = index_w;
+    effect_area.y = index_h;
+    effect_area.width = tmp_w;
+    effect_area.height = tmp_h;
+    return 0;
+}
+const int color_list[80][3] = // one colour triple per class label; draw_bboxes() indexes it with BoxInfo::label and passes the three values in order to cv::Scalar
+{
+    {216 , 82 , 24},
+    {236 ,176 , 31},
+    {125 , 46 ,141},
+    {118 ,171 , 47},
+    { 76 ,189 ,237},
+    {238 , 19 , 46},
+    { 76 , 76 , 76},
+    {153 ,153 ,153},
+    {255 ,  0 ,  0},
+    {255 ,127 ,  0},
+    {190 ,190 ,  0},
+    {  0 ,255 ,  0},
+    {  0 ,  0 ,255},
+    {170 ,  0 ,255},
+    { 84 , 84 ,  0},
+    { 84 ,170 ,  0},
+    { 84 ,255 ,  0},
+    {170 , 84 ,  0},
+    {170 ,170 ,  0},
+    {170 ,255 ,  0},
+    {255 , 84 ,  0},
+    {255 ,170 ,  0},
+    {255 ,255 ,  0},
+    {  0 , 84 ,127},
+    {  0 ,170 ,127},
+    {  0 ,255 ,127},
+    { 84 ,  0 ,127},
+    { 84 , 84 ,127},
+    { 84 ,170 ,127},
+    { 84 ,255 ,127},
+    {170 ,  0 ,127},
+    {170 , 84 ,127},
+    {170 ,170 ,127},
+    {170 ,255 ,127},
+    {255 ,  0 ,127},
+    {255 , 84 ,127},
+    {255 ,170 ,127},
+    {255 ,255 ,127},
+    {  0 , 84 ,255},
+    {  0 ,170 ,255},
+    {  0 ,255 ,255},
+    { 84 ,  0 ,255},
+    { 84 , 84 ,255},
+    { 84 ,170 ,255},
+    { 84 ,255 ,255},
+    {170 ,  0 ,255},
+    {170 , 84 ,255},
+    {170 ,170 ,255},
+    {170 ,255 ,255},
+    {255 ,  0 ,255},
+    {255 , 84 ,255},
+    {255 ,170 ,255},
+    { 42 ,  0 ,  0},
+    { 84 ,  0 ,  0},
+    {127 ,  0 ,  0},
+    {170 ,  0 ,  0},
+    {212 ,  0 ,  0},
+    {255 ,  0 ,  0},
+    {  0 , 42 ,  0},
+    {  0 , 84 ,  0},
+    {  0 ,127 ,  0},
+    {  0 ,170 ,  0},
+    {  0 ,212 ,  0},
+    {  0 ,255 ,  0},
+    {  0 ,  0 , 42},
+    {  0 ,  0 , 84},
+    {  0 ,  0 ,127},
+    {  0 ,  0 ,170},
+    {  0 ,  0 ,212},
+    {  0 ,  0 ,255},
+    {  0 ,  0 ,  0},
+    { 36 , 36 , 36},
+    { 72 , 72 , 72},
+    {109 ,109 ,109},
+    {145 ,145 ,145},
+    {182 ,182 ,182},
+    {218 ,218 ,218},
+    {  0 ,113 ,188},
+    { 80 ,182 ,188},
+    {127 ,127 ,  0},
+};
+// Draws every detection on a copy of `bgr` and shows it in the HighGUI window
+// named "image".
+//
+//   bgr        - original (un-resized) image.
+//   bboxes     - detections in the coordinate system of the padded network
+//                input produced by resize_uniform().
+//   effect_roi - content rectangle reported by resize_uniform(); used to map
+//                box coordinates back onto `bgr`.
+//
+// Detections whose label falls outside the class/colour tables are skipped.
+// Nothing is shown when `effect_roi` has a non-positive size.
+void draw_bboxes(const cv::Mat& bgr, const std::vector<BoxInfo>& bboxes, object_rect effect_roi)
+{
+    static const char* class_names[] = { "person", "bicycle", "car", "motorcycle", "airplane", "bus",
+                                        "train", "truck", "boat", "traffic light", "fire hydrant",
+                                        "stop sign", "parking meter", "bench", "bird", "cat", "dog",
+                                        "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe",
+                                        "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
+                                        "skis", "snowboard", "sports ball", "kite", "baseball bat",
+                                        "baseball glove", "skateboard", "surfboard", "tennis racket",
+                                        "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl",
+                                        "banana", "apple", "sandwich", "orange", "broccoli", "carrot",
+                                        "hot dog", "pizza", "donut", "cake", "chair", "couch",
+                                        "potted plant", "bed", "dining table", "toilet", "tv", "laptop",
+                                        "mouse", "remote", "keyboard", "cell phone", "microwave", "oven",
+                                        "toaster", "sink", "refrigerator", "book", "clock", "vase",
+                                        "scissors", "teddy bear", "hair drier", "toothbrush"
+    };
+    const int num_names = static_cast<int>(sizeof(class_names) / sizeof(class_names[0]));
+    const int num_colors = static_cast<int>(sizeof(color_list) / sizeof(color_list[0]));
+    // The ROI size is a divisor below; refuse to draw with a degenerate one.
+    if (effect_roi.width <= 0 || effect_roi.height <= 0)
+    {
+        fprintf(stderr, "draw_bboxes: invalid effect_roi size %dx%d\n", effect_roi.width, effect_roi.height);
+        return;
+    }
+    cv::Mat image = bgr.clone();
+    // Scale factors from the content area of the network input back to `bgr`.
+    const float width_ratio = (float)image.cols / (float)effect_roi.width;
+    const float height_ratio = (float)image.rows / (float)effect_roi.height;
+    for (const BoxInfo& bbox : bboxes)
+    {
+        // Both tables are indexed by label; skip anything they cannot describe.
+        if (bbox.label < 0 || bbox.label >= num_names || bbox.label >= num_colors)
+            continue;
+        const int* c = color_list[bbox.label];
+        const cv::Scalar color(c[0], c[1], c[2]);
+        // Remove the padding offset, then scale to source-image pixels.
+        const int left = (bbox.x1 - effect_roi.x) * width_ratio;
+        const int top = (bbox.y1 - effect_roi.y) * height_ratio;
+        const int right = (bbox.x2 - effect_roi.x) * width_ratio;
+        const int bottom = (bbox.y2 - effect_roi.y) * height_ratio;
+        cv::rectangle(image, cv::Rect(cv::Point(left, top), cv::Point(right, bottom)), color);
+        char text[256];
+        snprintf(text, sizeof(text), "%s %.1f%%", class_names[bbox.label], bbox.score * 100);
+        int baseLine = 0;
+        cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.4, 1, &baseLine);
+        // Place the caption just above the box, kept inside the image.
+        int x = left;
+        int y = (bbox.y1 - effect_roi.y) * height_ratio - label_size.height - baseLine;
+        if (y < 0)
+            y = 0;
+        if (x + label_size.width > image.cols)
+            x = image.cols - label_size.width;
+        cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)),
+            color, -1);
+        cv::putText(image, text, cv::Point(x, y + label_size.height),
+            cv::FONT_HERSHEY_SIMPLEX, 0.4, cv::Scalar(255, 255, 255));
+    }
+    cv::imshow("image", image);
+}
+// Runs the detector on every file matching `imagepath` and displays each
+// annotated image until a key is pressed.
+//
+// detector:  detector used for inference.
+// imagepath: pattern handed to cv::glob (non-recursive).
+// Returns 0 when every matched image was processed, -1 as soon as one image
+// cannot be read.
+int image_demo(PicoDet& detector, const char* imagepath)
+{
+    std::vector<std::string> filenames;
+    cv::glob(imagepath, filenames, false);
+    for (const auto& img_name : filenames)
+    {
+        cv::Mat image = cv::imread(img_name);
+        if (image.empty())
+        {
+            fprintf(stderr, "cv::imread %s failed\n", img_name.c_str());
+            return -1;
+        }
+        object_rect effect_roi;
+        cv::Mat resized_img;
+        resize_uniform(image, resized_img, cv::Size(320, 320), effect_roi);
+        auto results = detector.detect(resized_img, 0.4, 0.5);
+        draw_bboxes(image, results, effect_roi);
+        // imshow only paints while the HighGUI event loop runs; without this
+        // call the window never shows the result. Block until a key press so
+        // each image can be inspected.
+        cv::waitKey(0);
+    }
+    return 0;
+}
+// Runs the detector on frames grabbed from camera `cam_id` and displays the
+// annotated frames.
+//
+// Returns -1 when the camera cannot be opened, 0 once the camera stops
+// delivering frames.
+int webcam_demo(PicoDet& detector, int cam_id)
+{
+    cv::VideoCapture cap(cam_id);
+    if (!cap.isOpened())
+    {
+        fprintf(stderr, "failed to open camera %d\n", cam_id);
+        return -1;
+    }
+    cv::Mat image;
+    while (true)
+    {
+        cap >> image;
+        // A failed grab leaves the frame empty; stop instead of pushing an
+        // empty image through resize and inference.
+        if (image.empty())
+        {
+            break;
+        }
+        object_rect effect_roi;
+        cv::Mat resized_img;
+        resize_uniform(image, resized_img, cv::Size(320, 320), effect_roi);
+        auto results = detector.detect(resized_img, 0.4, 0.5);
+        draw_bboxes(image, results, effect_roi);
+        cv::waitKey(1);
+    }
+    return 0;
+}
+// Runs the detector on every frame of the video at `path` and displays the
+// annotated frames.
+//
+// Returns -1 when the video cannot be opened, 0 after the last frame.
+int video_demo(PicoDet& detector, const char* path)
+{
+    cv::VideoCapture cap(path);
+    if (!cap.isOpened())
+    {
+        fprintf(stderr, "failed to open video %s\n", path);
+        return -1;
+    }
+    cv::Mat image;
+    while (true)
+    {
+        cap >> image;
+        // The capture yields an empty frame once the video is exhausted;
+        // leave the loop instead of processing an empty image.
+        if (image.empty())
+        {
+            break;
+        }
+        object_rect effect_roi;
+        cv::Mat resized_img;
+        resize_uniform(image, resized_img, cv::Size(320, 320), effect_roi);
+        auto results = detector.detect(resized_img, 0.4, 0.5);
+        draw_bboxes(image, results, effect_roi);
+        cv::waitKey(1);
+    }
+    return 0;
+}
+// Times detector.detect on a constant 320x320 image: 8 warm-up runs that are
+// discarded, then 100 measured runs. Prints min / max / average latency in
+// milliseconds to stderr and returns 0.
+int benchmark(PicoDet& detector)
+{
+    const int warm_up = 8;
+    const int loop_num = 100;
+    cv::Mat image(320, 320, CV_8UC3, cv::Scalar(1, 1, 1));
+
+    // Runs one inference and returns its wall-clock duration in milliseconds.
+    auto timed_detect = [&detector, &image]() -> double {
+        auto start = std::chrono::steady_clock::now();
+        std::vector<BoxInfo> results;
+        results = detector.detect(image, 0.4, 0.5);
+        auto end = std::chrono::steady_clock::now();
+        return std::chrono::duration<double, std::milli>(end - start).count();
+    };
+
+    for (int i = 0; i < warm_up; i++)
+    {
+        timed_detect();
+    }
+
+    double time_min = DBL_MAX;
+    double time_max = -DBL_MAX;
+    double time_total = 0;
+    for (int i = 0; i < loop_num; i++)
+    {
+        const double elapsed = timed_detect();
+        time_min = (std::min)(time_min, elapsed);
+        time_max = (std::max)(time_max, elapsed);
+        time_total += elapsed;
+    }
+    const double time_avg = time_total / loop_num;
+    fprintf(stderr, "%20s  min = %7.2f  max = %7.2f  avg = %7.2f\n", "picodet", time_min, time_max, time_avg);
+    return 0;
+}
+// Entry point: `<program> [mode] [path]`.
+//
+// mode 0 = webcam (path is the camera id), 1 = images (path is a glob
+// pattern), 2 = video file, 3 = benchmark. Returns the status of the selected
+// demo, or -1 for a wrong argument count or an unknown mode.
+int main(int argc, char** argv)
+{
+    // Single place for the usage text so both error paths print the same help.
+    auto print_usage = [argv]() {
+        fprintf(stderr, "usage: %s [mode] [path]. \n For webcam mode=0, path is cam id; \n For image demo, mode=1, path=xxx/xxx/*.jpg; \n For video, mode=2; \n For benchmark, mode=3 path=0.\n", argv[0]);
+    };
+    if (argc != 3)
+    {
+        print_usage();
+        return -1;
+    }
+    std::cout<<"start init model"<<std::endl;
+    PicoDet detector("../weight/picodet_m_416.xml");
+    std::cout<<"success"<<std::endl;
+    const int mode = atoi(argv[1]);
+    // Propagate each demo's status so failures are visible in the exit code.
+    switch (mode)
+    {
+    case 0:
+        return webcam_demo(detector, atoi(argv[2]));
+    case 1:
+        return image_demo(detector, argv[2]);
+    case 2:
+        return video_demo(detector, argv[2]);
+    case 3:
+        return benchmark(detector);
+    default:
+        print_usage();
+        return -1;
+    }
+}
--- a/deploy/third_engine/demo_openvino/picodet_openvino.cpp
+++ b/deploy/third_engine/demo_openvino/picodet_openvino.cpp
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// reference from https://github.com/RangiLyu/nanodet/tree/main/demo_openvino
+#include "picodet_openvino.h"
+// Fast approximation of exp(x).
+//
+// Builds the result by writing 2^23 * (x * 1.4426950409 + 126.93490512) into
+// the bit pattern of a float (1.4426950409 is log2(e)). Inside the
+// representable range the result is identical to the unclamped formula.
+// Outside it the value is clamped, because converting a negative or too large
+// floating-point value to uint32_t is undefined behaviour:
+//   - NaN input is returned unchanged,
+//   - inputs that would give a non-positive bit pattern return 0,
+//   - inputs that would reach the infinity/NaN bit patterns return the
+//     largest finite float.
+inline float fast_exp(float x)
+{
+    if (x != x)
+    {
+        return x;
+    }
+    const double bits = (1 << 23) * (1.4426950409 * x + 126.93490512f);
+    if (bits <= 0.0)
+    {
+        return 0.0f;
+    }
+    union {
+        uint32_t i;
+        float f;
+    } v{};
+    // 0x7F800000 is the bit pattern of +infinity, 0x7F7FFFFF the largest
+    // finite float.
+    v.i = bits >= 2139095040.0 ? 0x7F7FFFFFu : static_cast<uint32_t>(bits);
+    return v.f;
+}
+// Logistic function 1 / (1 + e^-x), with the exponential taken from fast_exp.
+inline float sigmoid(float x)
+{
+    const float decay = fast_exp(-x);
+    return 1.0f / (1.0f + decay);
+}
+// Softmax over `length` values of `src`, written to `dst`.
+//
+// The maximum of `src` is subtracted before exponentiation so every exponent
+// is <= 0. Exponentials come from fast_exp, so the result is an
+// approximation. `dst` must have room for `length` elements.
+// Always returns 0; a non-positive `length` is a no-op.
+template<typename T>
+int activation_function_softmax(const T* src, T* dst, int length)
+{
+    // std::max_element on an empty range returns the end iterator, which must
+    // not be dereferenced.
+    if (length <= 0)
+    {
+        return 0;
+    }
+    const T alpha = *std::max_element(src, src + length);
+    T denominator{ 0 };
+    for (int i = 0; i < length; ++i)
+    {
+        dst[i] = fast_exp(src[i] - alpha);
+        denominator += dst[i];
+    }
+    for (int i = 0; i < length; ++i)
+    {
+        dst[i] /= denominator;
+    }
+    return 0;
+}
+// Reads the model at `model_path`, forces FP32 on its first input and on all
+// outputs, loads it on the "CPU" device and creates the inference request
+// used by detect().
+//
+// Throws an Inference Engine exception when the model has no input; errors
+// raised by the Inference Engine calls themselves are not caught here.
+PicoDet::PicoDet(const char* model_path)
+{
+    InferenceEngine::Core ie;
+    InferenceEngine::CNNNetwork model = ie.ReadNetwork(model_path);
+    // prepare input settings
+    InferenceEngine::InputsDataMap inputs_map(model.getInputsInfo());
+    if (inputs_map.empty())
+    {
+        THROW_IE_EXCEPTION << "Model " << model_path << " has no input";
+    }
+    input_name_ = inputs_map.begin()->first;
+    InferenceEngine::InputInfo::Ptr input_info = inputs_map.begin()->second;
+    // preprocess() writes float values into the input blob, so the input
+    // precision has to be FP32.
+    input_info->setPrecision(InferenceEngine::Precision::FP32);
+    //prepare output settings
+    InferenceEngine::OutputsDataMap outputs_map(model.getOutputsInfo());
+    for (auto &output_info : outputs_map)
+    {
+        output_info.second->setPrecision(InferenceEngine::Precision::FP32);
+    }
+    //get network
+    network_ = ie.LoadNetwork(model, "CPU");
+    infer_request_ = network_.CreateInferRequest();
+}
+// Nothing to release by hand: the members clean up after themselves.
+PicoDet::~PicoDet() = default;
+// Copies an 8-bit 3-channel image into `blob` as floats in planar order: all
+// values of channel 0, then channel 1, then channel 2, each plane row by row.
+// Pixel values are converted to float without scaling.
+//
+// Throws an Inference Engine exception when the image is not CV_8UC3, when
+// the blob is not a MemoryBlob, or when the blob holds fewer elements than
+// the image needs (writing it would overflow the blob).
+void PicoDet::preprocess(cv::Mat& image, InferenceEngine::Blob::Ptr& blob)
+{
+    const int img_w = image.cols;
+    const int img_h = image.rows;
+    const int channels = 3;
+    if (image.type() != CV_8UC3)
+    {
+        THROW_IE_EXCEPTION << "preprocess expects an 8-bit 3-channel image";
+    }
+    InferenceEngine::MemoryBlob::Ptr mblob = InferenceEngine::as<InferenceEngine::MemoryBlob>(blob);
+    if (!mblob)
+    {
+        THROW_IE_EXCEPTION << "We expect blob to be inherited from MemoryBlob in matU8ToBlob, "
+            << "but by fact we were not able to cast inputBlob to MemoryBlob";
+    }
+    const size_t plane = static_cast<size_t>(img_w) * static_cast<size_t>(img_h);
+    if (mblob->size() < plane * channels)
+    {
+        THROW_IE_EXCEPTION << "Input blob holds " << mblob->size()
+            << " elements but the image needs " << plane * channels;
+    }
+    auto mblobHolder = mblob->wmap();
+    float *blob_data = mblobHolder.as<float *>();
+    for (int h = 0; h < img_h; h++)
+    {
+        const cv::Vec3b* row = image.ptr<cv::Vec3b>(h);
+        for (int c = 0; c < channels; c++)
+        {
+            float* dst = blob_data + c * plane + static_cast<size_t>(h) * img_w;
+            for (int w = 0; w < img_w; w++)
+            {
+                dst[w] = (float)row[w][c];
+            }
+        }
+    }
+}
+// Runs one inference on `image` and returns the surviving detections.
+//
+// The image is written into the network input, every head listed in
+// heads_info_ is decoded into per-class candidate lists (keeping candidates
+// whose score exceeds `score_threshold`), each class list is filtered by
+// nms() with `nms_threshold`, and the lists are concatenated in class order.
+std::vector<BoxInfo> PicoDet::detect(cv::Mat image, float score_threshold, float nms_threshold)
+{
+    InferenceEngine::Blob::Ptr input_blob = infer_request_.GetBlob(input_name_);
+    preprocess(image, input_blob);
+    infer_request_.Infer();
+
+    // One candidate list per class.
+    std::vector<std::vector<BoxInfo>> per_class(this->num_class_);
+    for (const HeadInfo& head : this->heads_info_)
+    {
+        const InferenceEngine::Blob::Ptr dis_blob = infer_request_.GetBlob(head.dis_layer);
+        const InferenceEngine::Blob::Ptr cls_blob = infer_request_.GetBlob(head.cls_layer);
+
+        // The holders keep the mapped memory accessible while the raw
+        // pointers are in use.
+        auto dis_memory = InferenceEngine::as<InferenceEngine::MemoryBlob>(dis_blob);
+        auto dis_holder = dis_memory->rmap();
+        const float *dis_pred = dis_holder.as<const float *>();
+
+        auto cls_memory = InferenceEngine::as<InferenceEngine::MemoryBlob>(cls_blob);
+        auto cls_holder = cls_memory->rmap();
+        const float *cls_pred = cls_holder.as<const float *>();
+
+        this->decode_infer(cls_pred, dis_pred, head.stride, score_threshold, per_class);
+    }
+
+    std::vector<BoxInfo> dets;
+    for (auto& class_boxes : per_class)
+    {
+        this->nms(class_boxes, nms_threshold);
+        dets.insert(dets.end(), class_boxes.begin(), class_boxes.end());
+    }
+    return dets;
+}
+// Decodes one head into per-class candidate boxes.
+//
+// The head covers a (input_size_ / stride) x (input_size_ / stride) grid.
+// For each cell the class with the highest score is picked (scores that are
+// not above 0 never win); when that score exceeds `threshold`, the cell's
+// distance prediction is turned into a box and appended to
+// results[best class].
+void PicoDet::decode_infer(const float*& cls_pred, const float*& dis_pred, int stride, float threshold, std::vector<std::vector<BoxInfo>>& results)
+{
+    const int feature_h = input_size_ / stride;
+    const int feature_w = input_size_ / stride;
+    // Number of distance values stored per cell: 4 sides x (reg_max_ + 1).
+    const int dis_len = (reg_max_ + 1) * 4;
+    for (int row = 0; row < feature_h; row++)
+    {
+        for (int col = 0; col < feature_w; col++)
+        {
+            const int idx = row * feature_w + col;
+            const float* scores = cls_pred + idx * num_class_;
+            float best_score = 0;
+            int best_label = 0;
+            for (int label = 0; label < num_class_; label++)
+            {
+                if (scores[label] > best_score)
+                {
+                    best_score = scores[label];
+                    best_label = label;
+                }
+            }
+            if (!(best_score > threshold))
+            {
+                continue;
+            }
+            const float* bbox_pred = dis_pred + idx * dis_len;
+            results[best_label].push_back(this->disPred2Bbox(bbox_pred, best_label, best_score, col, row, stride));
+        }
+    }
+}
+// Converts the distance prediction of one grid cell into a box.
+//
+// `dfl_det` points at 4 consecutive groups of (reg_max_ + 1) values. Each
+// group is passed through softmax and reduced to its expected bin index,
+// which is multiplied by `stride` to give a distance from the cell centre
+// ((x + 0.5) * stride, (y + 0.5) * stride). Groups 0 and 1 are subtracted
+// from the centre x and y, groups 2 and 3 are added. The box is clamped to
+// [0, input_size_] and returned together with `score` and `label`.
+BoxInfo PicoDet::disPred2Bbox(const float*& dfl_det, int label, float score, int x, int y, int stride)
+{
+    const float ct_x = (x + 0.5) * stride;
+    const float ct_y = (y + 0.5) * stride;
+    const int bins = reg_max_ + 1;
+    // One scratch buffer reused for all four sides instead of a new[]/delete[]
+    // pair per side.
+    std::vector<float> dis_after_sm(bins);
+    float dis_pred[4];
+    for (int i = 0; i < 4; i++)
+    {
+        activation_function_softmax(dfl_det + i * bins, dis_after_sm.data(), bins);
+        float dis = 0;
+        for (int j = 0; j < bins; j++)
+        {
+            dis += j * dis_after_sm[j];
+        }
+        dis_pred[i] = dis * stride;
+    }
+    const float xmin = (std::max)(ct_x - dis_pred[0], .0f);
+    const float ymin = (std::max)(ct_y - dis_pred[1], .0f);
+    const float xmax = (std::min)(ct_x + dis_pred[2], (float)this->input_size_);
+    const float ymax = (std::min)(ct_y + dis_pred[3], (float)this->input_size_);
+    return BoxInfo { xmin, ymin, xmax, ymax, score, label };
+}
+// In-place non-maximum suppression.
+//
+// Boxes are sorted by descending score. Walking the list from the front,
+// every later box whose intersection-over-union with the current box is at
+// least NMS_THRESH is removed. Widths and heights are computed with a +1.
+void PicoDet::nms(std::vector<BoxInfo>& input_boxes, float NMS_THRESH)
+{
+    std::sort(input_boxes.begin(), input_boxes.end(),
+              [](const BoxInfo& lhs, const BoxInfo& rhs) { return lhs.score > rhs.score; });
+
+    // Area of every box, kept index-aligned with input_boxes.
+    std::vector<float> areas;
+    areas.reserve(input_boxes.size());
+    for (const BoxInfo& box : input_boxes)
+    {
+        areas.push_back((box.x2 - box.x1 + 1) * (box.y2 - box.y1 + 1));
+    }
+
+    for (size_t kept = 0; kept < input_boxes.size(); ++kept)
+    {
+        size_t cand = kept + 1;
+        while (cand < input_boxes.size())
+        {
+            const BoxInfo& a = input_boxes[kept];
+            const BoxInfo& b = input_boxes[cand];
+            const float left = (std::max)(a.x1, b.x1);
+            const float top = (std::max)(a.y1, b.y1);
+            const float right = (std::min)(a.x2, b.x2);
+            const float bottom = (std::min)(a.y2, b.y2);
+            const float overlap_w = (std::max)(float(0), right - left + 1);
+            const float overlap_h = (std::max)(float(0), bottom - top + 1);
+            const float inter = overlap_w * overlap_h;
+            const float iou = inter / (areas[kept] + areas[cand] - inter);
+            if (iou >= NMS_THRESH)
+            {
+                // Erasing shifts the next candidate into position `cand`, so
+                // the index is not advanced.
+                input_boxes.erase(input_boxes.begin() + cand);
+                areas.erase(areas.begin() + cand);
+            }
+            else
+            {
+                ++cand;
+            }
+        }
+    }
+}
--- a/deploy/third_engine/demo_openvino/picodet_openvino.h
+++ b/deploy/third_engine/demo_openvino/picodet_openvino.h
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// reference from https://github.com/RangiLyu/nanodet/tree/main/demo_openvino
+#ifndef _PICODET_OPENVINO_H_
+#define _PICODET_OPENVINO_H_
+#include <string>
+#include <vector>
+#include <opencv2/core.hpp>
+#include <inference_engine.hpp>
+// Describes one detection head of the network.
+struct HeadInfo
+{
+    std::string cls_layer;  // name of the output blob read as class scores
+    std::string dis_layer;  // name of the output blob read as distance predictions
+    int stride;             // divisor applied to the input size to get the grid size
+};
+// One detection: box corners, score and class index.
+struct BoxInfo
+{
+    float x1;     // minimum x of the box
+    float y1;     // minimum y of the box
+    float x2;     // maximum x of the box
+    float y2;     // maximum y of the box
+    float score;  // score of the winning class
+    int label;    // index of the winning class
+};
+// PicoDet detector running on the OpenVINO Inference Engine: loads a model,
+// runs it on an image and decodes the head outputs into boxes.
+class PicoDet
+{
+public:
+    // Reads the model at `param` and prepares the inference request.
+    PicoDet(const char* param);
+    ~PicoDet();
+    // Network loaded on the device, and the request used for every inference.
+    InferenceEngine::ExecutableNetwork network_;
+    InferenceEngine::InferRequest infer_request_;
+    // static bool hasGPU;
+    // Output blobs decoded by detect(), one entry per head.
+    std::vector<HeadInfo> heads_info_{
+        // cls_pred|dis_pred|stride
+        {"save_infer_model/scale_0.tmp_1", "save_infer_model/scale_4.tmp_1", 8},
+        {"save_infer_model/scale_1.tmp_1", "save_infer_model/scale_5.tmp_1", 16},
+        {"save_infer_model/scale_2.tmp_1", "save_infer_model/scale_6.tmp_1", 32},
+        {"save_infer_model/scale_3.tmp_1", "save_infer_model/scale_7.tmp_1", 64},
+    };
+    // Runs inference on `image` and returns the detections that pass
+    // `score_threshold` and per-class NMS with `nms_threshold`.
+    std::vector<BoxInfo> detect(cv::Mat image, float score_threshold, float nms_threshold);
+private:
+    // Copies `image` into the input blob as planar float data.
+    void preprocess(cv::Mat& image, InferenceEngine::Blob::Ptr& blob);
+    // Decodes one head into per-class candidate boxes appended to `results`.
+    void decode_infer(const float*& cls_pred, const float*& dis_pred, int stride, float threshold, std::vector<std::vector<BoxInfo>>& results);
+    // Turns the distance prediction of grid cell (x, y) into a box.
+    BoxInfo disPred2Bbox(const float*& dfl_det, int label, float score, int x, int y, int stride);
+    // Sorts `result` by descending score and removes overlapping boxes in place.
+    static void nms(std::vector<BoxInfo>& result, float nms_threshold);
+    // Name of the network input that detect() fills.
+    std::string input_name_;
+    // Side length used to derive the grid size of each head and to clamp boxes.
+    int input_size_ = 320;
+    // Number of class scores stored per grid cell.
+    int num_class_ = 80;
+    // Each box side is predicted as a distribution over reg_max_ + 1 bins.
+    int reg_max_ = 7;
+};
+#endif