Unverified commit 34654225 authored by qq_30618961, committed by GitHub

add 3rd infer engine (#4336)

* add 3rd infer engine
Parent 3ee7bde2
cmake_minimum_required(VERSION 3.9)
project(picodet-mnn)
set(CMAKE_CXX_STANDARD 17)
# find_package(OpenCV REQUIRED PATHS "/work/dependence/opencv/opencv-3.4.3/build")
find_package(OpenCV REQUIRED)
include_directories(
/path/to/MNN/include/MNN
/path/to/MNN/include
.
)
link_directories(mnn/lib)
add_library(libMNN SHARED IMPORTED)
set_target_properties(
libMNN
PROPERTIES IMPORTED_LOCATION
${CMAKE_SOURCE_DIR}/mnn/lib/libMNN.so
)
add_executable(picodet-mnn main.cpp picodet_mnn.cpp)
target_link_libraries(picodet-mnn libMNN ${OpenCV_LIBS})
# PicoDet MNN Demo
This folder provides PicoDet inference code using
[Alibaba's MNN framework](https://github.com/alibaba/MNN). Most of the implementation in
this folder is the same as *demo_ncnn*.
## Install MNN
### Python library
Just run:
``` shell
pip install MNN
```
### C++ library
Please follow the [official document](https://www.yuque.com/mnn/en/build_linux) to build MNN engine.
- Create picodet_m_416_coco.onnx
```shell
modelName=picodet_m_416_coco
# export model
python tools/export_model.py \
-c configs/picodet/${modelName}.yml \
-o weights=${modelName}.pdparams \
--output_dir=inference_model
# convert to onnx
paddle2onnx --model_dir inference_model/${modelName} \
--model_filename model.pdmodel \
--params_filename model.pdiparams \
--opset_version 11 \
--save_file ${modelName}.onnx
# onnxsim
python -m onnxsim ${modelName}.onnx ${modelName}_processed.onnx
```
- Convert the model (point `--modelFile` at the ONNX file you just exported)
``` shell
python -m MNN.tools.mnnconvert -f ONNX --modelFile picodet-416.onnx --MNNModel picodet-416.mnn
```
A converted model is available at this [download link](https://paddledet.bj.bcebos.com/deploy/third_engine/picodet_m_416.mnn).
## Build
The Python script *demo_mnn.py* can run directly and independently, without the main PicoDet repo.
`PicoDetONNX` and `PicoDetTorch` are two classes used to check the similarity of MNN inference results
against the ONNX and PyTorch models. They can be removed with no side effects.
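For instance, a minimal cross-backend check might look like the sketch below (the file names and the 416 input shape are illustrative, and it assumes the classes are importable from *demo_mnn.py* with MNN and onnxruntime installed):
```python
import cv2
import numpy as np
from demo_mnn import PicoDetMNN, PicoDetONNX

img = cv2.imread("../imgs/test.jpg")
mnn_det = PicoDetMNN("../model/picodet-416.mnn", input_shape=[416, 416])
onnx_det = PicoDetONNX("../model/picodet-416.onnx", input_shape=[416, 416])

# Share one preprocessed input so any difference comes from the backends.
img_input, _ = mnn_det.preprocess(img)
scores_mnn, _ = mnn_det.infer_image(img_input)
scores_onnx, _ = onnx_det.infer_image(img_input)
for s_m, s_o in zip(scores_mnn, scores_onnx):
    print("max score diff:", np.abs(s_m - s_o).max())
```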
For the C++ code, replace `libMNN.so` under *./mnn/lib* with the one you just compiled, modify the OpenCV and MNN paths in the CMake file,
and run:
``` shell
mkdir build && cd build
cmake ..
make
```
Note that a flag in `main.cpp` controls whether to show the detection results or save them into a folder.
``` c++
#define __SAVE_RESULT__ // if defined, save drawn results to ../results; otherwise show them in a window
```
## Run
### Python
`demo_mnn.py` provides an inference class `PicoDetMNN` that combines preprocessing, postprocessing, and visualization.
It can also be used from the command line:
```shell
demo_mnn.py [-h] [--model_path MODEL_PATH] [--cfg_path CFG_PATH]
[--img_fold IMG_FOLD] [--result_fold RESULT_FOLD]
[--input_shape INPUT_SHAPE INPUT_SHAPE]
[--backend {MNN,ONNX,torch}]
```
For example:
``` shell
# run MNN 416 model
python ./demo_mnn.py --model_path ../model/picodet-416.mnn --img_fold ../imgs --result_fold ../results
# run MNN 320 model
python ./demo_mnn.py --model_path ../model/picodet-320.mnn --input_shape 320 320 --backend MNN
# run onnx model
python ./demo_mnn.py --model_path ../model/sim.onnx --backend ONNX
```
### C++
The C++ inference interface is the same as in the NCNN code. To detect images in a folder, run:
``` shell
./picodet-mnn "1" "../imgs/test.jpg"
```
For a speed benchmark:
``` shell
./picodet-mnn "3" "0"
```
## Reference
[MNN](https://github.com/alibaba/MNN)
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// reference from https://github.com/RangiLyu/nanodet/tree/main/demo_mnn
#include "picodet_mnn.hpp"
#include <iostream>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#define __SAVE_RESULT__ // if defined, save drawn results to ../results; otherwise show them in a window
struct object_rect {
int x;
int y;
int width;
int height;
};
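// Letterbox resize: scale src to fit dst_size while keeping its aspect ratio,
// center it on a black canvas, and record where the valid pixels land in effect_area.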
int resize_uniform(cv::Mat& src, cv::Mat& dst, cv::Size dst_size, object_rect& effect_area)
{
int w = src.cols;
int h = src.rows;
int dst_w = dst_size.width;
int dst_h = dst_size.height;
dst = cv::Mat(cv::Size(dst_w, dst_h), CV_8UC3, cv::Scalar(0));
float ratio_src = w * 1.0 / h;
float ratio_dst = dst_w * 1.0 / dst_h;
int tmp_w = 0;
int tmp_h = 0;
if (ratio_src > ratio_dst) {
tmp_w = dst_w;
tmp_h = floor((dst_w * 1.0 / w) * h);
}
else if (ratio_src < ratio_dst) {
tmp_h = dst_h;
tmp_w = floor((dst_h * 1.0 / h) * w);
}
else {
cv::resize(src, dst, dst_size);
effect_area.x = 0;
effect_area.y = 0;
effect_area.width = dst_w;
effect_area.height = dst_h;
return 0;
}
cv::Mat tmp;
cv::resize(src, tmp, cv::Size(tmp_w, tmp_h));
if (tmp_w != dst_w) {
int index_w = floor((dst_w - tmp_w) / 2.0);
for (int i = 0; i < dst_h; i++) {
memcpy(dst.data + i * dst_w * 3 + index_w * 3, tmp.data + i * tmp_w * 3, tmp_w * 3);
}
effect_area.x = index_w;
effect_area.y = 0;
effect_area.width = tmp_w;
effect_area.height = tmp_h;
}
else if (tmp_h != dst_h) {
int index_h = floor((dst_h - tmp_h) / 2.0);
memcpy(dst.data + index_h * dst_w * 3, tmp.data, tmp_w * tmp_h * 3);
effect_area.x = 0;
effect_area.y = index_h;
effect_area.width = tmp_w;
effect_area.height = tmp_h;
}
else {
printf("error\n");
}
return 0;
}
const int color_list[80][3] =
{
{216 , 82 , 24},
{236 ,176 , 31},
{125 , 46 ,141},
{118 ,171 , 47},
{ 76 ,189 ,237},
{238 , 19 , 46},
{ 76 , 76 , 76},
{153 ,153 ,153},
{255 , 0 , 0},
{255 ,127 , 0},
{190 ,190 , 0},
{ 0 ,255 , 0},
{ 0 , 0 ,255},
{170 , 0 ,255},
{ 84 , 84 , 0},
{ 84 ,170 , 0},
{ 84 ,255 , 0},
{170 , 84 , 0},
{170 ,170 , 0},
{170 ,255 , 0},
{255 , 84 , 0},
{255 ,170 , 0},
{255 ,255 , 0},
{ 0 , 84 ,127},
{ 0 ,170 ,127},
{ 0 ,255 ,127},
{ 84 , 0 ,127},
{ 84 , 84 ,127},
{ 84 ,170 ,127},
{ 84 ,255 ,127},
{170 , 0 ,127},
{170 , 84 ,127},
{170 ,170 ,127},
{170 ,255 ,127},
{255 , 0 ,127},
{255 , 84 ,127},
{255 ,170 ,127},
{255 ,255 ,127},
{ 0 , 84 ,255},
{ 0 ,170 ,255},
{ 0 ,255 ,255},
{ 84 , 0 ,255},
{ 84 , 84 ,255},
{ 84 ,170 ,255},
{ 84 ,255 ,255},
{170 , 0 ,255},
{170 , 84 ,255},
{170 ,170 ,255},
{170 ,255 ,255},
{255 , 0 ,255},
{255 , 84 ,255},
{255 ,170 ,255},
{ 42 , 0 , 0},
{ 84 , 0 , 0},
{127 , 0 , 0},
{170 , 0 , 0},
{212 , 0 , 0},
{255 , 0 , 0},
{ 0 , 42 , 0},
{ 0 , 84 , 0},
{ 0 ,127 , 0},
{ 0 ,170 , 0},
{ 0 ,212 , 0},
{ 0 ,255 , 0},
{ 0 , 0 , 42},
{ 0 , 0 , 84},
{ 0 , 0 ,127},
{ 0 , 0 ,170},
{ 0 , 0 ,212},
{ 0 , 0 ,255},
{ 0 , 0 , 0},
{ 36 , 36 , 36},
{ 72 , 72 , 72},
{109 ,109 ,109},
{145 ,145 ,145},
{182 ,182 ,182},
{218 ,218 ,218},
{ 0 ,113 ,188},
{ 80 ,182 ,188},
{127 ,127 , 0},
};
void draw_bboxes(const cv::Mat& bgr, const std::vector<BoxInfo>& bboxes, object_rect effect_roi, std::string save_path="None")
{
static const char* class_names[] = { "person", "bicycle", "car", "motorcycle", "airplane", "bus",
"train", "truck", "boat", "traffic light", "fire hydrant",
"stop sign", "parking meter", "bench", "bird", "cat", "dog",
"horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe",
"backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
"skis", "snowboard", "sports ball", "kite", "baseball bat",
"baseball glove", "skateboard", "surfboard", "tennis racket",
"bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl",
"banana", "apple", "sandwich", "orange", "broccoli", "carrot",
"hot dog", "pizza", "donut", "cake", "chair", "couch",
"potted plant", "bed", "dining table", "toilet", "tv", "laptop",
"mouse", "remote", "keyboard", "cell phone", "microwave", "oven",
"toaster", "sink", "refrigerator", "book", "clock", "vase",
"scissors", "teddy bear", "hair drier", "toothbrush"
};
cv::Mat image = bgr.clone();
int src_w = image.cols;
int src_h = image.rows;
int dst_w = effect_roi.width;
int dst_h = effect_roi.height;
float width_ratio = (float)src_w / (float)dst_w;
float height_ratio = (float)src_h / (float)dst_h;
for (size_t i = 0; i < bboxes.size(); i++)
{
const BoxInfo& bbox = bboxes[i];
cv::Scalar color = cv::Scalar(color_list[bbox.label][0], color_list[bbox.label][1], color_list[bbox.label][2]);
cv::rectangle(image, cv::Rect(cv::Point((bbox.x1 - effect_roi.x) * width_ratio, (bbox.y1 - effect_roi.y) * height_ratio),
cv::Point((bbox.x2 - effect_roi.x) * width_ratio, (bbox.y2 - effect_roi.y) * height_ratio)), color);
char text[256];
sprintf(text, "%s %.1f%%", class_names[bbox.label], bbox.score * 100);
int baseLine = 0;
cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.4, 1, &baseLine);
int x = (bbox.x1 - effect_roi.x) * width_ratio;
int y = (bbox.y1 - effect_roi.y) * height_ratio - label_size.height - baseLine;
if (y < 0)
y = 0;
if (x + label_size.width > image.cols)
x = image.cols - label_size.width;
cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)),
color, -1);
cv::putText(image, text, cv::Point(x, y + label_size.height),
cv::FONT_HERSHEY_SIMPLEX, 0.4, cv::Scalar(255, 255, 255));
}
if (save_path == "None")
{
cv::imshow("image", image);
}
else
{
cv::imwrite(save_path, image);
std::cout << save_path << std::endl;
}
}
int image_demo(PicoDet &detector, const char* imagepath)
{
std::vector<cv::String> filenames;
cv::glob(imagepath, filenames, false);
for (auto img_name : filenames)
{
cv::Mat image = cv::imread(img_name);
if (image.empty())
{
fprintf(stderr, "cv::imread %s failed\n", img_name.c_str());
return -1;
}
object_rect effect_roi;
cv::Mat resized_img;
resize_uniform(image, resized_img, cv::Size(320, 320), effect_roi);
std::vector<BoxInfo> results;
detector.detect(resized_img, results);
#ifdef __SAVE_RESULT__
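// NOTE: replace(3, 4, "results") assumes paths of the form "../imgs/...",
// swapping the 4-character folder name "imgs" for "results".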
std::string save_path = img_name;
draw_bboxes(image, results, effect_roi, save_path.replace(3, 4, "results"));
#else
draw_bboxes(image, results, effect_roi);
cv::waitKey(0);
#endif
}
return 0;
}
int webcam_demo(PicoDet& detector, int cam_id)
{
cv::Mat image;
cv::VideoCapture cap(cam_id);
while (true)
{
cap >> image;
object_rect effect_roi;
cv::Mat resized_img;
resize_uniform(image, resized_img, cv::Size(320, 320), effect_roi);
std::vector<BoxInfo> results;
detector.detect(resized_img, results);
draw_bboxes(image, results, effect_roi);
cv::waitKey(1);
}
return 0;
}
int video_demo(PicoDet& detector, const char* path)
{
cv::Mat image;
cv::VideoCapture cap(path);
while (true)
{
cap >> image;
object_rect effect_roi;
cv::Mat resized_img;
resize_uniform(image, resized_img, cv::Size(320, 320), effect_roi);
std::vector<BoxInfo> results;
detector.detect(resized_img, results);
draw_bboxes(image, results, effect_roi);
cv::waitKey(1);
}
return 0;
}
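// Benchmark: the first warm_up iterations are excluded from the statistics,
// then min/max/avg times (in seconds) are reported over loop_num runs.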
int benchmark(PicoDet& detector)
{
int loop_num = 100;
int warm_up = 8;
double time_min = DBL_MAX;
double time_max = -DBL_MAX;
double time_avg = 0;
cv::Mat image(320, 320, CV_8UC3, cv::Scalar(1, 1, 1));
for (int i = 0; i < warm_up + loop_num; i++)
{
auto start = std::chrono::steady_clock::now();
std::vector<BoxInfo> results;
detector.detect(image, results);
auto end = std::chrono::steady_clock::now();
std::chrono::duration<double> elapsed = end - start;
double time = elapsed.count();
if (i >= warm_up)
{
time_min = (std::min)(time_min, time);
time_max = (std::max)(time_max, time);
time_avg += time;
}
}
time_avg /= loop_num;
fprintf(stderr, "%20s min = %7.2f max = %7.2f avg = %7.2f\n", "picodet", time_min, time_max, time_avg);
return 0;
}
int main(int argc, char** argv)
{
if (argc != 3)
{
fprintf(stderr, "usage: %s [mode] [path]. \n For webcam mode=0, path is cam id; \n For image demo, mode=1, path=xxx/xxx/*.jpg; \n For video, mode=2; \n For benchmark, mode=3 path=0.\n", argv[0]);
return -1;
}
PicoDet detector = PicoDet("../weight/picodet-416.mnn", 416, 416, 4, 0.45, 0.3);
int mode = atoi(argv[1]);
switch (mode)
{
case 0:{
int cam_id = atoi(argv[2]);
webcam_demo(detector, cam_id);
break;
}
case 1:{
const char* images = argv[2];
image_demo(detector, images);
break;
}
case 2:{
const char* path = argv[2];
video_demo(detector, path);
break;
}
case 3:{
benchmark(detector);
break;
}
default:{
fprintf(stderr, "usage: %s [mode] [path]. \n For webcam mode=0, path is cam id; \n For image demo, mode=1, path=xxx/xxx/*.jpg; \n For video, mode=2; \n For benchmark, mode=3 path=0.\n", argv[0]);
break;
}
}
}
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// reference from https://github.com/RangiLyu/nanodet/tree/main/demo_mnn
#include "picodet_mnn.hpp"
using namespace std;
PicoDet::PicoDet(const std::string &mnn_path,
int input_width, int input_length, int num_thread_,
float score_threshold_, float nms_threshold_)
{
num_thread = num_thread_;
in_w = input_width;
in_h = input_length;
score_threshold = score_threshold_;
nms_threshold = nms_threshold_;
PicoDet_interpreter = std::shared_ptr<MNN::Interpreter>(MNN::Interpreter::createFromFile(mnn_path.c_str()));
MNN::ScheduleConfig config;
config.numThread = num_thread;
MNN::BackendConfig backendConfig;
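// Precision mode 2 corresponds to MNN's Precision_Low (reduced precision where the backend supports it).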
backendConfig.precision = (MNN::BackendConfig::PrecisionMode) 2;
config.backendConfig = &backendConfig;
PicoDet_session = PicoDet_interpreter->createSession(config);
input_tensor = PicoDet_interpreter->getSessionInput(PicoDet_session, nullptr);
}
PicoDet::~PicoDet()
{
PicoDet_interpreter->releaseModel();
PicoDet_interpreter->releaseSession(PicoDet_session);
}
int PicoDet::detect(cv::Mat &raw_image, std::vector<BoxInfo> &result_list)
{
if (raw_image.empty()) {
std::cout << "image is empty ,please check!" << std::endl;
return -1;
}
image_h = raw_image.rows;
image_w = raw_image.cols;
cv::Mat image;
cv::resize(raw_image, image, cv::Size(in_w, in_h));
PicoDet_interpreter->resizeTensor(input_tensor, {1, 3, in_h, in_w});
PicoDet_interpreter->resizeSession(PicoDet_session);
std::shared_ptr<MNN::CV::ImageProcess> pretreat(
MNN::CV::ImageProcess::create(MNN::CV::BGR, MNN::CV::BGR, mean_vals, 3,
norm_vals, 3));
pretreat->convert(image.data, in_w, in_h, image.step[0], input_tensor);
auto start = chrono::steady_clock::now();
// run network
PicoDet_interpreter->runSession(PicoDet_session);
// get output data
std::vector<std::vector<BoxInfo>> results;
results.resize(num_class);
for (const auto &head_info : heads_info)
{
MNN::Tensor *tensor_scores = PicoDet_interpreter->getSessionOutput(PicoDet_session, head_info.cls_layer.c_str());
MNN::Tensor *tensor_boxes = PicoDet_interpreter->getSessionOutput(PicoDet_session, head_info.dis_layer.c_str());
MNN::Tensor tensor_scores_host(tensor_scores, tensor_scores->getDimensionType());
tensor_scores->copyToHostTensor(&tensor_scores_host);
MNN::Tensor tensor_boxes_host(tensor_boxes, tensor_boxes->getDimensionType());
tensor_boxes->copyToHostTensor(&tensor_boxes_host);
decode_infer(&tensor_scores_host, &tensor_boxes_host, head_info.stride, score_threshold, results);
}
auto end = chrono::steady_clock::now();
chrono::duration<double> elapsed = end - start;
cout << "inference time:" << elapsed.count() << " s, ";
for (int i = 0; i < (int)results.size(); i++)
{
nms(results[i], nms_threshold);
for (auto box : results[i])
{
box.x1 = box.x1 / in_w * image_w;
box.x2 = box.x2 / in_w * image_w;
box.y1 = box.y1 / in_h * image_h;
box.y2 = box.y2 / in_h * image_h;
result_list.push_back(box);
}
}
cout << "detect " << result_list.size() << " objects" << endl;
return 0;
}
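// For each feature-map cell, take the arg-max class score; cells above the
// score threshold are decoded into boxes and bucketed per class for later NMS.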
void PicoDet::decode_infer(MNN::Tensor *cls_pred, MNN::Tensor *dis_pred, int stride, float threshold, std::vector<std::vector<BoxInfo>> &results)
{
int feature_h = in_h / stride;
int feature_w = in_w / stride;
for (int idx = 0; idx < feature_h * feature_w; idx++)
{
const float *scores = cls_pred->host<float>() + (idx * num_class);
int row = idx / feature_w;
int col = idx % feature_w;
float score = 0;
int cur_label = 0;
for (int label = 0; label < num_class; label++)
{
if (scores[label] > score)
{
score = scores[label];
cur_label = label;
}
}
if (score > threshold)
{
const float *bbox_pred = dis_pred->host<float>() + (idx * 4 * (reg_max + 1));
results[cur_label].push_back(disPred2Bbox(bbox_pred, cur_label, score, col, row, stride));
}
}
}
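// GFL/DFL decoding: each side's distance is the expectation of a discrete
// distribution over reg_max+1 bins (softmax weights times bin index), scaled by stride.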
BoxInfo PicoDet::disPred2Bbox(const float *&dfl_det, int label, float score, int x, int y, int stride)
{
float ct_x = (x + 0.5) * stride;
float ct_y = (y + 0.5) * stride;
std::vector<float> dis_pred;
dis_pred.resize(4);
for (int i = 0; i < 4; i++)
{
float dis = 0;
float *dis_after_sm = new float[reg_max + 1];
activation_function_softmax(dfl_det + i * (reg_max + 1), dis_after_sm, reg_max + 1);
for (int j = 0; j < reg_max + 1; j++)
{
dis += j * dis_after_sm[j];
}
dis *= stride;
dis_pred[i] = dis;
delete[] dis_after_sm;
}
float xmin = (std::max)(ct_x - dis_pred[0], .0f);
float ymin = (std::max)(ct_y - dis_pred[1], .0f);
float xmax = (std::min)(ct_x + dis_pred[2], (float)in_w);
float ymax = (std::min)(ct_y + dis_pred[3], (float)in_h);
return BoxInfo{xmin, ymin, xmax, ymax, score, label};
}
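// Greedy IoU-based NMS: boxes are sorted by score, and any box overlapping a kept
// box with IoU >= NMS_THRESH is dropped (the +1 terms treat coordinates as inclusive pixels).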
void PicoDet::nms(std::vector<BoxInfo> &input_boxes, float NMS_THRESH)
{
std::sort(input_boxes.begin(), input_boxes.end(), [](BoxInfo a, BoxInfo b) { return a.score > b.score; });
std::vector<float> vArea(input_boxes.size());
for (int i = 0; i < int(input_boxes.size()); ++i)
{
vArea[i] = (input_boxes.at(i).x2 - input_boxes.at(i).x1 + 1) * (input_boxes.at(i).y2 - input_boxes.at(i).y1 + 1);
}
for (int i = 0; i < int(input_boxes.size()); ++i)
{
for (int j = i + 1; j < int(input_boxes.size());)
{
float xx1 = (std::max)(input_boxes[i].x1, input_boxes[j].x1);
float yy1 = (std::max)(input_boxes[i].y1, input_boxes[j].y1);
float xx2 = (std::min)(input_boxes[i].x2, input_boxes[j].x2);
float yy2 = (std::min)(input_boxes[i].y2, input_boxes[j].y2);
float w = (std::max)(float(0), xx2 - xx1 + 1);
float h = (std::max)(float(0), yy2 - yy1 + 1);
float inter = w * h;
float ovr = inter / (vArea[i] + vArea[j] - inter);
if (ovr >= NMS_THRESH)
{
input_boxes.erase(input_boxes.begin() + j);
vArea.erase(vArea.begin() + j);
}
else
{
j++;
}
}
}
}
string PicoDet::get_label_str(int label)
{
return labels[label];
}
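// Fast exp approximation (Schraudolph's trick): constructs the IEEE-754 bit pattern
// of 2^(x/ln2) directly; a few percent of error, acceptable for sigmoid/softmax here.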
inline float fast_exp(float x)
{
union
{
uint32_t i;
float f;
} v{};
v.i = (1 << 23) * (1.4426950409 * x + 126.93490512f);
return v.f;
}
inline float sigmoid(float x)
{
return 1.0f / (1.0f + fast_exp(-x));
}
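// Numerically stable softmax: subtracting the max (alpha) before exponentiation avoids overflow.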
template <typename _Tp>
int activation_function_softmax(const _Tp *src, _Tp *dst, int length)
{
const _Tp alpha = *std::max_element(src, src + length);
_Tp denominator{0};
for (int i = 0; i < length; ++i)
{
dst[i] = fast_exp(src[i] - alpha);
denominator += dst[i];
}
for (int i = 0; i < length; ++i)
{
dst[i] /= denominator;
}
return 0;
}
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// reference from https://github.com/RangiLyu/nanodet/tree/main/demo_mnn
#ifndef __PicoDet_H__
#define __PicoDet_H__
#pragma once
#include "Interpreter.hpp"
#include "MNNDefine.h"
#include "Tensor.hpp"
#include "ImageProcess.hpp"
#include <opencv2/opencv.hpp>
#include <algorithm>
#include <iostream>
#include <string>
#include <vector>
#include <memory>
#include <chrono>
typedef struct HeadInfo_
{
std::string cls_layer;
std::string dis_layer;
int stride;
} HeadInfo;
typedef struct BoxInfo_
{
float x1;
float y1;
float x2;
float y2;
float score;
int label;
} BoxInfo;
class PicoDet {
public:
PicoDet(const std::string &mnn_path,
int input_width, int input_length, int num_thread_ = 4, float score_threshold_ = 0.5, float nms_threshold_ = 0.3);
~PicoDet();
int detect(cv::Mat &img, std::vector<BoxInfo> &result_list);
std::string get_label_str(int label);
private:
void decode_infer(MNN::Tensor *cls_pred, MNN::Tensor *dis_pred, int stride, float threshold, std::vector<std::vector<BoxInfo>> &results);
BoxInfo disPred2Bbox(const float *&dfl_det, int label, float score, int x, int y, int stride);
void nms(std::vector<BoxInfo> &input_boxes, float NMS_THRESH);
private:
std::shared_ptr<MNN::Interpreter> PicoDet_interpreter;
MNN::Session *PicoDet_session = nullptr;
MNN::Tensor *input_tensor = nullptr;
int num_thread;
int image_w;
int image_h;
int in_w = 320;
int in_h = 320;
float score_threshold;
float nms_threshold;
const float mean_vals[3] = { 103.53f, 116.28f, 123.675f };
const float norm_vals[3] = { 0.017429f, 0.017507f, 0.017125f };
const int num_class = 80;
const int reg_max = 7;
std::vector<HeadInfo> heads_info{
// cls_pred|dis_pred|stride
{"save_infer_model/scale_0.tmp_1", "save_infer_model/scale_4.tmp_1", 8},
{"save_infer_model/scale_1.tmp_1", "save_infer_model/scale_5.tmp_1", 16},
{"save_infer_model/scale_2.tmp_1", "save_infer_model/scale_6.tmp_1", 32},
{"save_infer_model/scale_3.tmp_1", "save_infer_model/scale_7.tmp_1", 64},
};
std::vector<std::string>
labels{"person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
"fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
"elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
"skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
"tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
"sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
"potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
"microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
"hair drier", "toothbrush"};
};
template <typename _Tp>
int activation_function_softmax(const _Tp *src, _Tp *dst, int length);
inline float fast_exp(float x);
inline float sigmoid(float x);
#endif
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# reference from https://github.com/RangiLyu/nanodet/tree/main/demo_mnn
# -*- coding: utf-8 -*-
import argparse
from abc import ABCMeta, abstractmethod
from pathlib import Path
import cv2
import matplotlib.pyplot as plt
import numpy as np
from scipy.special import softmax
from tqdm import tqdm
_COLORS = (np.array([
0.000,
0.447,
0.741,
0.850,
0.325,
0.098,
0.929,
0.694,
0.125,
0.494,
0.184,
0.556,
0.466,
0.674,
0.188,
0.301,
0.745,
0.933,
0.635,
0.078,
0.184,
0.300,
0.300,
0.300,
0.600,
0.600,
0.600,
1.000,
0.000,
0.000,
1.000,
0.500,
0.000,
0.749,
0.749,
0.000,
0.000,
1.000,
0.000,
0.000,
0.000,
1.000,
0.667,
0.000,
1.000,
0.333,
0.333,
0.000,
0.333,
0.667,
0.000,
0.333,
1.000,
0.000,
0.667,
0.333,
0.000,
0.667,
0.667,
0.000,
0.667,
1.000,
0.000,
1.000,
0.333,
0.000,
1.000,
0.667,
0.000,
1.000,
1.000,
0.000,
0.000,
0.333,
0.500,
0.000,
0.667,
0.500,
0.000,
1.000,
0.500,
0.333,
0.000,
0.500,
0.333,
0.333,
0.500,
0.333,
0.667,
0.500,
0.333,
1.000,
0.500,
0.667,
0.000,
0.500,
0.667,
0.333,
0.500,
0.667,
0.667,
0.500,
0.667,
1.000,
0.500,
1.000,
0.000,
0.500,
1.000,
0.333,
0.500,
1.000,
0.667,
0.500,
1.000,
1.000,
0.500,
0.000,
0.333,
1.000,
0.000,
0.667,
1.000,
0.000,
1.000,
1.000,
0.333,
0.000,
1.000,
0.333,
0.333,
1.000,
0.333,
0.667,
1.000,
0.333,
1.000,
1.000,
0.667,
0.000,
1.000,
0.667,
0.333,
1.000,
0.667,
0.667,
1.000,
0.667,
1.000,
1.000,
1.000,
0.000,
1.000,
1.000,
0.333,
1.000,
1.000,
0.667,
1.000,
0.333,
0.000,
0.000,
0.500,
0.000,
0.000,
0.667,
0.000,
0.000,
0.833,
0.000,
0.000,
1.000,
0.000,
0.000,
0.000,
0.167,
0.000,
0.000,
0.333,
0.000,
0.000,
0.500,
0.000,
0.000,
0.667,
0.000,
0.000,
0.833,
0.000,
0.000,
1.000,
0.000,
0.000,
0.000,
0.167,
0.000,
0.000,
0.333,
0.000,
0.000,
0.500,
0.000,
0.000,
0.667,
0.000,
0.000,
0.833,
0.000,
0.000,
1.000,
0.000,
0.000,
0.000,
0.143,
0.143,
0.143,
0.286,
0.286,
0.286,
0.429,
0.429,
0.429,
0.571,
0.571,
0.571,
0.714,
0.714,
0.714,
0.857,
0.857,
0.857,
0.000,
0.447,
0.741,
0.314,
0.717,
0.741,
0.50,
0.5,
0,
]).astype(np.float32).reshape(-1, 3))
def get_resize_matrix(raw_shape, dst_shape, keep_ratio):
"""
Get resize matrix for resizing raw img to input size
:param raw_shape: (width, height) of raw image
:param dst_shape: (width, height) of input image
:param keep_ratio: whether to keep the original aspect ratio
:return: 3x3 Matrix
"""
r_w, r_h = raw_shape
d_w, d_h = dst_shape
Rs = np.eye(3)
if keep_ratio:
C = np.eye(3)
C[0, 2] = -r_w / 2
C[1, 2] = -r_h / 2
if r_w / r_h < d_w / d_h:
ratio = d_h / r_h
else:
ratio = d_w / r_w
Rs[0, 0] *= ratio
Rs[1, 1] *= ratio
T = np.eye(3)
T[0, 2] = 0.5 * d_w
T[1, 2] = 0.5 * d_h
return T @ Rs @ C
else:
Rs[0, 0] *= d_w / r_w
Rs[1, 1] *= d_h / r_h
return Rs
def warp_boxes(boxes, M, width, height):
"""Apply transform to boxes
Copy from picodet/data/transform/warp.py
"""
n = len(boxes)
if n:
# warp points
xy = np.ones((n * 4, 3))
xy[:, :2] = boxes[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(
n * 4, 2) # x1y1, x2y2, x1y2, x2y1
xy = xy @ M.T # transform
xy = (xy[:, :2] / xy[:, 2:3]).reshape(n, 8) # rescale
# create new boxes
x = xy[:, [0, 2, 4, 6]]
y = xy[:, [1, 3, 5, 7]]
xy = np.concatenate(
(x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T
# clip boxes
xy[:, [0, 2]] = xy[:, [0, 2]].clip(0, width)
xy[:, [1, 3]] = xy[:, [1, 3]].clip(0, height)
return xy.astype(np.float32)
else:
return boxes
def overlay_bbox_cv(img, all_box, class_names):
"""Draw result boxes
Copy from picodet/util/visualization.py
"""
# all_box array of [label, x0, y0, x1, y1, score]
all_box.sort(key=lambda v: v[5])
for box in all_box:
label, x0, y0, x1, y1, score = box
color = (_COLORS[label] * 255).astype(np.uint8).tolist()
text = "{}:{:.1f}%".format(class_names[label], score * 100)
txt_color = (0, 0, 0) if np.mean(_COLORS[label]) > 0.5 else (255, 255, 255)
font = cv2.FONT_HERSHEY_SIMPLEX
txt_size = cv2.getTextSize(text, font, 0.5, 2)[0]
cv2.rectangle(img, (x0, y0), (x1, y1), color, 2)
cv2.rectangle(
img,
(x0, y0 - txt_size[1] - 1),
(x0 + txt_size[0] + txt_size[1], y0 - 1),
color,
-1, )
cv2.putText(img, text, (x0, y0 - 1), font, 0.5, txt_color, thickness=1)
return img
def hard_nms(box_scores, iou_threshold, top_k=-1, candidate_size=200):
"""
Args:
box_scores (N, 5): boxes in corner-form and probabilities.
iou_threshold: intersection over union threshold.
top_k: keep top_k results. If k <= 0, keep all the results.
candidate_size: only consider the candidates with the highest scores.
Returns:
picked box_scores (K, 5): the kept boxes with their scores.
"""
scores = box_scores[:, -1]
boxes = box_scores[:, :-1]
picked = []
indexes = np.argsort(scores)
indexes = indexes[-candidate_size:]
while len(indexes) > 0:
current = indexes[-1]
picked.append(current)
if 0 < top_k == len(picked) or len(indexes) == 1:
break
current_box = boxes[current, :]
indexes = indexes[:-1]
rest_boxes = boxes[indexes, :]
iou = iou_of(
rest_boxes,
np.expand_dims(
current_box, axis=0), )
indexes = indexes[iou <= iou_threshold]
return box_scores[picked, :]
def iou_of(boxes0, boxes1, eps=1e-5):
"""Return intersection-over-union (Jaccard index) of boxes.
Args:
boxes0 (N, 4): ground truth boxes.
boxes1 (N or 1, 4): predicted boxes.
eps: a small number to avoid 0 as denominator.
Returns:
iou (N): IoU values.
"""
overlap_left_top = np.maximum(boxes0[..., :2], boxes1[..., :2])
overlap_right_bottom = np.minimum(boxes0[..., 2:], boxes1[..., 2:])
overlap_area = area_of(overlap_left_top, overlap_right_bottom)
area0 = area_of(boxes0[..., :2], boxes0[..., 2:])
area1 = area_of(boxes1[..., :2], boxes1[..., 2:])
return overlap_area / (area0 + area1 - overlap_area + eps)
def area_of(left_top, right_bottom):
"""Compute the areas of rectangles given two corners.
Args:
left_top (N, 2): left top corner.
right_bottom (N, 2): right bottom corner.
Returns:
area (N): return the area.
"""
hw = np.clip(right_bottom - left_top, 0.0, None)
return hw[..., 0] * hw[..., 1]
class PicoDetABC(metaclass=ABCMeta):
def __init__(
self,
input_shape=[416, 416],
reg_max=7,
strides=[8, 16, 32, 64],
prob_threshold=0.4,
iou_threshold=0.3,
num_candidate=1000,
top_k=-1, ):
self.strides = strides
self.input_shape = input_shape
self.reg_max = reg_max
self.prob_threshold = prob_threshold
self.iou_threshold = iou_threshold
self.num_candidate = num_candidate
self.top_k = top_k
self.img_mean = [103.53, 116.28, 123.675]
self.img_std = [57.375, 57.12, 58.395]
self.input_size = (self.input_shape[1], self.input_shape[0])
self.class_names = [
"person",
"bicycle",
"car",
"motorcycle",
"airplane",
"bus",
"train",
"truck",
"boat",
"traffic_light",
"fire_hydrant",
"stop_sign",
"parking_meter",
"bench",
"bird",
"cat",
"dog",
"horse",
"sheep",
"cow",
"elephant",
"bear",
"zebra",
"giraffe",
"backpack",
"umbrella",
"handbag",
"tie",
"suitcase",
"frisbee",
"skis",
"snowboard",
"sports_ball",
"kite",
"baseball_bat",
"baseball_glove",
"skateboard",
"surfboard",
"tennis_racket",
"bottle",
"wine_glass",
"cup",
"fork",
"knife",
"spoon",
"bowl",
"banana",
"apple",
"sandwich",
"orange",
"broccoli",
"carrot",
"hot_dog",
"pizza",
"donut",
"cake",
"chair",
"couch",
"potted_plant",
"bed",
"dining_table",
"toilet",
"tv",
"laptop",
"mouse",
"remote",
"keyboard",
"cell_phone",
"microwave",
"oven",
"toaster",
"sink",
"refrigerator",
"book",
"clock",
"vase",
"scissors",
"teddy_bear",
"hair_drier",
"toothbrush",
]
def preprocess(self, img):
# resize image
ResizeM = get_resize_matrix((img.shape[1], img.shape[0]),
self.input_size, True)
img_resize = cv2.warpPerspective(img, ResizeM, dsize=self.input_size)
# normalize image
img_input = img_resize.astype(np.float32) / 255
img_mean = np.array(
self.img_mean, dtype=np.float32).reshape(1, 1, 3) / 255
img_std = np.array(
self.img_std, dtype=np.float32).reshape(1, 1, 3) / 255
img_input = (img_input - img_mean) / img_std
# expand dims
img_input = np.transpose(img_input, [2, 0, 1])
img_input = np.expand_dims(img_input, axis=0)
return img_input, ResizeM
def postprocess(self, scores, raw_boxes, ResizeM, raw_shape):
# generate centers
decode_boxes = []
select_scores = []
for stride, box_distribute, score in zip(self.strides, raw_boxes,
scores):
# centers
fm_h = self.input_shape[0] / stride
fm_w = self.input_shape[1] / stride
h_range = np.arange(fm_h)
w_range = np.arange(fm_w)
ww, hh = np.meshgrid(w_range, h_range)
ct_row = (hh.flatten() + 0.5) * stride
ct_col = (ww.flatten() + 0.5) * stride
center = np.stack((ct_col, ct_row, ct_col, ct_row), axis=1)
# box distribution to distance
reg_range = np.arange(self.reg_max + 1)
box_distance = box_distribute.reshape((-1, self.reg_max + 1))
box_distance = softmax(box_distance, axis=1)
box_distance = box_distance * np.expand_dims(reg_range, axis=0)
box_distance = np.sum(box_distance, axis=1).reshape((-1, 4))
box_distance = box_distance * stride
# top K candidate
topk_idx = np.argsort(score.max(axis=1))[::-1]
topk_idx = topk_idx[:self.num_candidate]
center = center[topk_idx]
score = score[topk_idx]
box_distance = box_distance[topk_idx]
# decode box
decode_box = center + [-1, -1, 1, 1] * box_distance
select_scores.append(score)
decode_boxes.append(decode_box)
# nms
bboxes = np.concatenate(decode_boxes, axis=0)
confidences = np.concatenate(select_scores, axis=0)
picked_box_probs = []
picked_labels = []
for class_index in range(0, confidences.shape[1]):
probs = confidences[:, class_index]
mask = probs > self.prob_threshold
probs = probs[mask]
if probs.shape[0] == 0:
continue
subset_boxes = bboxes[mask, :]
box_probs = np.concatenate(
[subset_boxes, probs.reshape(-1, 1)], axis=1)
box_probs = hard_nms(
box_probs,
iou_threshold=self.iou_threshold,
top_k=self.top_k, )
picked_box_probs.append(box_probs)
picked_labels.extend([class_index] * box_probs.shape[0])
if not picked_box_probs:
return np.array([]), np.array([]), np.array([])
picked_box_probs = np.concatenate(picked_box_probs)
# resize output boxes
picked_box_probs[:, :4] = warp_boxes(picked_box_probs[:, :4],
np.linalg.inv(ResizeM),
raw_shape[1], raw_shape[0])
return (
picked_box_probs[:, :4].astype(np.int32),
np.array(picked_labels),
picked_box_probs[:, 4], )
@abstractmethod
def infer_image(self, img_input):
pass
def detect(self, img):
raw_shape = img.shape
img_input, ResizeM = self.preprocess(img)
scores, raw_boxes = self.infer_image(img_input)
if scores[0].ndim == 1: # handling num_classes=1 case
scores = [x[:, None] for x in scores]
bbox, label, score = self.postprocess(scores, raw_boxes, ResizeM,
raw_shape)
print(bbox, score)
return bbox, label, score
def draw_box(self, raw_img, bbox, label, score):
img = raw_img.copy()
all_box = [[x, ] + y + [z, ]
for x, y, z in zip(label, bbox.tolist(), score)]
img_draw = overlay_bbox_cv(img, all_box, self.class_names)
return img_draw
def detect_folder(self, img_fold, result_path):
img_fold = Path(img_fold)
result_path = Path(result_path)
result_path.mkdir(parents=True, exist_ok=True)
img_name_list = filter(
lambda x: str(x).endswith(".png") or str(x).endswith(".jpg"),
img_fold.iterdir(), )
img_name_list = list(img_name_list)
print(f"find {len(img_name_list)} images")
for img_path in tqdm(img_name_list):
img = cv2.imread(str(img_path))
bbox, label, score = self.detect(img)
img_draw = self.draw_box(img, bbox, label, score)
save_path = str(result_path / img_path.name.replace(".png", ".jpg"))
cv2.imwrite(save_path, img_draw)
class PicoDetMNN(PicoDetABC):
import MNN as MNNlib
def __init__(self, model_path, *args, **kwargs):
super(PicoDetMNN, self).__init__(*args, **kwargs)
print("Using MNN as inference backend")
print(f"Using weight: {model_path}")
# load model
self.model_path = model_path
self.interpreter = self.MNNlib.Interpreter(self.model_path)
self.session = self.interpreter.createSession()
self.input_tensor = self.interpreter.getSessionInput(self.session)
def infer_image(self, img_input):
tmp_input = self.MNNlib.Tensor(
(1, 3, self.input_size[1], self.input_size[0]),
self.MNNlib.Halide_Type_Float,
img_input,
self.MNNlib.Tensor_DimensionType_Caffe, )
self.input_tensor.copyFrom(tmp_input)
self.interpreter.runSession(self.session)
score_out_name = [
"save_infer_model/scale_0.tmp_1", "save_infer_model/scale_1.tmp_1",
"save_infer_model/scale_2.tmp_1", "save_infer_model/scale_3.tmp_1"
]
scores = [
self.interpreter.getSessionOutput(self.session, x).getData()
for x in score_out_name
]
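# Each score head flattens to (num_points, 80) class scores; each box head
# below flattens to (num_points, 32) = 4 sides x (reg_max + 1 = 8) bins.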
scores = [np.reshape(x, (-1, 80)) for x in scores]
boxes_out_name = [
"save_infer_model/scale_4.tmp_1", "save_infer_model/scale_5.tmp_1",
"save_infer_model/scale_6.tmp_1", "save_infer_model/scale_7.tmp_1"
]
raw_boxes = [
self.interpreter.getSessionOutput(self.session, x).getData()
for x in boxes_out_name
]
raw_boxes = [np.reshape(x, (-1, 32)) for x in raw_boxes]
return scores, raw_boxes
class PicoDetONNX(PicoDetABC):
import onnxruntime as ort
def __init__(self, model_path, *args, **kwargs):
super(PicoDetONNX, self).__init__(*args, **kwargs)
print("Using ONNX as inference backend")
print(f"Using weight: {model_path}")
# load model
self.model_path = model_path
self.ort_session = self.ort.InferenceSession(self.model_path)
self.input_name = self.ort_session.get_inputs()[0].name
def infer_image(self, img_input):
inference_results = self.ort_session.run(None,
{self.input_name: img_input})
scores = [np.squeeze(x) for x in inference_results[:4]] # 4 score heads
raw_boxes = [np.squeeze(x) for x in inference_results[4:]] # 4 box heads
return scores, raw_boxes
class PicoDetTorch(PicoDetABC):
import torch
def __init__(self, model_path, cfg_path, *args, **kwargs):
from picodet.model.arch import build_model
from picodet.util import Logger, cfg, load_config, load_model_weight
super(PicoDetTorch, self).__init__(*args, **kwargs)
print("Using PyTorch as inference backend")
print(f"Using weight: {model_path}")
# load model
self.model_path = model_path
self.cfg_path = cfg_path
load_config(cfg, cfg_path)
self.logger = Logger(-1, cfg.save_dir, False)
self.model = build_model(cfg.model)
checkpoint = self.torch.load(
model_path, map_location=lambda storage, loc: storage)
load_model_weight(self.model, checkpoint, self.logger)
def infer_image(self, img_input):
self.model.train(False)
with self.torch.no_grad():
inference_results = self.model(self.torch.from_numpy(img_input))
scores = [
x.permute(0, 2, 3, 1).reshape((-1, 80)).sigmoid().detach().numpy()
for x in inference_results[0]
]
raw_boxes = [
x.permute(0, 2, 3, 1).reshape((-1, 32)).detach().numpy()
for x in inference_results[1]
]
return scores, raw_boxes
def main():
parser = argparse.ArgumentParser()
parser.add_argument(
"--model_path",
dest="model_path",
type=str,
default="../model/picodet-320.mnn")
parser.add_argument(
"--cfg_path", dest="cfg_path", type=str, default="config/picodet-m.yml")
parser.add_argument(
"--img_fold", dest="img_fold", type=str, default="../imgs")
parser.add_argument(
"--result_fold", dest="result_fold", type=str, default="../results")
parser.add_argument(
"--input_shape",
dest="input_shape",
nargs=2,
type=int,
default=[320, 320])
parser.add_argument(
"--backend", choices=["MNN", "ONNX", "torch"], default="MNN")
args = parser.parse_args()
print(f"Detecting {args.img_fold}")
# load detector
if args.backend == "MNN":
detector = PicoDetMNN(args.model_path, input_shape=args.input_shape)
elif args.backend == "ONNX":
detector = PicoDetONNX(args.model_path, input_shape=args.input_shape)
elif args.backend == "torch":
detector = PicoDetTorch(
args.model_path, args.cfg_path, input_shape=args.input_shape)
else:
raise ValueError
# detect folder
detector.detect_folder(args.img_fold, args.result_fold)
def test_one():
detector = PicoDetMNN("../weight/picodet-416.mnn")
img = cv2.imread("../imgs/000252.jpg")
bbox, label, score = detector.detect(img)
img_draw = detector.draw_box(img, bbox, label, score)
cv2.imwrite('picodet_infer.jpg', img_draw)
if __name__ == "__main__":
# main()
test_one()
cmake_minimum_required(VERSION 3.4.1)
set(CMAKE_CXX_STANDARD 17)
project(picodet_demo)
find_package(OpenMP REQUIRED)
if(OPENMP_FOUND)
message("OPENMP FOUND")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}")
endif()
find_package(OpenCV REQUIRED)
find_package(ncnn REQUIRED)
if(NOT TARGET ncnn)
message(WARNING "ncnn NOT FOUND! Please set ncnn_DIR environment variable")
else()
message("ncnn FOUND ")
endif()
include_directories(
${OpenCV_INCLUDE_DIRS}
${CMAKE_CURRENT_SOURCE_DIR}
${CMAKE_CURRENT_BINARY_DIR}
)
add_executable(picodet_demo main.cpp picodet.cpp)
target_link_libraries(
picodet_demo
ncnn
${OpenCV_LIBS}
)
# PicoDet NCNN Demo
This project provides PicoDet image inference, webcam inference, and benchmarking using
[Tencent's NCNN framework](https://github.com/Tencent/ncnn).
# How to build
## Windows
### Step1.
Download and install Visual Studio from https://visualstudio.microsoft.com/vs/community/
### Step2.
Download and install OpenCV from https://github.com/opencv/opencv/releases
### Step3(Optional).
Download and install Vulkan SDK from https://vulkan.lunarg.com/sdk/home
### Step4.
Clone NCNN repository
``` shell script
git clone --recursive https://github.com/Tencent/ncnn.git
```
Build NCNN following this tutorial: [Build for Windows x64 using VS2017](https://github.com/Tencent/ncnn/wiki/how-to-build#build-for-windows-x64-using-visual-studio-community-2017)
### Step5.
Add `ncnn_DIR` = `YOUR_NCNN_PATH/build/install/lib/cmake/ncnn` to system environment variables.
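If you only need it for the current shell session, setting it inline also works (a cmd example; adjust the path to your NCNN install):
``` cmd
set ncnn_DIR=YOUR_NCNN_PATH/build/install/lib/cmake/ncnn
```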
Build project: Open x64 Native Tools Command Prompt for VS 2019 or 2017
``` cmd
cd <this-folder>
mkdir build
cd build
cmake ..
msbuild picodet_demo.vcxproj /p:configuration=release /p:platform=x64
```
## Linux
### Step1.
Build and install OpenCV from https://github.com/opencv/opencv
### Step2(Optional).
Download Vulkan SDK from https://vulkan.lunarg.com/sdk/home
### Step3.
Clone NCNN repository
``` shell script
git clone --recursive https://github.com/Tencent/ncnn.git
```
Build NCNN following this tutorial: [Build for Linux / NVIDIA Jetson / Raspberry Pi](https://github.com/Tencent/ncnn/wiki/how-to-build#build-for-linux)
### Step4.
Set environment variables. Run:
``` shell script
export ncnn_DIR=YOUR_NCNN_PATH/build/install/lib/cmake/ncnn
```
Build project
``` shell script
cd <this-folder>
mkdir build
cd build
cmake ..
make
```
# Run demo
Download PicoDet ncnn model.
* [PicoDet ncnn model download link](https://paddledet.bj.bcebos.com/deploy/third_engine/picodet_m_416_ncnn.zip)
## Webcam
```shell script
picodet_demo 0 0
```
## Inference images
```shell script
picodet_demo 1 IMAGE_FOLDER/*.jpg
```
## Inference video
```shell script
picodet_demo 2 VIDEO_PATH
```
## Benchmark
```shell script
picodet_demo 3 0
result: picodet min = 17.74 max = 22.71 avg = 18.16
```
****
Notice:
If the benchmark runs slowly, try limiting the number of OpenMP threads.
Linux:
```shell script
export OMP_THREAD_LIMIT=4
```
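The Windows (cmd) equivalent would be:
```shell script
set OMP_THREAD_LIMIT=4
```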
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// reference from https://github.com/RangiLyu/nanodet/tree/main/demo_ncnn
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <iostream>
#include <net.h>
#include "picodet.h"
#include <benchmark.h>
struct object_rect {
int x;
int y;
int width;
int height;
};
int resize_uniform(cv::Mat& src, cv::Mat& dst, cv::Size dst_size, object_rect& effect_area)
{
int w = src.cols;
int h = src.rows;
int dst_w = dst_size.width;
int dst_h = dst_size.height;
dst = cv::Mat(cv::Size(dst_w, dst_h), CV_8UC3, cv::Scalar(0));
float ratio_src = w * 1.0 / h;
float ratio_dst = dst_w * 1.0 / dst_h;
int tmp_w = 0;
int tmp_h = 0;
if (ratio_src > ratio_dst) {
tmp_w = dst_w;
tmp_h = floor((dst_w * 1.0 / w) * h);
}
else if (ratio_src < ratio_dst) {
tmp_h = dst_h;
tmp_w = floor((dst_h * 1.0 / h) * w);
}
else {
cv::resize(src, dst, dst_size);
effect_area.x = 0;
effect_area.y = 0;
effect_area.width = dst_w;
effect_area.height = dst_h;
return 0;
}
cv::Mat tmp;
cv::resize(src, tmp, cv::Size(tmp_w, tmp_h));
if (tmp_w != dst_w) {
int index_w = floor((dst_w - tmp_w) / 2.0);
for (int i = 0; i < dst_h; i++) {
memcpy(dst.data + i * dst_w * 3 + index_w * 3, tmp.data + i * tmp_w * 3, tmp_w * 3);
}
effect_area.x = index_w;
effect_area.y = 0;
effect_area.width = tmp_w;
effect_area.height = tmp_h;
}
else if (tmp_h != dst_h) {
int index_h = floor((dst_h - tmp_h) / 2.0);
memcpy(dst.data + index_h * dst_w * 3, tmp.data, tmp_w * tmp_h * 3);
effect_area.x = 0;
effect_area.y = index_h;
effect_area.width = tmp_w;
effect_area.height = tmp_h;
}
else {
printf("error\n");
}
return 0;
}
const int color_list[80][3] =
{
{216 , 82 , 24},
{236 ,176 , 31},
{125 , 46 ,141},
{118 ,171 , 47},
{ 76 ,189 ,237},
{238 , 19 , 46},
{ 76 , 76 , 76},
{153 ,153 ,153},
{255 , 0 , 0},
{255 ,127 , 0},
{190 ,190 , 0},
{ 0 ,255 , 0},
{ 0 , 0 ,255},
{170 , 0 ,255},
{ 84 , 84 , 0},
{ 84 ,170 , 0},
{ 84 ,255 , 0},
{170 , 84 , 0},
{170 ,170 , 0},
{170 ,255 , 0},
{255 , 84 , 0},
{255 ,170 , 0},
{255 ,255 , 0},
{ 0 , 84 ,127},
{ 0 ,170 ,127},
{ 0 ,255 ,127},
{ 84 , 0 ,127},
{ 84 , 84 ,127},
{ 84 ,170 ,127},
{ 84 ,255 ,127},
{170 , 0 ,127},
{170 , 84 ,127},
{170 ,170 ,127},
{170 ,255 ,127},
{255 , 0 ,127},
{255 , 84 ,127},
{255 ,170 ,127},
{255 ,255 ,127},
{ 0 , 84 ,255},
{ 0 ,170 ,255},
{ 0 ,255 ,255},
{ 84 , 0 ,255},
{ 84 , 84 ,255},
{ 84 ,170 ,255},
{ 84 ,255 ,255},
{170 , 0 ,255},
{170 , 84 ,255},
{170 ,170 ,255},
{170 ,255 ,255},
{255 , 0 ,255},
{255 , 84 ,255},
{255 ,170 ,255},
{ 42 , 0 , 0},
{ 84 , 0 , 0},
{127 , 0 , 0},
{170 , 0 , 0},
{212 , 0 , 0},
{255 , 0 , 0},
{ 0 , 42 , 0},
{ 0 , 84 , 0},
{ 0 ,127 , 0},
{ 0 ,170 , 0},
{ 0 ,212 , 0},
{ 0 ,255 , 0},
{ 0 , 0 , 42},
{ 0 , 0 , 84},
{ 0 , 0 ,127},
{ 0 , 0 ,170},
{ 0 , 0 ,212},
{ 0 , 0 ,255},
{ 0 , 0 , 0},
{ 36 , 36 , 36},
{ 72 , 72 , 72},
{109 ,109 ,109},
{145 ,145 ,145},
{182 ,182 ,182},
{218 ,218 ,218},
{ 0 ,113 ,188},
{ 80 ,182 ,188},
{127 ,127 , 0},
};
void draw_bboxes(const cv::Mat& bgr, const std::vector<BoxInfo>& bboxes, object_rect effect_roi)
{
static const char* class_names[] = { "person", "bicycle", "car", "motorcycle", "airplane", "bus",
"train", "truck", "boat", "traffic light", "fire hydrant",
"stop sign", "parking meter", "bench", "bird", "cat", "dog",
"horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe",
"backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
"skis", "snowboard", "sports ball", "kite", "baseball bat",
"baseball glove", "skateboard", "surfboard", "tennis racket",
"bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl",
"banana", "apple", "sandwich", "orange", "broccoli", "carrot",
"hot dog", "pizza", "donut", "cake", "chair", "couch",
"potted plant", "bed", "dining table", "toilet", "tv", "laptop",
"mouse", "remote", "keyboard", "cell phone", "microwave", "oven",
"toaster", "sink", "refrigerator", "book", "clock", "vase",
"scissors", "teddy bear", "hair drier", "toothbrush"
};
cv::Mat image = bgr.clone();
int src_w = image.cols;
int src_h = image.rows;
int dst_w = effect_roi.width;
int dst_h = effect_roi.height;
float width_ratio = (float)src_w / (float)dst_w;
float height_ratio = (float)src_h / (float)dst_h;
for (size_t i = 0; i < bboxes.size(); i++)
{
const BoxInfo& bbox = bboxes[i];
cv::Scalar color = cv::Scalar(color_list[bbox.label][0], color_list[bbox.label][1], color_list[bbox.label][2]);
cv::rectangle(image, cv::Rect(cv::Point((bbox.x1 - effect_roi.x) * width_ratio, (bbox.y1 - effect_roi.y) * height_ratio),
cv::Point((bbox.x2 - effect_roi.x) * width_ratio, (bbox.y2 - effect_roi.y) * height_ratio)), color);
char text[256];
sprintf(text, "%s %.1f%%", class_names[bbox.label], bbox.score * 100);
int baseLine = 0;
cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.4, 1, &baseLine);
int x = (bbox.x1 - effect_roi.x) * width_ratio;
int y = (bbox.y1 - effect_roi.y) * height_ratio - label_size.height - baseLine;
if (y < 0)
y = 0;
if (x + label_size.width > image.cols)
x = image.cols - label_size.width;
cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)),
color, -1);
cv::putText(image, text, cv::Point(x, y + label_size.height),
cv::FONT_HERSHEY_SIMPLEX, 0.4, cv::Scalar(255, 255, 255));
}
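// NOTE: every call writes to the same fixed path, so earlier results are
// overwritten; pass a per-image path if you need to keep them all.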
cv::imwrite("../result/test_picodet.jpg", image);
printf("************infer image success!!!**********\n");
}
int image_demo(PicoDet &detector, const char* imagepath)
{
std::vector<cv::String> filenames;
cv::glob(imagepath, filenames, false);
for (auto img_name : filenames)
{
cv::Mat image = cv::imread(img_name);
if (image.empty())
{
fprintf(stderr, "cv::imread %s failed\n", img_name);
return -1;
}
object_rect effect_roi;
cv::Mat resized_img;
resize_uniform(image, resized_img, cv::Size(320, 320), effect_roi);
auto results = detector.detect(resized_img, 0.4, 0.5);
draw_bboxes(image, results, effect_roi);
cv::waitKey(0);
}
return 0;
}
int webcam_demo(PicoDet& detector, int cam_id)
{
cv::Mat image;
cv::VideoCapture cap(cam_id);
while (true)
{
cap >> image;
object_rect effect_roi;
cv::Mat resized_img;
resize_uniform(image, resized_img, cv::Size(320, 320), effect_roi);
auto results = detector.detect(resized_img, 0.4, 0.5);
draw_bboxes(image, results, effect_roi);
cv::waitKey(1);
}
return 0;
}
int video_demo(PicoDet& detector, const char* path)
{
cv::Mat image;
cv::VideoCapture cap(path);
while (true)
{
cap >> image;
object_rect effect_roi;
cv::Mat resized_img;
resize_uniform(image, resized_img, cv::Size(320, 320), effect_roi);
auto results = detector.detect(resized_img, 0.4, 0.5);
draw_bboxes(image, results, effect_roi);
cv::waitKey(1);
}
return 0;
}
int benchmark(PicoDet& detector)
{
int loop_num = 100;
int warm_up = 8;
double time_min = DBL_MAX;
double time_max = -DBL_MAX;
double time_avg = 0;
ncnn::Mat input = ncnn::Mat(320, 320, 3);
input.fill(0.01f);
for (int i = 0; i < warm_up + loop_num; i++)
{
double start = ncnn::get_current_time();
ncnn::Extractor ex = detector.Net->create_extractor();
ex.input("image", input); // picodet
for (const auto& head_info : detector.heads_info)
{
ncnn::Mat dis_pred;
ncnn::Mat cls_pred;
ex.extract(head_info.dis_layer.c_str(), dis_pred);
ex.extract(head_info.cls_layer.c_str(), cls_pred);
}
double end = ncnn::get_current_time();
double time = end - start;
if (i >= warm_up)
{
time_min = (std::min)(time_min, time);
time_max = (std::max)(time_max, time);
time_avg += time;
}
}
time_avg /= loop_num;
fprintf(stderr, "%20s min = %7.2f max = %7.2f avg = %7.2f\n", "picodet", time_min, time_max, time_avg);
return 0;
}
int main(int argc, char** argv)
{
if (argc != 3)
{
fprintf(stderr, "usage: %s [mode] [path]. \n For webcam mode=0, path is cam id; \n For image demo, mode=1, path=xxx/xxx/*.jpg; \n For video, mode=2; \n For benchmark, mode=3 path=0.\n", argv[0]);
return -1;
}
PicoDet detector = PicoDet("../weight/picodet_m_416.param", "../weight/picodet_m_416.bin", true);
int mode = atoi(argv[1]);
switch (mode)
{
case 0:{
int cam_id = atoi(argv[2]);
webcam_demo(detector, cam_id);
break;
}
case 1:{
const char* images = argv[2];
image_demo(detector, images);
break;
}
case 2:{
const char* path = argv[2];
video_demo(detector, path);
break;
}
case 3:{
benchmark(detector);
break;
}
default:{
fprintf(stderr, "usage: %s [mode] [path]. \n For webcam mode=0, path is cam id; \n For image demo, mode=1, path=xxx/xxx/*.jpg; \n For video, mode=2; \n For benchmark, mode=3 path=0.\n", argv[0]);
break;
}
}
}
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// reference from https://github.com/RangiLyu/nanodet/tree/main/demo_ncnn
#include "picodet.h"
#include <benchmark.h>
#include <iostream>
inline float fast_exp(float x)
{
union {
uint32_t i;
float f;
} v{};
v.i = (1 << 23) * (1.4426950409 * x + 126.93490512f);
return v.f;
}
inline float sigmoid(float x)
{
return 1.0f / (1.0f + fast_exp(-x));
}
template<typename _Tp>
int activation_function_softmax(const _Tp* src, _Tp* dst, int length)
{
const _Tp alpha = *std::max_element(src, src + length);
_Tp denominator{ 0 };
for (int i = 0; i < length; ++i) {
dst[i] = fast_exp(src[i] - alpha);
denominator += dst[i];
}
for (int i = 0; i < length; ++i) {
dst[i] /= denominator;
}
return 0;
}
bool PicoDet::hasGPU = false;
PicoDet* PicoDet::detector = nullptr;
PicoDet::PicoDet(const char* param, const char* bin, bool useGPU)
{
this->Net = new ncnn::Net();
#if NCNN_VULKAN
this->hasGPU = ncnn::get_gpu_count() > 0;
#endif
this->Net->opt.use_vulkan_compute = this->hasGPU && useGPU;
this->Net->opt.use_fp16_arithmetic = true;
this->Net->load_param(param);
this->Net->load_model(bin);
}
PicoDet::~PicoDet()
{
delete this->Net;
}
void PicoDet::preprocess(cv::Mat& image, ncnn::Mat& in)
{
int img_w = image.cols;
int img_h = image.rows;
in = ncnn::Mat::from_pixels(image.data, ncnn::Mat::PIXEL_BGR, img_w, img_h);
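// norm_vals are 1/std for the ImageNet BGR std {57.375, 57.12, 58.395};
// ncnn computes (pixel - mean) * norm in substract_mean_normalize.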
const float mean_vals[3] = { 103.53f, 116.28f, 123.675f };
const float norm_vals[3] = { 0.017429f, 0.017507f, 0.017125f };
in.substract_mean_normalize(mean_vals, norm_vals);
}
std::vector<BoxInfo> PicoDet::detect(cv::Mat image, float score_threshold, float nms_threshold)
{
ncnn::Mat input;
preprocess(image, input);
auto ex = this->Net->create_extractor();
ex.set_light_mode(false);
ex.set_num_threads(4);
#if NCNN_VULKAN
ex.set_vulkan_compute(this->hasGPU);
#endif
ex.input("image", input); //picodet
std::vector<std::vector<BoxInfo>> results;
results.resize(this->num_class);
for (const auto& head_info : this->heads_info)
{
ncnn::Mat dis_pred;
ncnn::Mat cls_pred;
ex.extract(head_info.dis_layer.c_str(), dis_pred);
ex.extract(head_info.cls_layer.c_str(), cls_pred);
this->decode_infer(cls_pred, dis_pred, head_info.stride, score_threshold, results);
}
std::vector<BoxInfo> dets;
for (int i = 0; i < (int)results.size(); i++)
{
this->nms(results[i], nms_threshold);
for (auto box : results[i])
{
dets.push_back(box);
}
}
return dets;
}
void PicoDet::decode_infer(ncnn::Mat& cls_pred, ncnn::Mat& dis_pred, int stride, float threshold, std::vector<std::vector<BoxInfo>>& results)
{
int feature_h = this->input_size[1] / stride;
int feature_w = this->input_size[0] / stride;
for (int idx = 0; idx < feature_h * feature_w; idx++)
{
const float* scores = cls_pred.row(idx);
int row = idx / feature_w;
int col = idx % feature_w;
float score = 0;
int cur_label = 0;
for (int label = 0; label < this->num_class; label++)
{
if (scores[label] > score)
{
score = scores[label];
cur_label = label;
}
}
if (score > threshold)
{
const float* bbox_pred = dis_pred.row(idx);
results[cur_label].push_back(this->disPred2Bbox(bbox_pred, cur_label, score, col, row, stride));
}
}
}
BoxInfo PicoDet::disPred2Bbox(const float*& dfl_det, int label, float score, int x, int y, int stride)
{
float ct_x = (x + 0.5) * stride;
float ct_y = (y + 0.5) * stride;
std::vector<float> dis_pred;
dis_pred.resize(4);
for (int i = 0; i < 4; i++)
{
float dis = 0;
float* dis_after_sm = new float[this->reg_max + 1];
activation_function_softmax(dfl_det + i * (this->reg_max + 1), dis_after_sm, this->reg_max + 1);
for (int j = 0; j < this->reg_max + 1; j++)
{
dis += j * dis_after_sm[j];
}
dis *= stride;
dis_pred[i] = dis;
delete[] dis_after_sm;
}
float xmin = (std::max)(ct_x - dis_pred[0], .0f);
float ymin = (std::max)(ct_y - dis_pred[1], .0f);
float xmax = (std::min)(ct_x + dis_pred[2], (float)this->input_size[0]);
float ymax = (std::min)(ct_y + dis_pred[3], (float)this->input_size[1]);
return BoxInfo { xmin, ymin, xmax, ymax, score, label };
}
void PicoDet::nms(std::vector<BoxInfo>& input_boxes, float NMS_THRESH)
{
std::sort(input_boxes.begin(), input_boxes.end(), [](BoxInfo a, BoxInfo b) { return a.score > b.score; });
std::vector<float> vArea(input_boxes.size());
for (int i = 0; i < int(input_boxes.size()); ++i) {
vArea[i] = (input_boxes.at(i).x2 - input_boxes.at(i).x1 + 1)
* (input_boxes.at(i).y2 - input_boxes.at(i).y1 + 1);
}
for (int i = 0; i < int(input_boxes.size()); ++i) {
for (int j = i + 1; j < int(input_boxes.size());) {
float xx1 = (std::max)(input_boxes[i].x1, input_boxes[j].x1);
float yy1 = (std::max)(input_boxes[i].y1, input_boxes[j].y1);
float xx2 = (std::min)(input_boxes[i].x2, input_boxes[j].x2);
float yy2 = (std::min)(input_boxes[i].y2, input_boxes[j].y2);
float w = (std::max)(float(0), xx2 - xx1 + 1);
float h = (std::max)(float(0), yy2 - yy1 + 1);
float inter = w * h;
float ovr = inter / (vArea[i] + vArea[j] - inter);
if (ovr >= NMS_THRESH) {
input_boxes.erase(input_boxes.begin() + j);
vArea.erase(vArea.begin() + j);
}
else {
j++;
}
}
}
}
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// reference from https://github.com/RangiLyu/nanodet/tree/main/demo_ncnn
#ifndef PICODET_H
#define PICODET_H
#include <opencv2/core/core.hpp>
#include <net.h>
typedef struct HeadInfo
{
std::string cls_layer;
std::string dis_layer;
int stride;
} HeadInfo;
typedef struct BoxInfo
{
float x1;
float y1;
float x2;
float y2;
float score;
int label;
} BoxInfo;
class PicoDet
{
public:
PicoDet(const char* param, const char* bin, bool useGPU);
~PicoDet();
static PicoDet* detector;
ncnn::Net* Net;
static bool hasGPU;
std::vector<HeadInfo> heads_info{
// cls_pred|dis_pred|stride
{"save_infer_model/scale_0.tmp_1", "save_infer_model/scale_4.tmp_1", 8},
{"save_infer_model/scale_1.tmp_1", "save_infer_model/scale_5.tmp_1", 16},
{"save_infer_model/scale_2.tmp_1", "save_infer_model/scale_6.tmp_1", 32},
{"save_infer_model/scale_3.tmp_1", "save_infer_model/scale_7.tmp_1", 64},
};
std::vector<BoxInfo> detect(cv::Mat image, float score_threshold, float nms_threshold);
std::vector<std::string> labels{ "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
"fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
"elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
"skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
"tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
"sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
"potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
"microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
"hair drier", "toothbrush" };
private:
void preprocess(cv::Mat& image, ncnn::Mat& in);
void decode_infer(ncnn::Mat& cls_pred, ncnn::Mat& dis_pred, int stride, float threshold, std::vector<std::vector<BoxInfo>>& results);
BoxInfo disPred2Bbox(const float*& dfl_det, int label, float score, int x, int y, int stride);
static void nms(std::vector<BoxInfo>& result, float nms_threshold);
int input_size[2] = {320, 320};
int num_class = 80;
int reg_max = 7;
};
#endif
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# reference from https://github.com/RangiLyu/nanodet/tree/main/demo_ncnn
# -*- coding: utf-8 -*-
import argparse
from abc import ABCMeta, abstractmethod
from pathlib import Path
import cv2
import numpy as np
from scipy.special import softmax
from tqdm import tqdm
_COLORS = (np.array([
    0.000, 0.447, 0.741,
    0.850, 0.325, 0.098,
    0.929, 0.694, 0.125,
    0.494, 0.184, 0.556,
    0.466, 0.674, 0.188,
    0.301, 0.745, 0.933,
    0.635, 0.078, 0.184,
    0.300, 0.300, 0.300,
    0.600, 0.600, 0.600,
    1.000, 0.000, 0.000,
    1.000, 0.500, 0.000,
    0.749, 0.749, 0.000,
    0.000, 1.000, 0.000,
    0.000, 0.000, 1.000,
    0.667, 0.000, 1.000,
    0.333, 0.333, 0.000,
    0.333, 0.667, 0.000,
    0.333, 1.000, 0.000,
    0.667, 0.333, 0.000,
    0.667, 0.667, 0.000,
    0.667, 1.000, 0.000,
    1.000, 0.333, 0.000,
    1.000, 0.667, 0.000,
    1.000, 1.000, 0.000,
    0.000, 0.333, 0.500,
    0.000, 0.667, 0.500,
    0.000, 1.000, 0.500,
    0.333, 0.000, 0.500,
    0.333, 0.333, 0.500,
    0.333, 0.667, 0.500,
    0.333, 1.000, 0.500,
    0.667, 0.000, 0.500,
    0.667, 0.333, 0.500,
    0.667, 0.667, 0.500,
    0.667, 1.000, 0.500,
    1.000, 0.000, 0.500,
    1.000, 0.333, 0.500,
    1.000, 0.667, 0.500,
    1.000, 1.000, 0.500,
    0.000, 0.333, 1.000,
    0.000, 0.667, 1.000,
    0.000, 1.000, 1.000,
    0.333, 0.000, 1.000,
    0.333, 0.333, 1.000,
    0.333, 0.667, 1.000,
    0.333, 1.000, 1.000,
    0.667, 0.000, 1.000,
    0.667, 0.333, 1.000,
    0.667, 0.667, 1.000,
    0.667, 1.000, 1.000,
    1.000, 0.000, 1.000,
    1.000, 0.333, 1.000,
    1.000, 0.667, 1.000,
    0.333, 0.000, 0.000,
    0.500, 0.000, 0.000,
    0.667, 0.000, 0.000,
    0.833, 0.000, 0.000,
    1.000, 0.000, 0.000,
    0.000, 0.167, 0.000,
    0.000, 0.333, 0.000,
    0.000, 0.500, 0.000,
    0.000, 0.667, 0.000,
    0.000, 0.833, 0.000,
    0.000, 1.000, 0.000,
    0.000, 0.000, 0.167,
    0.000, 0.000, 0.333,
    0.000, 0.000, 0.500,
    0.000, 0.000, 0.667,
    0.000, 0.000, 0.833,
    0.000, 0.000, 1.000,
    0.000, 0.000, 0.000,
    0.143, 0.143, 0.143,
    0.286, 0.286, 0.286,
    0.429, 0.429, 0.429,
    0.571, 0.571, 0.571,
    0.714, 0.714, 0.714,
    0.857, 0.857, 0.857,
    0.000, 0.447, 0.741,
    0.314, 0.717, 0.741,
    0.50, 0.5, 0,
]).astype(np.float32).reshape(-1, 3))
def get_resize_matrix(raw_shape, dst_shape, keep_ratio):
"""
Get resize matrix for resizing raw img to input size
:param raw_shape: (width, height) of raw image
:param dst_shape: (width, height) of input image
    :param keep_ratio: whether to keep the original aspect ratio
:return: 3x3 Matrix
"""
r_w, r_h = raw_shape
d_w, d_h = dst_shape
Rs = np.eye(3)
if keep_ratio:
C = np.eye(3)
C[0, 2] = -r_w / 2
C[1, 2] = -r_h / 2
if r_w / r_h < d_w / d_h:
ratio = d_h / r_h
else:
ratio = d_w / r_w
Rs[0, 0] *= ratio
Rs[1, 1] *= ratio
T = np.eye(3)
T[0, 2] = 0.5 * d_w
T[1, 2] = 0.5 * d_h
        return T @ Rs @ C
else:
Rs[0, 0] *= d_w / r_w
Rs[1, 1] *= d_h / r_h
return Rs
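# In the keep_ratio branch above, M = T @ Rs @ C composes three transforms:
# C moves the image center to the origin, Rs applies the uniform scale, and
# T re-centers the result on the destination canvas.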
def warp_boxes(boxes, M, width, height):
"""Apply transform to boxes
Copy from picodet/data/transform/warp.py
"""
n = len(boxes)
if n:
# warp points
xy = np.ones((n * 4, 3))
xy[:, :2] = boxes[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(
n * 4, 2) # x1y1, x2y2, x1y2, x2y1
        xy = xy @ M.T  # transform
xy = (xy[:, :2] / xy[:, 2:3]).reshape(n, 8) # rescale
# create new boxes
x = xy[:, [0, 2, 4, 6]]
y = xy[:, [1, 3, 5, 7]]
xy = np.concatenate(
(x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T
# clip boxes
xy[:, [0, 2]] = xy[:, [0, 2]].clip(0, width)
xy[:, [1, 3]] = xy[:, [1, 3]].clip(0, height)
return xy.astype(np.float32)
else:
return boxes
def overlay_bbox_cv(img, all_box, class_names):
"""Draw result boxes
Copy from picodet/util/visualization.py
"""
all_box.sort(key=lambda v: v[5])
for box in all_box:
label, x0, y0, x1, y1, score = box
color = (_COLORS[label] * 255).astype(np.uint8).tolist()
text = "{}:{:.1f}%".format(class_names[label], score * 100)
txt_color = (0, 0, 0) if np.mean(_COLORS[label]) > 0.5 else (255, 255,
255)
font = cv2.FONT_HERSHEY_SIMPLEX
txt_size = cv2.getTextSize(text, font, 0.5, 2)[0]
cv2.rectangle(img, (x0, y0), (x1, y1), color, 2)
cv2.rectangle(
img,
(x0, y0 - txt_size[1] - 1),
(x0 + txt_size[0] + txt_size[1], y0 - 1),
color,
-1, )
cv2.putText(img, text, (x0, y0 - 1), font, 0.5, txt_color, thickness=1)
return img
def hard_nms(box_scores, iou_threshold, top_k=-1, candidate_size=200):
"""
Args:
box_scores (N, 5): boxes in corner-form and probabilities.
iou_threshold: intersection over union threshold.
top_k: keep top_k results. If k <= 0, keep all the results.
candidate_size: only consider the candidates with the highest scores.
Returns:
picked: a list of indexes of the kept boxes
"""
scores = box_scores[:, -1]
boxes = box_scores[:, :-1]
picked = []
indexes = np.argsort(scores)
indexes = indexes[-candidate_size:]
while len(indexes) > 0:
current = indexes[-1]
picked.append(current)
if 0 < top_k == len(picked) or len(indexes) == 1:
break
current_box = boxes[current, :]
indexes = indexes[:-1]
rest_boxes = boxes[indexes, :]
iou = iou_of(
rest_boxes,
np.expand_dims(
current_box, axis=0), )
indexes = indexes[iou <= iou_threshold]
return box_scores[picked, :]
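# Example (hypothetical values): hard_nms takes an (N, 5) array of
# [x1, y1, x2, y2, score] rows and returns the kept rows, e.g.
#   boxes = np.array([[0., 0., 10., 10., 0.9], [1., 1., 11., 11., 0.8]])
#   kept = hard_nms(boxes, iou_threshold=0.5)
# keeps only the 0.9-score row, since the two boxes overlap with IoU ~0.68.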
def iou_of(boxes0, boxes1, eps=1e-5):
"""Return intersection-over-union (Jaccard index) of boxes.
Args:
boxes0 (N, 4): ground truth boxes.
boxes1 (N or 1, 4): predicted boxes.
eps: a small number to avoid 0 as denominator.
Returns:
iou (N): IoU values.
"""
overlap_left_top = np.maximum(boxes0[..., :2], boxes1[..., :2])
overlap_right_bottom = np.minimum(boxes0[..., 2:], boxes1[..., 2:])
overlap_area = area_of(overlap_left_top, overlap_right_bottom)
area0 = area_of(boxes0[..., :2], boxes0[..., 2:])
area1 = area_of(boxes1[..., :2], boxes1[..., 2:])
return overlap_area / (area0 + area1 - overlap_area + eps)
def area_of(left_top, right_bottom):
"""Compute the areas of rectangles given two corners.
Args:
left_top (N, 2): left top corner.
right_bottom (N, 2): right bottom corner.
Returns:
area (N): return the area.
"""
hw = np.clip(right_bottom - left_top, 0.0, None)
return hw[..., 0] * hw[..., 1]
class picodetABC(metaclass=ABCMeta):
def __init__(
self,
input_shape=[320, 320],
reg_max=7,
strides=[8, 16, 32],
prob_threshold=0.4,
iou_threshold=0.3,
num_candidate=1000,
top_k=-1, ):
self.strides = strides
self.input_shape = input_shape
self.reg_max = reg_max
self.prob_threshold = prob_threshold
self.iou_threshold = iou_threshold
self.num_candidate = num_candidate
self.top_k = top_k
self.img_mean = [103.53, 116.28, 123.675]
self.img_std = [57.375, 57.12, 58.395]
self.input_size = (self.input_shape[1], self.input_shape[0])
self.class_names = [
"person",
"bicycle",
"car",
"motorcycle",
"airplane",
"bus",
"train",
"truck",
"boat",
"traffic_light",
"fire_hydrant",
"stop_sign",
"parking_meter",
"bench",
"bird",
"cat",
"dog",
"horse",
"sheep",
"cow",
"elephant",
"bear",
"zebra",
"giraffe",
"backpack",
"umbrella",
"handbag",
"tie",
"suitcase",
"frisbee",
"skis",
"snowboard",
"sports_ball",
"kite",
"baseball_bat",
"baseball_glove",
"skateboard",
"surfboard",
"tennis_racket",
"bottle",
"wine_glass",
"cup",
"fork",
"knife",
"spoon",
"bowl",
"banana",
"apple",
"sandwich",
"orange",
"broccoli",
"carrot",
"hot_dog",
"pizza",
"donut",
"cake",
"chair",
"couch",
"potted_plant",
"bed",
"dining_table",
"toilet",
"tv",
"laptop",
"mouse",
"remote",
"keyboard",
"cell_phone",
"microwave",
"oven",
"toaster",
"sink",
"refrigerator",
"book",
"clock",
"vase",
"scissors",
"teddy_bear",
"hair_drier",
"toothbrush",
]
def preprocess(self, img):
# resize image
ResizeM = get_resize_matrix((img.shape[1], img.shape[0]),
self.input_size, True)
img_resize = cv2.warpPerspective(img, ResizeM, dsize=self.input_size)
# normalize image
img_input = img_resize.astype(np.float32) / 255
img_mean = np.array(
self.img_mean, dtype=np.float32).reshape(1, 1, 3) / 255
img_std = np.array(
self.img_std, dtype=np.float32).reshape(1, 1, 3) / 255
img_input = (img_input - img_mean) / img_std
# expand dims
img_input = np.transpose(img_input, [2, 0, 1])
img_input = np.expand_dims(img_input, axis=0)
return img_input, ResizeM
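    # postprocess mirrors the C++ decode path: build per-stride center grids,
    # turn each DFL distribution into a distance via its softmax expectation,
    # keep the top candidates, then run per-class hard NMS; boxes are finally
    # warped back to the raw image with the inverse of ResizeM.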
def postprocess(self, scores, raw_boxes, ResizeM, raw_shape):
# generate centers
decode_boxes = []
select_scores = []
for stride, box_distribute, score in zip(self.strides, raw_boxes,
scores):
# centers
fm_h = self.input_shape[0] / stride
fm_w = self.input_shape[1] / stride
h_range = np.arange(fm_h)
w_range = np.arange(fm_w)
ww, hh = np.meshgrid(w_range, h_range)
ct_row = (hh.flatten() + 0.5) * stride
ct_col = (ww.flatten() + 0.5) * stride
center = np.stack((ct_col, ct_row, ct_col, ct_row), axis=1)
# box distribution to distance
reg_range = np.arange(self.reg_max + 1)
box_distance = box_distribute.reshape((-1, self.reg_max + 1))
box_distance = softmax(box_distance, axis=1)
box_distance = box_distance * np.expand_dims(reg_range, axis=0)
box_distance = np.sum(box_distance, axis=1).reshape((-1, 4))
box_distance = box_distance * stride
# top K candidate
topk_idx = np.argsort(score.max(axis=1))[::-1]
topk_idx = topk_idx[:self.num_candidate]
center = center[topk_idx]
score = score[topk_idx]
box_distance = box_distance[topk_idx]
# decode box
decode_box = center + [-1, -1, 1, 1] * box_distance
select_scores.append(score)
decode_boxes.append(decode_box)
# nms
bboxes = np.concatenate(decode_boxes, axis=0)
confidences = np.concatenate(select_scores, axis=0)
picked_box_probs = []
picked_labels = []
for class_index in range(0, confidences.shape[1]):
probs = confidences[:, class_index]
mask = probs > self.prob_threshold
probs = probs[mask]
if probs.shape[0] == 0:
continue
subset_boxes = bboxes[mask, :]
box_probs = np.concatenate(
[subset_boxes, probs.reshape(-1, 1)], axis=1)
box_probs = hard_nms(
box_probs,
iou_threshold=self.iou_threshold,
top_k=self.top_k, )
picked_box_probs.append(box_probs)
picked_labels.extend([class_index] * box_probs.shape[0])
if not picked_box_probs:
return np.array([]), np.array([]), np.array([])
picked_box_probs = np.concatenate(picked_box_probs)
# resize output boxes
picked_box_probs[:, :4] = warp_boxes(picked_box_probs[:, :4],
np.linalg.inv(ResizeM),
raw_shape[1], raw_shape[0])
return (
picked_box_probs[:, :4].astype(np.int32),
np.array(picked_labels),
picked_box_probs[:, 4], )
@abstractmethod
def infer_image(self, img_input):
pass
def detect(self, img):
raw_shape = img.shape
img_input, ResizeM = self.preprocess(img)
scores, raw_boxes = self.infer_image(img_input)
if scores[0].ndim == 1: # handling num_classes=1 case
scores = [x[:, None] for x in scores]
bbox, label, score = self.postprocess(scores, raw_boxes, ResizeM,
raw_shape)
return bbox, label, score
def draw_box(self, raw_img, bbox, label, score):
img = raw_img.copy()
all_box = [[x, ] + y + [z, ]
for x, y, z in zip(label, bbox.tolist(), score)]
img_draw = overlay_bbox_cv(img, all_box, self.class_names)
return img_draw
def detect_folder(self, img_fold, result_path):
img_fold = Path(img_fold)
result_path = Path(result_path)
result_path.mkdir(parents=True, exist_ok=True)
img_name_list = filter(
lambda x: str(x).endswith(".png") or str(x).endswith(".jpg"),
img_fold.iterdir(), )
img_name_list = list(img_name_list)
print(f"find {len(img_name_list)} images")
for img_path in tqdm(img_name_list):
img = cv2.imread(str(img_path))
bbox, label, score = self.detect(img)
img_draw = self.draw_box(img, bbox, label, score)
save_path = str(result_path / img_path.name.replace(".png", ".jpg"))
cv2.imwrite(save_path, img_draw)
class picodetONNX(picodetABC):
def __init__(self, model_path, *args, **kwargs):
import onnxruntime as ort
super(picodetONNX, self).__init__(*args, **kwargs)
print("Using ONNX as inference backend")
print(f"Using weight: {model_path}")
# load model
self.model_path = model_path
self.ort_session = ort.InferenceSession(self.model_path)
self.input_name = self.ort_session.get_inputs()[0].name
def infer_image(self, img_input):
inference_results = self.ort_session.run(None,
{self.input_name: img_input})
scores = [np.squeeze(x) for x in inference_results[:3]]
raw_boxes = [np.squeeze(x) for x in inference_results[3:]]
return scores, raw_boxes
class picodetTorch(picodetABC):
def __init__(self, model_path, cfg_path, *args, **kwargs):
import torch
from picodet.model.arch import build_model
from picodet.util import Logger, cfg, load_config, load_model_weight
super(picodetTorch, self).__init__(*args, **kwargs)
print("Using PyTorch as inference backend")
print(f"Using weight: {model_path}")
# load model
self.model_path = model_path
self.cfg_path = cfg_path
load_config(cfg, cfg_path)
self.logger = Logger(-1, cfg.save_dir, False)
self.model = build_model(cfg.model)
checkpoint = torch.load(
model_path, map_location=lambda storage, loc: storage)
load_model_weight(self.model, checkpoint, self.logger)
def infer_image(self, img_input):
import torch
self.model.train(False)
with torch.no_grad():
inference_results = self.model(torch.from_numpy(img_input))
scores = [
x.permute(0, 2, 3, 1).reshape((-1, 80)).sigmoid().detach().numpy()
for x in inference_results[0]
]
raw_boxes = [
x.permute(0, 2, 3, 1).reshape((-1, 32)).detach().numpy()
for x in inference_results[1]
]
return scores, raw_boxes
class picodetNCNN(picodetABC):
def __init__(self, model_param, model_bin, *args, **kwargs):
import ncnn
super(picodetNCNN, self).__init__(*args, **kwargs)
print("Using ncnn as inference backend")
print(f"Using param: {model_param}, bin: {model_bin}")
# load model
self.model_param = model_param
self.model_bin = model_bin
self.net = ncnn.Net()
self.net.load_param(model_param)
self.net.load_model(model_bin)
self.input_name = "input.1"
def infer_image(self, img_input):
import ncnn
mat_in = ncnn.Mat(img_input.squeeze())
ex = self.net.create_extractor()
ex.input(self.input_name, mat_in)
score_out_name = [
"save_infer_model/scale_0.tmp_1", "save_infer_model/scale_1.tmp_1",
"save_infer_model/scale_2.tmp_1", "save_infer_model/scale_3.tmp_1"
]
scores = [np.array(ex.extract(x)[1]) for x in score_out_name]
scores = [np.reshape(x, (-1, 80)) for x in scores]
boxes_out_name = [
"save_infer_model/scale_4.tmp_1", "save_infer_model/scale_5.tmp_1",
"save_infer_model/scale_6.tmp_1", "save_infer_model/scale_7.tmp_1"
]
raw_boxes = [np.array(ex.extract(x)[1]) for x in boxes_out_name]
raw_boxes = [np.reshape(x, (-1, 32)) for x in raw_boxes]
return scores, raw_boxes
def main():
parser = argparse.ArgumentParser()
parser.add_argument(
"--model_path",
dest="model_path",
type=str,
default="../model/picodet.param")
parser.add_argument(
"--model_bin",
dest="model_bin",
type=str,
default="../model/picodet.bin")
parser.add_argument(
"--cfg_path", dest="cfg_path", type=str, default="config/picodet.yml")
parser.add_argument(
"--img_fold", dest="img_fold", type=str, default="../imgs")
parser.add_argument(
"--result_fold", dest="result_fold", type=str, default="../results")
parser.add_argument(
"--input_shape",
dest="input_shape",
nargs=2,
type=int,
default=[320, 320])
parser.add_argument(
"--backend", choices=["ncnn", "ONNX", "torch"], default="ncnn")
args = parser.parse_args()
print(f"Detecting {args.img_fold}")
# load detector
if args.backend == "ncnn":
detector = picodetNCNN(
args.model_path, args.model_bin, input_shape=args.input_shape)
elif args.backend == "ONNX":
detector = picodetONNX(args.model_path, input_shape=args.input_shape)
elif args.backend == "torch":
detector = picodetTorch(
args.model_path, args.cfg_path, input_shape=args.input_shape)
else:
        raise ValueError(f"unsupported backend: {args.backend}")
# detect folder
detector.detect_folder(args.img_fold, args.result_fold)
def test_one():
detector = picodetNCNN("../weight/picodet_m_416.param",
"../weight/picodet_m_416.bin")
img = cv2.imread("../000000000102.jpg")
bbox, label, score = detector.detect(img)
img_draw = detector.draw_box(img, bbox, label, score)
    # draw_box returns a BGR image, which is what cv2.imwrite expects;
    # flipping the channels before saving would produce color-swapped output.
    cv2.imwrite('python_version.jpg', img_draw)
if __name__ == "__main__":
# main()
test_one()
cmake_minimum_required(VERSION 3.4.1)
set(CMAKE_CXX_STANDARD 14)
project(picodet_demo)
find_package(OpenCV REQUIRED)
find_package(InferenceEngine REQUIRED)
find_package(ngraph REQUIRED)
include_directories(
${OpenCV_INCLUDE_DIRS}
${CMAKE_CURRENT_SOURCE_DIR}
${CMAKE_CURRENT_BINARY_DIR}
)
add_executable(picodet_demo main.cpp picodet_openvino.cpp)
target_link_libraries(
picodet_demo
${InferenceEngine_LIBRARIES}
${NGRAPH_LIBRARIES}
${OpenCV_LIBS}
)
# PicoDet OpenVINO Demo
This folder provides PicoDet inference code using
[Intel's OpenVINO Toolkit](https://software.intel.com/content/www/us/en/develop/tools/openvino-toolkit.html). Most of the implementation in this folder is the same as *demo_ncnn*.
**Recommendation:** install OpenVINO from the xxx.tar.gz archive rather than building from the GitHub sources.
## Install OpenVINO Toolkit
Go to [OpenVINO HomePage](https://software.intel.com/content/www/us/en/develop/tools/openvino-toolkit.html)
Download a suitable version and install.
Follow the official Get Started Guides: https://docs.openvinotoolkit.org/latest/get_started_guides.html
## Set the Environment Variables
### Windows:
Run this command in cmd (every time before using OpenVINO):
```cmd
<INSTALL_DIR>\openvino_2021\bin\setupvars.bat
```
Or set the system environment variables once and for all:
Name |Value
:--------------------:|:--------:
INTEL_OPENVINO_DIR | <INSTALL_DIR>\openvino_2021
INTEL_CVSDK_DIR | %INTEL_OPENVINO_DIR%
InferenceEngine_DIR | %INTEL_OPENVINO_DIR%\deployment_tools\inference_engine\share
HDDL_INSTALL_DIR | %INTEL_OPENVINO_DIR%\deployment_tools\inference_engine\external\hddl
ngraph_DIR | %INTEL_OPENVINO_DIR%\deployment_tools\ngraph\cmake
And add the following to ```Path```:
```
%INTEL_OPENVINO_DIR%\deployment_tools\inference_engine\bin\intel64\Debug;%INTEL_OPENVINO_DIR%\deployment_tools\inference_engine\bin\intel64\Release;%HDDL_INSTALL_DIR%\bin;%INTEL_OPENVINO_DIR%\deployment_tools\inference_engine\external\tbb\bin;%INTEL_OPENVINO_DIR%\deployment_tools\ngraph\lib
```
### Linux
Run this command in your shell (every time before using OpenVINO):
```shell
source /opt/intel/openvino_2021/bin/setupvars.sh
```
Or edit .bashrc
```shell
vi ~/.bashrc
```
Add this line to the end of the file
```shell
source /opt/intel/openvino_2021/bin/setupvars.sh
```
## Convert model
Convert to OpenVINO
``` shell
cd <INSTALL_DIR>/openvino_2021/deployment_tools/model_optimizer
```
Install requirements for convert tool
```shell
cd ./install_prerequisites
sudo ./install_prerequisites_onnx.sh
```
Then convert the model. Note: `mean_values` and `scale_values` must match the normalization settings used for training in your YAML config file.
```shell
python3 mo_onnx.py --input_model <ONNX_MODEL> --mean_values [103.53,116.28,123.675] --scale_values [57.375,57.12,58.395]
```
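To double-check that correspondence, here is a minimal sketch (assuming the mean/scale values shown above; the pixel value is hypothetical) of the normalization the Model Optimizer bakes into the converted graph, i.e. `(pixel - mean) / scale` per channel:
``` python
import numpy as np

# values passed to mo_onnx.py above (BGR channel order)
mean = np.array([103.53, 116.28, 123.675], dtype=np.float32)
scale = np.array([57.375, 57.12, 58.395], dtype=np.float32)

pixel = np.array([128, 128, 128], dtype=np.float32)  # an arbitrary BGR pixel
print((pixel - mean) / scale)  # the tensor values the network actually sees
```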
## Build
### Windows
```cmd
<OPENVINO_INSTALL_DIR>\openvino_2021\bin\setupvars.bat
mkdir build
cd build
cmake ..
msbuild picodet_demo.vcxproj /p:configuration=release /p:platform=x64
```
### Linux
```shell
source /opt/intel/openvino_2021/bin/setupvars.sh
mkdir build
cd build
cmake ..
make
```
## Run demo
Download the PicoDet OpenVINO model: [PicoDet OpenVINO model download link](https://paddledet.bj.bcebos.com/deploy/third_engine/picodet_m_416_openvino.zip).
Move the PicoDet OpenVINO model files to the demo's *weight* folder, then run these commands:
### Webcam
```shell
picodet_demo 0 0
```
### Inference images
```shell
picodet_demo 1 IMAGE_FOLDER/*.jpg
```
### Inference video
```shell
picodet_demo 2 VIDEO_PATH
```
### Benchmark
```shell
picodet_demo 3 0
```
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// reference from https://github.com/RangiLyu/nanodet
#include "picodet_openvino.h"
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <iostream>
struct object_rect {
int x;
int y;
int width;
int height;
};
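// resize_uniform letterboxes src into dst_size while preserving the aspect
// ratio; effect_area records where the resized image sits inside the padded
// canvas so draw_bboxes can map detections back to source coordinates.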
int resize_uniform(cv::Mat& src, cv::Mat& dst, cv::Size dst_size, object_rect& effect_area)
{
int w = src.cols;
int h = src.rows;
int dst_w = dst_size.width;
int dst_h = dst_size.height;
dst = cv::Mat(cv::Size(dst_w, dst_h), CV_8UC3, cv::Scalar(0));
float ratio_src = w * 1.0 / h;
float ratio_dst = dst_w * 1.0 / dst_h;
int tmp_w = 0;
int tmp_h = 0;
if (ratio_src > ratio_dst) {
tmp_w = dst_w;
tmp_h = floor((dst_w * 1.0 / w) * h);
}
else if (ratio_src < ratio_dst) {
tmp_h = dst_h;
tmp_w = floor((dst_h * 1.0 / h) * w);
}
else {
cv::resize(src, dst, dst_size);
effect_area.x = 0;
effect_area.y = 0;
effect_area.width = dst_w;
effect_area.height = dst_h;
return 0;
}
cv::Mat tmp;
cv::resize(src, tmp, cv::Size(tmp_w, tmp_h));
if (tmp_w != dst_w) {
int index_w = floor((dst_w - tmp_w) / 2.0);
for (int i = 0; i < dst_h; i++) {
memcpy(dst.data + i * dst_w * 3 + index_w * 3, tmp.data + i * tmp_w * 3, tmp_w * 3);
}
effect_area.x = index_w;
effect_area.y = 0;
effect_area.width = tmp_w;
effect_area.height = tmp_h;
}
else if (tmp_h != dst_h) {
int index_h = floor((dst_h - tmp_h) / 2.0);
memcpy(dst.data + index_h * dst_w * 3, tmp.data, tmp_w * tmp_h * 3);
effect_area.x = 0;
effect_area.y = index_h;
effect_area.width = tmp_w;
effect_area.height = tmp_h;
}
else {
printf("error\n");
}
return 0;
}
const int color_list[80][3] =
{
{216 , 82 , 24},
{236 ,176 , 31},
{125 , 46 ,141},
{118 ,171 , 47},
{ 76 ,189 ,237},
{238 , 19 , 46},
{ 76 , 76 , 76},
{153 ,153 ,153},
{255 , 0 , 0},
{255 ,127 , 0},
{190 ,190 , 0},
{ 0 ,255 , 0},
{ 0 , 0 ,255},
{170 , 0 ,255},
{ 84 , 84 , 0},
{ 84 ,170 , 0},
{ 84 ,255 , 0},
{170 , 84 , 0},
{170 ,170 , 0},
{170 ,255 , 0},
{255 , 84 , 0},
{255 ,170 , 0},
{255 ,255 , 0},
{ 0 , 84 ,127},
{ 0 ,170 ,127},
{ 0 ,255 ,127},
{ 84 , 0 ,127},
{ 84 , 84 ,127},
{ 84 ,170 ,127},
{ 84 ,255 ,127},
{170 , 0 ,127},
{170 , 84 ,127},
{170 ,170 ,127},
{170 ,255 ,127},
{255 , 0 ,127},
{255 , 84 ,127},
{255 ,170 ,127},
{255 ,255 ,127},
{ 0 , 84 ,255},
{ 0 ,170 ,255},
{ 0 ,255 ,255},
{ 84 , 0 ,255},
{ 84 , 84 ,255},
{ 84 ,170 ,255},
{ 84 ,255 ,255},
{170 , 0 ,255},
{170 , 84 ,255},
{170 ,170 ,255},
{170 ,255 ,255},
{255 , 0 ,255},
{255 , 84 ,255},
{255 ,170 ,255},
{ 42 , 0 , 0},
{ 84 , 0 , 0},
{127 , 0 , 0},
{170 , 0 , 0},
{212 , 0 , 0},
{255 , 0 , 0},
{ 0 , 42 , 0},
{ 0 , 84 , 0},
{ 0 ,127 , 0},
{ 0 ,170 , 0},
{ 0 ,212 , 0},
{ 0 ,255 , 0},
{ 0 , 0 , 42},
{ 0 , 0 , 84},
{ 0 , 0 ,127},
{ 0 , 0 ,170},
{ 0 , 0 ,212},
{ 0 , 0 ,255},
{ 0 , 0 , 0},
{ 36 , 36 , 36},
{ 72 , 72 , 72},
{109 ,109 ,109},
{145 ,145 ,145},
{182 ,182 ,182},
{218 ,218 ,218},
{ 0 ,113 ,188},
{ 80 ,182 ,188},
{127 ,127 , 0},
};
void draw_bboxes(const cv::Mat& bgr, const std::vector<BoxInfo>& bboxes, object_rect effect_roi)
{
static const char* class_names[] = { "person", "bicycle", "car", "motorcycle", "airplane", "bus",
"train", "truck", "boat", "traffic light", "fire hydrant",
"stop sign", "parking meter", "bench", "bird", "cat", "dog",
"horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe",
"backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
"skis", "snowboard", "sports ball", "kite", "baseball bat",
"baseball glove", "skateboard", "surfboard", "tennis racket",
"bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl",
"banana", "apple", "sandwich", "orange", "broccoli", "carrot",
"hot dog", "pizza", "donut", "cake", "chair", "couch",
"potted plant", "bed", "dining table", "toilet", "tv", "laptop",
"mouse", "remote", "keyboard", "cell phone", "microwave", "oven",
"toaster", "sink", "refrigerator", "book", "clock", "vase",
"scissors", "teddy bear", "hair drier", "toothbrush"
};
cv::Mat image = bgr.clone();
int src_w = image.cols;
int src_h = image.rows;
int dst_w = effect_roi.width;
int dst_h = effect_roi.height;
float width_ratio = (float)src_w / (float)dst_w;
float height_ratio = (float)src_h / (float)dst_h;
for (size_t i = 0; i < bboxes.size(); i++)
{
const BoxInfo& bbox = bboxes[i];
cv::Scalar color = cv::Scalar(color_list[bbox.label][0], color_list[bbox.label][1], color_list[bbox.label][2]);
cv::rectangle(image, cv::Rect(cv::Point((bbox.x1 - effect_roi.x) * width_ratio, (bbox.y1 - effect_roi.y) * height_ratio),
cv::Point((bbox.x2 - effect_roi.x) * width_ratio, (bbox.y2 - effect_roi.y) * height_ratio)), color);
char text[256];
sprintf(text, "%s %.1f%%", class_names[bbox.label], bbox.score * 100);
int baseLine = 0;
cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.4, 1, &baseLine);
int x = (bbox.x1 - effect_roi.x) * width_ratio;
int y = (bbox.y1 - effect_roi.y) * height_ratio - label_size.height - baseLine;
if (y < 0)
y = 0;
if (x + label_size.width > image.cols)
x = image.cols - label_size.width;
cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)),
color, -1);
cv::putText(image, text, cv::Point(x, y + label_size.height),
cv::FONT_HERSHEY_SIMPLEX, 0.4, cv::Scalar(255, 255, 255));
}
cv::imshow("image", image);
}
int image_demo(PicoDet& detector, const char* imagepath)
{
std::vector<std::string> filenames;
cv::glob(imagepath, filenames, false);
    for (const auto& img_name : filenames)
{
cv::Mat image = cv::imread(img_name);
        if (image.empty())
        {
            fprintf(stderr, "cv::imread %s failed\n", img_name.c_str());
            return -1;
        }
object_rect effect_roi;
cv::Mat resized_img;
resize_uniform(image, resized_img, cv::Size(320, 320), effect_roi);
auto results = detector.detect(resized_img, 0.4, 0.5);
        draw_bboxes(image, results, effect_roi);
        cv::waitKey(0);  // draw_bboxes only calls imshow; wait for a key per image
}
return 0;
}
int webcam_demo(PicoDet& detector, int cam_id)
{
cv::Mat image;
cv::VideoCapture cap(cam_id);
while (true)
{
        cap >> image;
        if (image.empty())
            break;  // camera stream ended or failed
object_rect effect_roi;
cv::Mat resized_img;
resize_uniform(image, resized_img, cv::Size(320, 320), effect_roi);
auto results = detector.detect(resized_img, 0.4, 0.5);
draw_bboxes(image, results, effect_roi);
cv::waitKey(1);
}
return 0;
}
int video_demo(PicoDet& detector, const char* path)
{
cv::Mat image;
cv::VideoCapture cap(path);
while (true)
{
        cap >> image;
        if (image.empty())
            break;  // end of video
object_rect effect_roi;
cv::Mat resized_img;
resize_uniform(image, resized_img, cv::Size(320, 320), effect_roi);
auto results = detector.detect(resized_img, 0.4, 0.5);
draw_bboxes(image, results, effect_roi);
cv::waitKey(1);
}
return 0;
}
int benchmark(PicoDet& detector)
{
int loop_num = 100;
int warm_up = 8;
double time_min = DBL_MAX;
double time_max = -DBL_MAX;
double time_avg = 0;
cv::Mat image(320, 320, CV_8UC3, cv::Scalar(1, 1, 1));
for (int i = 0; i < warm_up + loop_num; i++)
{
auto start = std::chrono::steady_clock::now();
std::vector<BoxInfo> results;
results = detector.detect(image, 0.4, 0.5);
auto end = std::chrono::steady_clock::now();
double time = std::chrono::duration<double, std::milli>(end - start).count();
if (i >= warm_up)
{
time_min = (std::min)(time_min, time);
time_max = (std::max)(time_max, time);
time_avg += time;
}
}
time_avg /= loop_num;
fprintf(stderr, "%20s min = %7.2f max = %7.2f avg = %7.2f\n", "picodet", time_min, time_max, time_avg);
return 0;
}
int main(int argc, char** argv)
{
if (argc != 3)
{
fprintf(stderr, "usage: %s [mode] [path]. \n For webcam mode=0, path is cam id; \n For image demo, mode=1, path=xxx/xxx/*.jpg; \n For video, mode=2; \n For benchmark, mode=3 path=0.\n", argv[0]);
return -1;
}
std::cout<<"start init model"<<std::endl;
auto detector = PicoDet("../weight/picodet_m_416.xml");
std::cout<<"success"<<std::endl;
int mode = atoi(argv[1]);
switch (mode)
{
case 0:{
int cam_id = atoi(argv[2]);
webcam_demo(detector, cam_id);
break;
}
case 1:{
const char* images = argv[2];
image_demo(detector, images);
break;
}
case 2:{
const char* path = argv[2];
video_demo(detector, path);
break;
}
case 3:{
benchmark(detector);
break;
}
default:{
fprintf(stderr, "usage: %s [mode] [path]. \n For webcam mode=0, path is cam id; \n For image demo, mode=1, path=xxx/xxx/*.jpg; \n For video, mode=2; \n For benchmark, mode=3 path=0.\n", argv[0]);
break;
}
}
}
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// reference from https://github.com/RangiLyu/nanodet/tree/main/demo_openvino
#include "picodet_openvino.h"
inline float fast_exp(float x)
{
union {
uint32_t i;
float f;
} v{};
v.i = (1 << 23) * (1.4426950409 * x + 126.93490512f);
return v.f;
}
inline float sigmoid(float x)
{
return 1.0f / (1.0f + fast_exp(-x));
}
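// Stable softmax: subtracting the max element keeps fast_exp's argument in a
// safe range before normalizing by the sum of exponentials.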
template<typename _Tp>
int activation_function_softmax(const _Tp* src, _Tp* dst, int length)
{
const _Tp alpha = *std::max_element(src, src + length);
_Tp denominator{ 0 };
for (int i = 0; i < length; ++i)
{
dst[i] = fast_exp(src[i] - alpha);
denominator += dst[i];
}
for (int i = 0; i < length; ++i)
{
dst[i] /= denominator;
}
return 0;
}
PicoDet::PicoDet(const char* model_path)
{
InferenceEngine::Core ie;
InferenceEngine::CNNNetwork model = ie.ReadNetwork(model_path);
// prepare input settings
InferenceEngine::InputsDataMap inputs_map(model.getInputsInfo());
input_name_ = inputs_map.begin()->first;
InferenceEngine::InputInfo::Ptr input_info = inputs_map.begin()->second;
//prepare output settings
InferenceEngine::OutputsDataMap outputs_map(model.getOutputsInfo());
for (auto &output_info : outputs_map)
{
output_info.second->setPrecision(InferenceEngine::Precision::FP32);
}
//get network
network_ = ie.LoadNetwork(model, "CPU");
infer_request_ = network_.CreateInferRequest();
}
PicoDet::~PicoDet()
{
}
void PicoDet::preprocess(cv::Mat& image, InferenceEngine::Blob::Ptr& blob)
{
int img_w = image.cols;
int img_h = image.rows;
int channels = 3;
InferenceEngine::MemoryBlob::Ptr mblob = InferenceEngine::as<InferenceEngine::MemoryBlob>(blob);
if (!mblob)
{
        THROW_IE_EXCEPTION << "Expected the input blob to inherit from MemoryBlob, "
                           << "but the cast in PicoDet::preprocess failed";
}
auto mblobHolder = mblob->wmap();
float *blob_data = mblobHolder.as<float *>();
for (size_t c = 0; c < channels; c++)
{
for (size_t h = 0; h < img_h; h++)
{
for (size_t w = 0; w < img_w; w++)
{
blob_data[c * img_w * img_h + h * img_w + w] =
(float)image.at<cv::Vec3b>(h, w)[c];
}
}
}
}
std::vector<BoxInfo> PicoDet::detect(cv::Mat image, float score_threshold, float nms_threshold)
{
InferenceEngine::Blob::Ptr input_blob = infer_request_.GetBlob(input_name_);
preprocess(image, input_blob);
// do inference
infer_request_.Infer();
// get output
std::vector<std::vector<BoxInfo>> results;
results.resize(this->num_class_);
for (const auto& head_info : this->heads_info_)
{
const InferenceEngine::Blob::Ptr dis_pred_blob = infer_request_.GetBlob(head_info.dis_layer);
const InferenceEngine::Blob::Ptr cls_pred_blob = infer_request_.GetBlob(head_info.cls_layer);
auto mdis_pred = InferenceEngine::as<InferenceEngine::MemoryBlob>(dis_pred_blob);
auto mdis_pred_holder = mdis_pred->rmap();
const float *dis_pred = mdis_pred_holder.as<const float *>();
auto mcls_pred = InferenceEngine::as<InferenceEngine::MemoryBlob>(cls_pred_blob);
auto mcls_pred_holder = mcls_pred->rmap();
const float *cls_pred = mcls_pred_holder.as<const float *>();
this->decode_infer(cls_pred, dis_pred, head_info.stride, score_threshold, results);
}
std::vector<BoxInfo> dets;
for (int i = 0; i < (int)results.size(); i++)
{
this->nms(results[i], nms_threshold);
for (auto& box : results[i])
{
dets.push_back(box);
}
}
return dets;
}
void PicoDet::decode_infer(const float*& cls_pred, const float*& dis_pred, int stride, float threshold, std::vector<std::vector<BoxInfo>>& results)
{
int feature_h = input_size_ / stride;
int feature_w = input_size_ / stride;
for (int idx = 0; idx < feature_h * feature_w; idx++)
{
int row = idx / feature_w;
int col = idx % feature_w;
float score = 0;
int cur_label = 0;
for (int label = 0; label < num_class_; label++)
{
            if (cls_pred[idx * num_class_ + label] > score)
{
score = cls_pred[idx * num_class_ + label];
cur_label = label;
}
}
if (score > threshold)
{
const float* bbox_pred = dis_pred + idx * (reg_max_ + 1) * 4;
results[cur_label].push_back(this->disPred2Bbox(bbox_pred, cur_label, score, col, row, stride));
}
}
}
BoxInfo PicoDet::disPred2Bbox(const float*& dfl_det, int label, float score, int x, int y, int stride)
{
float ct_x = (x + 0.5) * stride;
float ct_y = (y + 0.5) * stride;
std::vector<float> dis_pred;
dis_pred.resize(4);
for (int i = 0; i < 4; i++)
{
float dis = 0;
float* dis_after_sm = new float[reg_max_ + 1];
activation_function_softmax(dfl_det + i * (reg_max_ + 1), dis_after_sm, reg_max_ + 1);
for (int j = 0; j < reg_max_ + 1; j++)
{
dis += j * dis_after_sm[j];
}
dis *= stride;
dis_pred[i] = dis;
delete[] dis_after_sm;
}
float xmin = (std::max)(ct_x - dis_pred[0], .0f);
float ymin = (std::max)(ct_y - dis_pred[1], .0f);
float xmax = (std::min)(ct_x + dis_pred[2], (float)this->input_size_);
float ymax = (std::min)(ct_y + dis_pred[3], (float)this->input_size_);
return BoxInfo { xmin, ymin, xmax, ymax, score, label };
}
void PicoDet::nms(std::vector<BoxInfo>& input_boxes, float NMS_THRESH)
{
std::sort(input_boxes.begin(), input_boxes.end(), [](BoxInfo a, BoxInfo b) { return a.score > b.score; });
std::vector<float> vArea(input_boxes.size());
for (int i = 0; i < int(input_boxes.size()); ++i)
{
vArea[i] = (input_boxes.at(i).x2 - input_boxes.at(i).x1 + 1)
* (input_boxes.at(i).y2 - input_boxes.at(i).y1 + 1);
}
for (int i = 0; i < int(input_boxes.size()); ++i)
{
for (int j = i + 1; j < int(input_boxes.size());)
{
float xx1 = (std::max)(input_boxes[i].x1, input_boxes[j].x1);
float yy1 = (std::max)(input_boxes[i].y1, input_boxes[j].y1);
float xx2 = (std::min)(input_boxes[i].x2, input_boxes[j].x2);
float yy2 = (std::min)(input_boxes[i].y2, input_boxes[j].y2);
float w = (std::max)(float(0), xx2 - xx1 + 1);
float h = (std::max)(float(0), yy2 - yy1 + 1);
float inter = w * h;
float ovr = inter / (vArea[i] + vArea[j] - inter);
if (ovr >= NMS_THRESH)
{
input_boxes.erase(input_boxes.begin() + j);
vArea.erase(vArea.begin() + j);
}
else
{
j++;
}
}
}
}
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// reference from https://github.com/RangiLyu/nanodet/tree/main/demo_openvino
#ifndef _PICODET_OPENVINO_H_
#define _PICODET_OPENVINO_H_
#include <string>
#include <opencv2/core.hpp>
#include <inference_engine.hpp>
typedef struct HeadInfo
{
std::string cls_layer;
std::string dis_layer;
int stride;
} HeadInfo;
typedef struct BoxInfo
{
float x1;
float y1;
float x2;
float y2;
float score;
int label;
} BoxInfo;
class PicoDet
{
public:
    PicoDet(const char* model_path);
~PicoDet();
InferenceEngine::ExecutableNetwork network_;
InferenceEngine::InferRequest infer_request_;
// static bool hasGPU;
std::vector<HeadInfo> heads_info_{
// cls_pred|dis_pred|stride
{"save_infer_model/scale_0.tmp_1", "save_infer_model/scale_4.tmp_1", 8},
{"save_infer_model/scale_1.tmp_1", "save_infer_model/scale_5.tmp_1", 16},
{"save_infer_model/scale_2.tmp_1", "save_infer_model/scale_6.tmp_1", 32},
{"save_infer_model/scale_3.tmp_1", "save_infer_model/scale_7.tmp_1", 64},
};
std::vector<BoxInfo> detect(cv::Mat image, float score_threshold, float nms_threshold);
private:
void preprocess(cv::Mat& image, InferenceEngine::Blob::Ptr& blob);
void decode_infer(const float*& cls_pred, const float*& dis_pred, int stride, float threshold, std::vector<std::vector<BoxInfo>>& results);
BoxInfo disPred2Bbox(const float*& dfl_det, int label, float score, int x, int y, int stride);
static void nms(std::vector<BoxInfo>& result, float nms_threshold);
std::string input_name_;
int input_size_ = 320;
int num_class_ = 80;
int reg_max_ = 7;
};
#endif