未验证 提交 2580dc01 编写于 作者: G Guanghua Yu 提交者: GitHub

fix picodet cpp infer (#5065)

上级 b367361e
...@@ -14,25 +14,24 @@ ...@@ -14,25 +14,24 @@
#pragma once #pragma once
#include <string> #include <cmath>
#include <vector>
#include <memory>
#include <utility>
#include <ctime> #include <ctime>
#include <memory>
#include <numeric> #include <numeric>
#include <string>
#include <utility>
#include <vector>
#include "include/utils.h" #include "include/utils.h"
namespace PaddleDetection { namespace PaddleDetection {
void PicoDetPostProcess(std::vector<PaddleDetection::ObjectResult>* results, void PicoDetPostProcess(std::vector<PaddleDetection::ObjectResult> *results,
std::vector<const float *> outs, std::vector<const float *> outs,
std::vector<int> fpn_stride, std::vector<int> fpn_stride,
std::vector<float> im_shape, std::vector<float> im_shape,
std::vector<float> scale_factor, std::vector<float> scale_factor,
float score_threshold = 0.3, float score_threshold = 0.3, float nms_threshold = 0.5,
float nms_threshold = 0.5, int num_class = 80, int reg_max = 7);
int num_class = 80,
int reg_max = 7);
} // namespace PaddleDetection } // namespace PaddleDetection
\ No newline at end of file
...@@ -20,79 +20,76 @@ ...@@ -20,79 +20,76 @@
namespace PaddleDetection { namespace PaddleDetection {
// Fast approximation of exp(x).
//
// The result is produced by writing a scaled and biased copy of x straight
// into the bit pattern of a float, so it is only an approximation of exp.
//
// The argument is saturated to [-87, 88] first. Outside roughly that range
// the scaled value below is negative or no longer describes a finite float,
// and converting a negative floating-point value to uint32_t is undefined
// behaviour. NaN fails the first comparison and is treated like the lower
// bound.
float fast_exp(float x) {
  const float kMinArg = -87.0f;
  const float kMaxArg = 88.0f;
  if (!(x >= kMinArg)) {
    x = kMinArg;
  } else if (x > kMaxArg) {
    x = kMaxArg;
  }
  union {
    uint32_t i;
    float f;
  } v{};
  v.i = (1 << 23) * (1.4426950409 * x + 126.93490512f);
  return v.f;
}
// Softmax over `length` contiguous values.
//
// Reads src[0..length) and writes the normalised weights to dst[0..length).
// The largest input is subtracted before exponentiation, so every argument
// handed to fast_exp is <= 0. fast_exp is approximate, and so is the output.
//
// Returns 0 on success and -1 when there is nothing to process (null pointer
// or non-positive length); in that case dst is left untouched. Without this
// check an empty range would dereference the end iterator returned by
// std::max_element.
template <typename T>
int activation_function_softmax(const T *src, T *dst, int length) {
  if (src == nullptr || dst == nullptr || length <= 0) {
    return -1;
  }
  const T alpha = *std::max_element(src, src + length);
  T denominator{0};

  for (int i = 0; i < length; ++i) {
    dst[i] = fast_exp(src[i] - alpha);
    denominator += dst[i];
  }

  for (int i = 0; i < length; ++i) {
    dst[i] /= denominator;
  }

  return 0;
}
// PicoDet decode // PicoDet decode
// Converts the box-distribution output of one feature-map cell into a box.
//
// dfl_det  Points at 4 * (reg_max + 1) values: one group of reg_max + 1
//          values per box side.
// label    Class id stored in the result.
// score    Confidence stored in the result.
// x, y     Column and row of the cell on the feature map.
// stride   Stride of the feature map; scales the cell centre and distances.
// im_shape Network input size. PicoDetPostProcess reads height from [0] and
//          width from [1], so x is clamped against im_shape[1] and y against
//          im_shape[0].
// reg_max  Number of values per side minus one.
//
// Returns the box as integer {xmin, ymin, xmax, ymax} clamped to the input.
PaddleDetection::ObjectResult
disPred2Bbox(const float *&dfl_det, int label, float score, int x, int y,
             int stride, std::vector<float> im_shape, int reg_max) {
  const float ct_x = (x + 0.5) * stride;
  const float ct_y = (y + 0.5) * stride;
  const int bins = reg_max + 1;

  std::vector<float> dis_pred(4);
  // One scratch buffer reused for all four sides instead of new[]/delete[]
  // on every iteration.
  std::vector<float> dis_after_sm(bins);
  for (int i = 0; i < 4; i++) {
    activation_function_softmax(dfl_det + i * bins, dis_after_sm.data(), bins);
    // Expected value of the distribution: sum of index * weight.
    float dis = 0;
    for (int j = 0; j < bins; j++) {
      dis += j * dis_after_sm[j];
    }
    dis *= stride;
    dis_pred[i] = dis;
  }

  const float in_h = im_shape[0];
  const float in_w = im_shape[1];
  int xmin = static_cast<int>((std::max)(ct_x - dis_pred[0], .0f));
  int ymin = static_cast<int>((std::max)(ct_y - dis_pred[1], .0f));
  int xmax = static_cast<int>((std::min)(ct_x + dis_pred[2], in_w));
  int ymax = static_cast<int>((std::min)(ct_y + dis_pred[3], in_h));

  PaddleDetection::ObjectResult result_item;
  result_item.rect = {xmin, ymin, xmax, ymax};
  result_item.class_id = label;
  result_item.confidence = score;

  return result_item;
}
void PicoDetPostProcess(std::vector<PaddleDetection::ObjectResult> *results,
void PicoDetPostProcess(std::vector<PaddleDetection::ObjectResult>* results, std::vector<const float *> outs,
std::vector<const float *> outs, std::vector<int> fpn_stride,
std::vector<int> fpn_stride, std::vector<float> im_shape,
std::vector<float> im_shape, std::vector<float> scale_factor, float score_threshold,
std::vector<float> scale_factor, float nms_threshold, int num_class, int reg_max) {
float score_threshold,
float nms_threshold,
int num_class,
int reg_max) {
std::vector<std::vector<PaddleDetection::ObjectResult>> bbox_results; std::vector<std::vector<PaddleDetection::ObjectResult>> bbox_results;
bbox_results.resize(num_class); bbox_results.resize(num_class);
int in_h = im_shape[0], in_w = im_shape[1]; int in_h = im_shape[0], in_w = im_shape[1];
for (int i = 0; i < fpn_stride.size(); ++i) { for (int i = 0; i < fpn_stride.size(); ++i) {
int feature_h = in_h / fpn_stride[i]; int feature_h = std::ceil((float)in_h / fpn_stride[i]);
int feature_w = in_w / fpn_stride[i]; int feature_w = std::ceil((float)in_w / fpn_stride[i]);
for (int idx = 0; idx < feature_h * feature_w; idx++) { for (int idx = 0; idx < feature_h * feature_w; idx++) {
const float *scores = outs[i] + (idx * num_class); const float *scores = outs[i] + (idx * num_class);
...@@ -107,10 +104,11 @@ void PicoDetPostProcess(std::vector<PaddleDetection::ObjectResult>* results, ...@@ -107,10 +104,11 @@ void PicoDetPostProcess(std::vector<PaddleDetection::ObjectResult>* results,
} }
} }
if (score > score_threshold) { if (score > score_threshold) {
const float *bbox_pred = outs[i + fpn_stride.size()] const float *bbox_pred =
+ (idx * 4 * (reg_max + 1)); outs[i + fpn_stride.size()] + (idx * 4 * (reg_max + 1));
bbox_results[cur_label].push_back(disPred2Bbox(bbox_pred, bbox_results[cur_label].push_back(
cur_label, score, col, row, fpn_stride[i], im_shape, reg_max)); disPred2Bbox(bbox_pred, cur_label, score, col, row, fpn_stride[i],
im_shape, reg_max));
} }
} }
} }
...@@ -118,13 +116,13 @@ void PicoDetPostProcess(std::vector<PaddleDetection::ObjectResult>* results, ...@@ -118,13 +116,13 @@ void PicoDetPostProcess(std::vector<PaddleDetection::ObjectResult>* results,
PaddleDetection::nms(bbox_results[i], nms_threshold); PaddleDetection::nms(bbox_results[i], nms_threshold);
for (auto box : bbox_results[i]) { for (auto box : bbox_results[i]) {
box.rect[0] = box.rect[0] / scale_factor[1]; box.rect[0] = box.rect[0] / scale_factor[1];
box.rect[2] = box.rect[2] / scale_factor[1]; box.rect[2] = box.rect[2] / scale_factor[1];
box.rect[1] = box.rect[1] / scale_factor[0]; box.rect[1] = box.rect[1] / scale_factor[0];
box.rect[3] = box.rect[3] / scale_factor[0]; box.rect[3] = box.rect[3] / scale_factor[0];
results->push_back(box); results->push_back(box);
} }
} }
} }
} // namespace PaddleDetection } // namespace PaddleDetection
...@@ -20,79 +20,76 @@ ...@@ -20,79 +20,76 @@
namespace PaddleDetection { namespace PaddleDetection {
// Fast approximation of exp(x).
//
// The result is produced by writing a scaled and biased copy of x straight
// into the bit pattern of a float, so it is only an approximation of exp.
//
// The argument is saturated to [-87, 88] first. Outside roughly that range
// the scaled value below is negative or no longer describes a finite float,
// and converting a negative floating-point value to uint32_t is undefined
// behaviour. NaN fails the first comparison and is treated like the lower
// bound.
float fast_exp(float x) {
  const float kMinArg = -87.0f;
  const float kMaxArg = 88.0f;
  if (!(x >= kMinArg)) {
    x = kMinArg;
  } else if (x > kMaxArg) {
    x = kMaxArg;
  }
  union {
    uint32_t i;
    float f;
  } v{};
  v.i = (1 << 23) * (1.4426950409 * x + 126.93490512f);
  return v.f;
}
// Softmax over `length` contiguous values.
//
// Reads src[0..length) and writes the normalised weights to dst[0..length).
// The largest input is subtracted before exponentiation, so every argument
// handed to fast_exp is <= 0. fast_exp is approximate, and so is the output.
//
// Returns 0 on success and -1 when there is nothing to process (null pointer
// or non-positive length); in that case dst is left untouched. Without this
// check an empty range would dereference the end iterator returned by
// std::max_element.
template <typename T>
int activation_function_softmax(const T *src, T *dst, int length) {
  if (src == nullptr || dst == nullptr || length <= 0) {
    return -1;
  }
  const T alpha = *std::max_element(src, src + length);
  T denominator{0};

  for (int i = 0; i < length; ++i) {
    dst[i] = fast_exp(src[i] - alpha);
    denominator += dst[i];
  }

  for (int i = 0; i < length; ++i) {
    dst[i] /= denominator;
  }

  return 0;
}
// PicoDet decode // PicoDet decode
// Converts the box-distribution output of one feature-map cell into a box.
//
// dfl_det  Points at 4 * (reg_max + 1) values: one group of reg_max + 1
//          values per box side.
// label    Class id stored in the result.
// score    Confidence stored in the result.
// x, y     Column and row of the cell on the feature map.
// stride   Stride of the feature map; scales the cell centre and distances.
// im_shape Network input size. PicoDetPostProcess reads height from [0] and
//          width from [1], so x is clamped against im_shape[1] and y against
//          im_shape[0].
// reg_max  Number of values per side minus one.
//
// Returns the box as integer {xmin, ymin, xmax, ymax} clamped to the input.
PaddleDetection::ObjectResult
disPred2Bbox(const float *&dfl_det, int label, float score, int x, int y,
             int stride, std::vector<float> im_shape, int reg_max) {
  const float ct_x = (x + 0.5) * stride;
  const float ct_y = (y + 0.5) * stride;
  const int bins = reg_max + 1;

  std::vector<float> dis_pred(4);
  // One scratch buffer reused for all four sides instead of new[]/delete[]
  // on every iteration.
  std::vector<float> dis_after_sm(bins);
  for (int i = 0; i < 4; i++) {
    activation_function_softmax(dfl_det + i * bins, dis_after_sm.data(), bins);
    // Expected value of the distribution: sum of index * weight.
    float dis = 0;
    for (int j = 0; j < bins; j++) {
      dis += j * dis_after_sm[j];
    }
    dis *= stride;
    dis_pred[i] = dis;
  }

  const float in_h = im_shape[0];
  const float in_w = im_shape[1];
  int xmin = static_cast<int>((std::max)(ct_x - dis_pred[0], .0f));
  int ymin = static_cast<int>((std::max)(ct_y - dis_pred[1], .0f));
  int xmax = static_cast<int>((std::min)(ct_x + dis_pred[2], in_w));
  int ymax = static_cast<int>((std::min)(ct_y + dis_pred[3], in_h));

  PaddleDetection::ObjectResult result_item;
  result_item.rect = {xmin, ymin, xmax, ymax};
  result_item.class_id = label;
  result_item.confidence = score;

  return result_item;
}
void PicoDetPostProcess(std::vector<PaddleDetection::ObjectResult> *results,
void PicoDetPostProcess(std::vector<PaddleDetection::ObjectResult>* results, std::vector<const float *> outs,
std::vector<const float *> outs, std::vector<int> fpn_stride,
std::vector<int> fpn_stride, std::vector<float> im_shape,
std::vector<float> im_shape, std::vector<float> scale_factor, float score_threshold,
std::vector<float> scale_factor, float nms_threshold, int num_class, int reg_max) {
float score_threshold,
float nms_threshold,
int num_class,
int reg_max) {
std::vector<std::vector<PaddleDetection::ObjectResult>> bbox_results; std::vector<std::vector<PaddleDetection::ObjectResult>> bbox_results;
bbox_results.resize(num_class); bbox_results.resize(num_class);
int in_h = im_shape[0], in_w = im_shape[1]; int in_h = im_shape[0], in_w = im_shape[1];
for (int i = 0; i < fpn_stride.size(); ++i) { for (int i = 0; i < fpn_stride.size(); ++i) {
int feature_h = in_h / fpn_stride[i]; int feature_h = ceil((float)in_h / fpn_stride[i]);
int feature_w = in_w / fpn_stride[i]; int feature_w = ceil((float)in_w / fpn_stride[i]);
for (int idx = 0; idx < feature_h * feature_w; idx++) { for (int idx = 0; idx < feature_h * feature_w; idx++) {
const float *scores = outs[i] + (idx * num_class); const float *scores = outs[i] + (idx * num_class);
...@@ -107,10 +104,11 @@ void PicoDetPostProcess(std::vector<PaddleDetection::ObjectResult>* results, ...@@ -107,10 +104,11 @@ void PicoDetPostProcess(std::vector<PaddleDetection::ObjectResult>* results,
} }
} }
if (score > score_threshold) { if (score > score_threshold) {
const float *bbox_pred = outs[i + fpn_stride.size()] const float *bbox_pred =
+ (idx * 4 * (reg_max + 1)); outs[i + fpn_stride.size()] + (idx * 4 * (reg_max + 1));
bbox_results[cur_label].push_back(disPred2Bbox(bbox_pred, bbox_results[cur_label].push_back(
cur_label, score, col, row, fpn_stride[i], im_shape, reg_max)); disPred2Bbox(bbox_pred, cur_label, score, col, row, fpn_stride[i],
im_shape, reg_max));
} }
} }
} }
...@@ -118,13 +116,13 @@ void PicoDetPostProcess(std::vector<PaddleDetection::ObjectResult>* results, ...@@ -118,13 +116,13 @@ void PicoDetPostProcess(std::vector<PaddleDetection::ObjectResult>* results,
PaddleDetection::nms(bbox_results[i], nms_threshold); PaddleDetection::nms(bbox_results[i], nms_threshold);
for (auto box : bbox_results[i]) { for (auto box : bbox_results[i]) {
box.rect[0] = box.rect[0] / scale_factor[1]; box.rect[0] = box.rect[0] / scale_factor[1];
box.rect[2] = box.rect[2] / scale_factor[1]; box.rect[2] = box.rect[2] / scale_factor[1];
box.rect[1] = box.rect[1] / scale_factor[0]; box.rect[1] = box.rect[1] / scale_factor[0];
box.rect[3] = box.rect[3] / scale_factor[0]; box.rect[3] = box.rect[3] / scale_factor[0];
results->push_back(box); results->push_back(box);
} }
} }
} }
} // namespace PaddleDetection } // namespace PaddleDetection
...@@ -17,223 +17,203 @@ ...@@ -17,223 +17,203 @@
using namespace std; using namespace std;
// Loads the MNN model at `mnn_path`, creates an inference session that uses
// `num_thread_` threads, and remembers the network input size and the two
// detection thresholds.
PicoDet::PicoDet(const std::string &mnn_path, int input_width, int input_length,
                 int num_thread_, float score_threshold_,
                 float nms_threshold_) {
  // Plain configuration copied from the arguments.
  in_w = input_width;
  in_h = input_length;
  num_thread = num_thread_;
  score_threshold = score_threshold_;
  nms_threshold = nms_threshold_;

  PicoDet_interpreter.reset(MNN::Interpreter::createFromFile(mnn_path.c_str()));

  // backendConfig only has to outlive the createSession call below.
  MNN::BackendConfig backendConfig;
  backendConfig.precision = static_cast<MNN::BackendConfig::PrecisionMode>(2);

  MNN::ScheduleConfig config;
  config.numThread = num_thread;
  config.backendConfig = &backendConfig;

  PicoDet_session = PicoDet_interpreter->createSession(config);
  input_tensor = PicoDet_interpreter->getSessionInput(PicoDet_session, nullptr);
}
// Releases the model first and then the session created in the constructor.
PicoDet::~PicoDet() {
  auto &interpreter = *PicoDet_interpreter;
  interpreter.releaseModel();
  interpreter.releaseSession(PicoDet_session);
}
// Runs the detector on `raw_image` and appends the boxes that survive NMS to
// `result_list`, with coordinates rescaled from network input size to the
// size of `raw_image`.
//
// Returns -1 when the image is empty, 0 otherwise. Prints the elapsed time
// and the total number of entries in `result_list` to stdout.
int PicoDet::detect(cv::Mat &raw_image, std::vector<BoxInfo> &result_list) {
  if (raw_image.empty()) {
    std::cout << "image is empty ,please check!" << std::endl;
    return -1;
  }

  // Remember the original size so boxes can be mapped back at the end.
  image_h = raw_image.rows;
  image_w = raw_image.cols;

  cv::Mat resized;
  cv::resize(raw_image, resized, cv::Size(in_w, in_h));

  PicoDet_interpreter->resizeTensor(input_tensor, {1, 3, in_h, in_w});
  PicoDet_interpreter->resizeSession(PicoDet_session);
  std::shared_ptr<MNN::CV::ImageProcess> pretreat(MNN::CV::ImageProcess::create(
      MNN::CV::BGR, MNN::CV::BGR, mean_vals, 3, norm_vals, 3));
  pretreat->convert(resized.data, in_w, in_h, resized.step[0], input_tensor);

  // The timed region covers the forward pass and the per-head decoding.
  const auto t_begin = std::chrono::steady_clock::now();

  PicoDet_interpreter->runSession(PicoDet_session);

  std::vector<std::vector<BoxInfo>> per_class;
  per_class.resize(num_class);

  for (const auto &head : heads_info) {
    MNN::Tensor *scores_out = PicoDet_interpreter->getSessionOutput(
        PicoDet_session, head.cls_layer.c_str());
    MNN::Tensor *boxes_out = PicoDet_interpreter->getSessionOutput(
        PicoDet_session, head.dis_layer.c_str());

    // Copy both outputs into host tensors before reading them.
    MNN::Tensor scores_host(scores_out, scores_out->getDimensionType());
    scores_out->copyToHostTensor(&scores_host);
    MNN::Tensor boxes_host(boxes_out, boxes_out->getDimensionType());
    boxes_out->copyToHostTensor(&boxes_host);

    decode_infer(&scores_host, &boxes_host, head.stride, score_threshold,
                 per_class);
  }

  const auto t_end = std::chrono::steady_clock::now();
  const std::chrono::duration<double> elapsed = t_end - t_begin;
  std::cout << "inference time:" << elapsed.count() << " s, ";

  for (auto &class_boxes : per_class) {
    nms(class_boxes, nms_threshold);

    // Work on a copy so the per-class list keeps network-space coordinates.
    for (BoxInfo box : class_boxes) {
      box.x1 = box.x1 / in_w * image_w;
      box.x2 = box.x2 / in_w * image_w;
      box.y1 = box.y1 / in_h * image_h;
      box.y2 = box.y2 / in_h * image_h;
      result_list.push_back(box);
    }
  }
  std::cout << "detect " << result_list.size() << " objects" << std::endl;

  return 0;
}
// Scans one detection head and, for every feature-map cell whose best class
// score exceeds `threshold`, appends the decoded box to results[best class].
//
// cls_pred is read as num_class floats per cell and dis_pred as
// 4 * (reg_max + 1) floats per cell, both in row-major cell order.
void PicoDet::decode_infer(MNN::Tensor *cls_pred, MNN::Tensor *dis_pred,
                           int stride, float threshold,
                           std::vector<std::vector<BoxInfo>> &results) {
  const int feature_h = ceil(static_cast<float>(in_h) / stride);
  const int feature_w = ceil(static_cast<float>(in_w) / stride);
  const int cell_count = feature_h * feature_w;

  for (int cell = 0; cell < cell_count; ++cell) {
    const float *scores = cls_pred->host<float>() + (cell * num_class);

    // Best class for this cell; stays at label 0 / score 0 when no score is
    // positive.
    float best_score = 0;
    int best_label = 0;
    for (int label = 0; label < num_class; ++label) {
      if (scores[label] > best_score) {
        best_score = scores[label];
        best_label = label;
      }
    }

    if (!(best_score > threshold)) {
      continue;
    }

    const int row = cell / feature_w;
    const int col = cell % feature_w;
    const float *bbox_pred =
        dis_pred->host<float>() + (cell * 4 * (reg_max + 1));
    results[best_label].push_back(
        disPred2Bbox(bbox_pred, best_label, best_score, col, row, stride));
  }
}
// Converts the box-distribution output of one feature-map cell into a box.
//
// dfl_det points at 4 * (reg_max + 1) values, one group of reg_max + 1 values
// per box side. Each group is passed through softmax and reduced to its
// expected index, which is scaled by `stride` to give a distance from the
// cell centre. The box is clamped to [0, in_w] x [0, in_h].
BoxInfo PicoDet::disPred2Bbox(const float *&dfl_det, int label, float score,
                              int x, int y, int stride) {
  const float ct_x = (x + 0.5) * stride;
  const float ct_y = (y + 0.5) * stride;
  const int bins = reg_max + 1;

  std::vector<float> dis_pred(4);
  // One scratch buffer reused for all four sides instead of new[]/delete[]
  // on every iteration.
  std::vector<float> dis_after_sm(bins);
  for (int i = 0; i < 4; i++) {
    activation_function_softmax(dfl_det + i * bins, dis_after_sm.data(), bins);
    // Expected value of the distribution: sum of index * weight.
    float dis = 0;
    for (int j = 0; j < bins; j++) {
      dis += j * dis_after_sm[j];
    }
    dis *= stride;
    dis_pred[i] = dis;
  }

  float xmin = (std::max)(ct_x - dis_pred[0], .0f);
  float ymin = (std::max)(ct_y - dis_pred[1], .0f);
  float xmax = (std::min)(ct_x + dis_pred[2], static_cast<float>(in_w));
  float ymax = (std::min)(ct_y + dis_pred[3], static_cast<float>(in_h));

  return BoxInfo{xmin, ymin, xmax, ymax, score, label};
}
void PicoDet::nms(std::vector<BoxInfo> &input_boxes, float NMS_THRESH) void PicoDet::nms(std::vector<BoxInfo> &input_boxes, float NMS_THRESH) {
{ std::sort(input_boxes.begin(), input_boxes.end(),
std::sort(input_boxes.begin(), input_boxes.end(), [](BoxInfo a, BoxInfo b) { return a.score > b.score; }); [](BoxInfo a, BoxInfo b) { return a.score > b.score; });
std::vector<float> vArea(input_boxes.size()); std::vector<float> vArea(input_boxes.size());
for (int i = 0; i < int(input_boxes.size()); ++i) for (int i = 0; i < int(input_boxes.size()); ++i) {
{ vArea[i] = (input_boxes.at(i).x2 - input_boxes.at(i).x1 + 1) *
vArea[i] = (input_boxes.at(i).x2 - input_boxes.at(i).x1 + 1) * (input_boxes.at(i).y2 - input_boxes.at(i).y1 + 1); (input_boxes.at(i).y2 - input_boxes.at(i).y1 + 1);
} }
for (int i = 0; i < int(input_boxes.size()); ++i) for (int i = 0; i < int(input_boxes.size()); ++i) {
{ for (int j = i + 1; j < int(input_boxes.size());) {
for (int j = i + 1; j < int(input_boxes.size());) float xx1 = (std::max)(input_boxes[i].x1, input_boxes[j].x1);
{ float yy1 = (std::max)(input_boxes[i].y1, input_boxes[j].y1);
float xx1 = (std::max)(input_boxes[i].x1, input_boxes[j].x1); float xx2 = (std::min)(input_boxes[i].x2, input_boxes[j].x2);
float yy1 = (std::max)(input_boxes[i].y1, input_boxes[j].y1); float yy2 = (std::min)(input_boxes[i].y2, input_boxes[j].y2);
float xx2 = (std::min)(input_boxes[i].x2, input_boxes[j].x2); float w = (std::max)(float(0), xx2 - xx1 + 1);
float yy2 = (std::min)(input_boxes[i].y2, input_boxes[j].y2); float h = (std::max)(float(0), yy2 - yy1 + 1);
float w = (std::max)(float(0), xx2 - xx1 + 1); float inter = w * h;
float h = (std::max)(float(0), yy2 - yy1 + 1); float ovr = inter / (vArea[i] + vArea[j] - inter);
float inter = w * h; if (ovr >= NMS_THRESH) {
float ovr = inter / (vArea[i] + vArea[j] - inter); input_boxes.erase(input_boxes.begin() + j);
if (ovr >= NMS_THRESH) vArea.erase(vArea.begin() + j);
{ } else {
input_boxes.erase(input_boxes.begin() + j); j++;
vArea.erase(vArea.begin() + j); }
}
else
{
j++;
}
}
} }
}
} }
string PicoDet::get_label_str(int label) string PicoDet::get_label_str(int label) { return labels[label]; }
{
return labels[label];
}
// Fast approximation of exp(x).
//
// The result is produced by writing a scaled and biased copy of x straight
// into the bit pattern of a float, so it is only an approximation of exp.
//
// The argument is saturated to [-87, 88] first. Outside roughly that range
// the scaled value below is negative or no longer describes a finite float,
// and converting a negative floating-point value to uint32_t is undefined
// behaviour. NaN fails the first comparison and is treated like the lower
// bound.
inline float fast_exp(float x) {
  const float kMinArg = -87.0f;
  const float kMaxArg = 88.0f;
  if (!(x >= kMinArg)) {
    x = kMinArg;
  } else if (x > kMaxArg) {
    x = kMaxArg;
  }
  union {
    uint32_t i;
    float f;
  } v{};
  v.i = (1 << 23) * (1.4426950409 * x + 126.93490512f);
  return v.f;
}
inline float sigmoid(float x) inline float sigmoid(float x) { return 1.0f / (1.0f + fast_exp(-x)); }
{
return 1.0f / (1.0f + fast_exp(-x));
}
// Softmax over `length` contiguous values.
//
// Reads src[0..length) and writes the normalised weights to dst[0..length).
// The largest input is subtracted before exponentiation, so every argument
// handed to fast_exp is <= 0. fast_exp is approximate, and so is the output.
//
// Returns 0 on success and -1 when there is nothing to process (null pointer
// or non-positive length); in that case dst is left untouched. Without this
// check an empty range would dereference the end iterator returned by
// std::max_element.
template <typename T>
int activation_function_softmax(const T *src, T *dst, int length) {
  if (src == nullptr || dst == nullptr || length <= 0) {
    return -1;
  }
  const T alpha = *std::max_element(src, src + length);
  T denominator{0};

  for (int i = 0; i < length; ++i) {
    dst[i] = fast_exp(src[i] - alpha);
    denominator += dst[i];
  }

  for (int i = 0; i < length; ++i) {
    dst[i] /= denominator;
  }

  return 0;
}
...@@ -17,186 +17,169 @@ ...@@ -17,186 +17,169 @@
#include <benchmark.h> #include <benchmark.h>
#include <iostream> #include <iostream>
// Fast approximation of exp(x).
//
// The result is produced by writing a scaled and biased copy of x straight
// into the bit pattern of a float, so it is only an approximation of exp.
//
// The argument is saturated to [-87, 88] first. Outside roughly that range
// the scaled value below is negative or no longer describes a finite float,
// and converting a negative floating-point value to uint32_t is undefined
// behaviour. NaN fails the first comparison and is treated like the lower
// bound.
inline float fast_exp(float x) {
  const float kMinArg = -87.0f;
  const float kMaxArg = 88.0f;
  if (!(x >= kMinArg)) {
    x = kMinArg;
  } else if (x > kMaxArg) {
    x = kMaxArg;
  }
  union {
    uint32_t i;
    float f;
  } v{};
  v.i = (1 << 23) * (1.4426950409 * x + 126.93490512f);
  return v.f;
}
inline float sigmoid(float x) inline float sigmoid(float x) { return 1.0f / (1.0f + fast_exp(-x)); }
{
return 1.0f / (1.0f + fast_exp(-x));
}
// Softmax over `length` contiguous values.
//
// Reads src[0..length) and writes the normalised weights to dst[0..length).
// The largest input is subtracted before exponentiation, so every argument
// handed to fast_exp is <= 0. fast_exp is approximate, and so is the output.
//
// Returns 0 on success and -1 when there is nothing to process (null pointer
// or non-positive length); in that case dst is left untouched. Without this
// check an empty range would dereference the end iterator returned by
// std::max_element.
template <typename T>
int activation_function_softmax(const T *src, T *dst, int length) {
  if (src == nullptr || dst == nullptr || length <= 0) {
    return -1;
  }
  const T alpha = *std::max_element(src, src + length);
  T denominator{0};

  for (int i = 0; i < length; ++i) {
    dst[i] = fast_exp(src[i] - alpha);
    denominator += dst[i];
  }

  for (int i = 0; i < length; ++i) {
    dst[i] /= denominator;
  }

  return 0;
}
// Starts out false; the constructor overwrites it with the GPU probe result
// when NCNN_VULKAN is enabled.
bool PicoDet::hasGPU{false};
// Class-wide detector pointer; starts out null.
PicoDet *PicoDet::detector{nullptr};
// Creates the ncnn network and loads it from `param` (network description)
// and `bin` (weights).
//
// Vulkan compute is enabled only when `useGPU` is set and a GPU was found
// (the probe runs only when NCNN_VULKAN is enabled). The options are set
// before the files are loaded.
//
// A non-zero return code from either load call is reported on stderr instead
// of being dropped silently; construction still completes, as before.
PicoDet::PicoDet(const char *param, const char *bin, bool useGPU) {
  this->Net = new ncnn::Net();
#if NCNN_VULKAN
  this->hasGPU = ncnn::get_gpu_count() > 0;
#endif
  this->Net->opt.use_vulkan_compute = this->hasGPU && useGPU;
  this->Net->opt.use_fp16_arithmetic = true;

  if (this->Net->load_param(param) != 0) {
    std::cerr << "PicoDet: failed to load ncnn param file: "
              << (param ? param : "(null)") << std::endl;
  }
  if (this->Net->load_model(bin) != 0) {
    std::cerr << "PicoDet: failed to load ncnn model file: "
              << (bin ? bin : "(null)") << std::endl;
  }
}
// Frees the network allocated in the constructor.
PicoDet::~PicoDet() {
  delete Net;
}
// Fills `in` with the pixels of `image` (read as BGR) and applies per-channel
// mean subtraction followed by scaling. The image is not resized here.
void PicoDet::preprocess(cv::Mat &image, ncnn::Mat &in) {
  // One value per channel, in the same channel order as the pixel data.
  const float mean_vals[3] = {103.53f, 116.28f, 123.675f};
  const float norm_vals[3] = {0.017429f, 0.017507f, 0.017125f};

  in = ncnn::Mat::from_pixels(image.data, ncnn::Mat::PIXEL_BGR, image.cols,
                              image.rows);
  in.substract_mean_normalize(mean_vals, norm_vals);
}
// Runs the network on `image` and returns the boxes that survive per-class
// NMS, grouped by class in ascending class index.
//
// score_threshold is forwarded to decode_infer and nms_threshold to nms.
std::vector<BoxInfo> PicoDet::detect(cv::Mat image, float score_threshold,
                                     float nms_threshold) {
  ncnn::Mat input;
  preprocess(image, input);

  auto ex = this->Net->create_extractor();
  ex.set_light_mode(false);
  ex.set_num_threads(4);
#if NCNN_VULKAN
  ex.set_vulkan_compute(this->hasGPU);
#endif
  ex.input("image", input);  // picodet

  // One bucket of candidate boxes per class.
  std::vector<std::vector<BoxInfo>> per_class;
  per_class.resize(this->num_class);

  for (const auto &head : this->heads_info) {
    ncnn::Mat dis_pred;
    ncnn::Mat cls_pred;
    ex.extract(head.dis_layer.c_str(), dis_pred);
    ex.extract(head.cls_layer.c_str(), cls_pred);
    this->decode_infer(cls_pred, dis_pred, head.stride, score_threshold,
                       per_class);
  }

  std::vector<BoxInfo> dets;
  for (auto &class_boxes : per_class) {
    this->nms(class_boxes, nms_threshold);
    dets.insert(dets.end(), class_boxes.begin(), class_boxes.end());
  }
  return dets;
}
// Scans one detection head and, for every feature-map cell whose best class
// score exceeds `threshold`, appends the decoded box to results[best class].
//
// Row `idx` of cls_pred and of dis_pred is read as the data of cell `idx`,
// with cells in row-major order.
void PicoDet::decode_infer(ncnn::Mat &cls_pred, ncnn::Mat &dis_pred, int stride,
                           float threshold,
                           std::vector<std::vector<BoxInfo>> &results) {
  const int feature_h = ceil(static_cast<float>(this->input_size[1]) / stride);
  const int feature_w = ceil(static_cast<float>(this->input_size[0]) / stride);
  const int cell_count = feature_h * feature_w;

  for (int cell = 0; cell < cell_count; ++cell) {
    const float *scores = cls_pred.row(cell);

    // Best class for this cell; stays at label 0 / score 0 when no score is
    // positive.
    float best_score = 0;
    int best_label = 0;
    for (int label = 0; label < this->num_class; ++label) {
      if (scores[label] > best_score) {
        best_score = scores[label];
        best_label = label;
      }
    }

    if (!(best_score > threshold)) {
      continue;
    }

    const int row = cell / feature_w;
    const int col = cell % feature_w;
    const float *bbox_pred = dis_pred.row(cell);
    results[best_label].push_back(this->disPred2Bbox(
        bbox_pred, best_label, best_score, col, row, stride));
  }
}
// Converts the box-distribution output of one feature-map cell into a box.
//
// dfl_det points at 4 * (reg_max + 1) values, one group of reg_max + 1 values
// per box side. Each group is passed through softmax and reduced to its
// expected index, which is scaled by `stride` to give a distance from the
// cell centre. x is clamped to [0, input_size[0]] and y to
// [0, input_size[1]].
BoxInfo PicoDet::disPred2Bbox(const float *&dfl_det, int label, float score,
                              int x, int y, int stride) {
  const float ct_x = (x + 0.5) * stride;
  const float ct_y = (y + 0.5) * stride;
  const int bins = this->reg_max + 1;

  std::vector<float> dis_pred(4);
  // One scratch buffer reused for all four sides instead of new[]/delete[]
  // on every iteration.
  std::vector<float> dis_after_sm(bins);
  for (int i = 0; i < 4; i++) {
    activation_function_softmax(dfl_det + i * bins, dis_after_sm.data(), bins);
    // Expected value of the distribution: sum of index * weight.
    float dis = 0;
    for (int j = 0; j < bins; j++) {
      dis += j * dis_after_sm[j];
    }
    dis *= stride;
    dis_pred[i] = dis;
  }

  float xmin = (std::max)(ct_x - dis_pred[0], .0f);
  float ymin = (std::max)(ct_y - dis_pred[1], .0f);
  float xmax =
      (std::min)(ct_x + dis_pred[2], static_cast<float>(this->input_size[0]));
  float ymax =
      (std::min)(ct_y + dis_pred[3], static_cast<float>(this->input_size[1]));

  return BoxInfo{xmin, ymin, xmax, ymax, score, label};
}
void PicoDet::nms(std::vector<BoxInfo>& input_boxes, float NMS_THRESH) void PicoDet::nms(std::vector<BoxInfo> &input_boxes, float NMS_THRESH) {
{ std::sort(input_boxes.begin(), input_boxes.end(),
std::sort(input_boxes.begin(), input_boxes.end(), [](BoxInfo a, BoxInfo b) { return a.score > b.score; }); [](BoxInfo a, BoxInfo b) { return a.score > b.score; });
std::vector<float> vArea(input_boxes.size()); std::vector<float> vArea(input_boxes.size());
for (int i = 0; i < int(input_boxes.size()); ++i) { for (int i = 0; i < int(input_boxes.size()); ++i) {
vArea[i] = (input_boxes.at(i).x2 - input_boxes.at(i).x1 + 1) vArea[i] = (input_boxes.at(i).x2 - input_boxes.at(i).x1 + 1) *
* (input_boxes.at(i).y2 - input_boxes.at(i).y1 + 1); (input_boxes.at(i).y2 - input_boxes.at(i).y1 + 1);
} }
for (int i = 0; i < int(input_boxes.size()); ++i) { for (int i = 0; i < int(input_boxes.size()); ++i) {
for (int j = i + 1; j < int(input_boxes.size());) { for (int j = i + 1; j < int(input_boxes.size());) {
float xx1 = (std::max)(input_boxes[i].x1, input_boxes[j].x1); float xx1 = (std::max)(input_boxes[i].x1, input_boxes[j].x1);
float yy1 = (std::max)(input_boxes[i].y1, input_boxes[j].y1); float yy1 = (std::max)(input_boxes[i].y1, input_boxes[j].y1);
float xx2 = (std::min)(input_boxes[i].x2, input_boxes[j].x2); float xx2 = (std::min)(input_boxes[i].x2, input_boxes[j].x2);
float yy2 = (std::min)(input_boxes[i].y2, input_boxes[j].y2); float yy2 = (std::min)(input_boxes[i].y2, input_boxes[j].y2);
float w = (std::max)(float(0), xx2 - xx1 + 1); float w = (std::max)(float(0), xx2 - xx1 + 1);
float h = (std::max)(float(0), yy2 - yy1 + 1); float h = (std::max)(float(0), yy2 - yy1 + 1);
float inter = w * h; float inter = w * h;
float ovr = inter / (vArea[i] + vArea[j] - inter); float ovr = inter / (vArea[i] + vArea[j] - inter);
if (ovr >= NMS_THRESH) { if (ovr >= NMS_THRESH) {
input_boxes.erase(input_boxes.begin() + j); input_boxes.erase(input_boxes.begin() + j);
vArea.erase(vArea.begin() + j); vArea.erase(vArea.begin() + j);
} } else {
else { j++;
j++; }
}
}
} }
}
} }
...@@ -14,338 +14,289 @@ ...@@ -14,338 +14,289 @@
// reference from https://github.com/RangiLyu/nanodet
#include "picodet_openvino.h" #include "picodet_openvino.h"
#include <iostream>
#include <opencv2/core/core.hpp> #include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp> #include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp> #include <opencv2/imgproc/imgproc.hpp>
#include <iostream>
// Side length, in pixels, of the square image handed to the detector by the
// demo functions in this file.
#define image_size 416
// Axis-aligned rectangle given by its top-left corner and size, in pixels.
// resize_uniform() fills one in to describe where the resized picture sits
// inside the padded destination image. Members are zero-initialised so that a
// default-constructed rectangle never holds indeterminate values.
struct object_rect {
  int x = 0;
  int y = 0;
  int width = 0;
  int height = 0;
};
// Resizes `src` to fit inside `dst_size` while keeping its aspect ratio, and
// centres the result on a black CV_8UC3 canvas written to `dst`.
//   effect_area - receives the position and size of the resized picture
//                 inside `dst`.
// Returns 0 on success, or -1 (with an all-zero effect_area) when `src` is
// empty or `dst_size` has a non-positive dimension.
// NOTE(review): `dst` is always created as CV_8UC3, so `src` is assumed to be
// CV_8UC3 as well — confirm against callers.
int resize_uniform(cv::Mat &src, cv::Mat &dst, cv::Size dst_size,
                   object_rect &effect_area) {
  const int w = src.cols;
  const int h = src.rows;
  const int dst_w = dst_size.width;
  const int dst_h = dst_size.height;

  if (src.empty() || dst_w <= 0 || dst_h <= 0) {
    // Nothing to scale; also avoids dividing by a zero width/height below.
    effect_area.x = 0;
    effect_area.y = 0;
    effect_area.width = 0;
    effect_area.height = 0;
    return -1;
  }

  dst = cv::Mat(cv::Size(dst_w, dst_h), CV_8UC3, cv::Scalar(0));

  const float ratio_src = w * 1.0 / h;
  const float ratio_dst = dst_w * 1.0 / dst_h;

  // Size of the scaled picture: one side matches the canvas, the other is
  // scaled by the same factor and rounded down.
  int tmp_w = dst_w;
  int tmp_h = dst_h;
  if (ratio_src > ratio_dst) {
    tmp_h = floor((dst_w * 1.0 / w) * h);
  } else if (ratio_src < ratio_dst) {
    tmp_w = floor((dst_h * 1.0 / h) * w);
  }
  // Extreme aspect ratios can round a side down to 0, which cv::resize
  // rejects.
  if (tmp_w < 1) tmp_w = 1;
  if (tmp_h < 1) tmp_h = 1;

  if (tmp_w == dst_w && tmp_h == dst_h) {
    // Equal aspect ratios, or ratios so close that the scaled size is the
    // full canvas: no padding is needed.
    cv::resize(src, dst, dst_size);
    effect_area.x = 0;
    effect_area.y = 0;
    effect_area.width = dst_w;
    effect_area.height = dst_h;
    return 0;
  }

  cv::Mat tmp;
  cv::resize(src, tmp, cv::Size(tmp_w, tmp_h));

  // Centre the scaled picture; one of the two offsets is always 0.
  const int index_w = floor((dst_w - tmp_w) / 2.0);
  const int index_h = floor((dst_h - tmp_h) / 2.0);
  cv::Mat target = dst(cv::Rect(index_w, index_h, tmp_w, tmp_h));
  tmp.copyTo(target);

  effect_area.x = index_w;
  effect_area.y = index_h;
  effect_area.width = tmp_w;
  effect_area.height = tmp_h;
  return 0;
}
// Drawing colour for each of the 80 class labels; draw_bboxes() passes the
// three components of row `label` to cv::Scalar in this order.
const int color_list[80][3] = {
    {216, 82, 24},   {236, 176, 31},  {125, 46, 141},  {118, 171, 47},
    {76, 189, 237},  {238, 19, 46},   {76, 76, 76},    {153, 153, 153},
    {255, 0, 0},     {255, 127, 0},   {190, 190, 0},   {0, 255, 0},
    {0, 0, 255},     {170, 0, 255},   {84, 84, 0},     {84, 170, 0},
    {84, 255, 0},    {170, 84, 0},    {170, 170, 0},   {170, 255, 0},
    {255, 84, 0},    {255, 170, 0},   {255, 255, 0},   {0, 84, 127},
    {0, 170, 127},   {0, 255, 127},   {84, 0, 127},    {84, 84, 127},
    {84, 170, 127},  {84, 255, 127},  {170, 0, 127},   {170, 84, 127},
    {170, 170, 127}, {170, 255, 127}, {255, 0, 127},   {255, 84, 127},
    {255, 170, 127}, {255, 255, 127}, {0, 84, 255},    {0, 170, 255},
    {0, 255, 255},   {84, 0, 255},    {84, 84, 255},   {84, 170, 255},
    {84, 255, 255},  {170, 0, 255},   {170, 84, 255},  {170, 170, 255},
    {170, 255, 255}, {255, 0, 255},   {255, 84, 255},  {255, 170, 255},
    {42, 0, 0},      {84, 0, 0},      {127, 0, 0},     {170, 0, 0},
    {212, 0, 0},     {255, 0, 0},     {0, 42, 0},      {0, 84, 0},
    {0, 127, 0},     {0, 170, 0},     {0, 212, 0},     {0, 255, 0},
    {0, 0, 42},      {0, 0, 84},      {0, 0, 127},     {0, 0, 170},
    {0, 0, 212},     {0, 0, 255},     {0, 0, 0},       {36, 36, 36},
    {72, 72, 72},    {109, 109, 109}, {145, 145, 145}, {182, 182, 182},
    {218, 218, 218}, {0, 113, 188},   {80, 182, 188},  {127, 127, 0},
};
// Draws every detection on a copy of `bgr` and writes the result to
// "../predict.jpg".
//   bboxes     - detections in the coordinates of the resized/padded image.
//   effect_roi - where the resized picture sits inside that padded image
//                (as filled in by resize_uniform); used to map boxes back to
//                `bgr` coordinates.
// Each box gets an outline plus a filled label reading "<class> <score>%".
// Detections whose label is outside the class/colour tables are skipped.
void draw_bboxes(const cv::Mat &bgr, const std::vector<BoxInfo> &bboxes,
                 object_rect effect_roi) {
  static const char *class_names[] = {
      "person",        "bicycle",      "car",
      "motorcycle",    "airplane",     "bus",
      "train",         "truck",        "boat",
      "traffic light", "fire hydrant", "stop sign",
      "parking meter", "bench",        "bird",
      "cat",           "dog",          "horse",
      "sheep",         "cow",          "elephant",
      "bear",          "zebra",        "giraffe",
      "backpack",      "umbrella",     "handbag",
      "tie",           "suitcase",     "frisbee",
      "skis",          "snowboard",    "sports ball",
      "kite",          "baseball bat", "baseball glove",
      "skateboard",    "surfboard",    "tennis racket",
      "bottle",        "wine glass",   "cup",
      "fork",          "knife",        "spoon",
      "bowl",          "banana",       "apple",
      "sandwich",      "orange",       "broccoli",
      "carrot",        "hot dog",      "pizza",
      "donut",         "cake",         "chair",
      "couch",         "potted plant", "bed",
      "dining table",  "toilet",       "tv",
      "laptop",        "mouse",        "remote",
      "keyboard",      "cell phone",   "microwave",
      "oven",          "toaster",      "sink",
      "refrigerator",  "book",         "clock",
      "vase",          "scissors",     "teddy bear",
      "hair drier",    "toothbrush"};
  const int num_names = sizeof(class_names) / sizeof(class_names[0]);
  const int num_colors = sizeof(color_list) / sizeof(color_list[0]);

  cv::Mat image = bgr.clone();
  const int src_w = image.cols;
  const int src_h = image.rows;
  const int dst_w = effect_roi.width;
  const int dst_h = effect_roi.height;
  // Scale factors from the resized picture back to the original image.
  const float width_ratio = (float)src_w / (float)dst_w;
  const float height_ratio = (float)src_h / (float)dst_h;

  for (const BoxInfo &bbox : bboxes) {
    // Guard the table lookups below against an out-of-range label.
    if (bbox.label < 0 || bbox.label >= num_names ||
        bbox.label >= num_colors) {
      continue;
    }
    const int *components = color_list[bbox.label];
    const cv::Scalar color(components[0], components[1], components[2]);

    cv::rectangle(image,
                  cv::Rect(cv::Point((bbox.x1 - effect_roi.x) * width_ratio,
                                     (bbox.y1 - effect_roi.y) * height_ratio),
                           cv::Point((bbox.x2 - effect_roi.x) * width_ratio,
                                     (bbox.y2 - effect_roi.y) * height_ratio)),
                  color);

    char text[256];
    // snprintf bounds the write to the buffer size.
    snprintf(text, sizeof(text), "%s %.1f%%", class_names[bbox.label],
             bbox.score * 100);

    int baseLine = 0;
    const cv::Size label_size =
        cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.4, 1, &baseLine);

    // Place the label just above the box, kept inside the image at the top
    // and right edges.
    int x = (bbox.x1 - effect_roi.x) * width_ratio;
    int y =
        (bbox.y1 - effect_roi.y) * height_ratio - label_size.height - baseLine;
    if (y < 0) y = 0;
    if (x + label_size.width > image.cols) x = image.cols - label_size.width;

    cv::rectangle(image, cv::Rect(cv::Point(x, y),
                                  cv::Size(label_size.width,
                                           label_size.height + baseLine)),
                  color, -1);
    cv::putText(image, text, cv::Point(x, y + label_size.height),
                cv::FONT_HERSHEY_SIMPLEX, 0.4, cv::Scalar(255, 255, 255));
  }

  cv::imwrite("../predict.jpg", image);
}
// Runs the detector on every file matched by the glob pattern `imagepath`
// and draws the detections with draw_bboxes().
// Returns 0 when all images were processed, or -1 as soon as one of the
// matched files cannot be read as an image.
int image_demo(PicoDet &detector, const char *imagepath) {
  std::vector<std::string> filenames;
  cv::glob(imagepath, filenames, false);

  // Iterate by reference: the file names are only read.
  for (const auto &img_name : filenames) {
    cv::Mat image = cv::imread(img_name);
    if (image.empty()) {
      return -1;
    }
    object_rect effect_roi;
    cv::Mat resized_img;
    resize_uniform(image, resized_img, cv::Size(image_size, image_size),
                   effect_roi);
    auto results = detector.detect(resized_img, 0.4, 0.5);
    draw_bboxes(image, results, effect_roi);
  }
  return 0;
}
// Runs the detector on frames grabbed from camera `cam_id` until a frame
// cannot be captured.
// Returns -1 if the camera cannot be opened, otherwise 0 once capture stops.
int webcam_demo(PicoDet &detector, int cam_id) {
  cv::VideoCapture cap(cam_id);
  if (!cap.isOpened()) {
    fprintf(stderr, "failed to open camera %d\n", cam_id);
    return -1;
  }

  cv::Mat image;
  while (true) {
    cap >> image;
    // An empty frame means nothing was captured; resizing it would divide by
    // a zero height.
    if (image.empty()) {
      break;
    }
    object_rect effect_roi;
    cv::Mat resized_img;
    resize_uniform(image, resized_img, cv::Size(image_size, image_size),
                   effect_roi);
    auto results = detector.detect(resized_img, 0.4, 0.5);
    draw_bboxes(image, results, effect_roi);
    cv::waitKey(1);
  }
  return 0;
}
// Runs the detector on every frame of the video file at `path`.
// Returns -1 if the file cannot be opened, otherwise 0 after the last frame.
int video_demo(PicoDet &detector, const char *path) {
  cv::VideoCapture cap(path);
  if (!cap.isOpened()) {
    fprintf(stderr, "failed to open video %s\n", path);
    return -1;
  }

  cv::Mat image;
  while (true) {
    cap >> image;
    // The capture yields an empty frame once the video is exhausted.
    if (image.empty()) {
      break;
    }
    object_rect effect_roi;
    cv::Mat resized_img;
    resize_uniform(image, resized_img, cv::Size(image_size, image_size),
                   effect_roi);
    auto results = detector.detect(resized_img, 0.4, 0.5);
    draw_bboxes(image, results, effect_roi);
    cv::waitKey(1);
  }
  return 0;
}
// Times detector.detect() on a constant image_size x image_size image.
// The first `warm_up` runs are executed but not measured; the minimum,
// maximum and mean wall-clock time (milliseconds) of the following
// `loop_num` runs are printed to stderr. Always returns 0.
int benchmark(PicoDet &detector) {
  const int loop_num = 100;
  const int warm_up = 8;

  double time_min = DBL_MAX;
  double time_max = -DBL_MAX;
  double time_avg = 0;
  cv::Mat image(image_size, image_size, CV_8UC3, cv::Scalar(1, 1, 1));

  for (int i = 0; i < warm_up + loop_num; i++) {
    const auto start = std::chrono::steady_clock::now();
    std::vector<BoxInfo> results = detector.detect(image, 0.4, 0.5);
    const auto end = std::chrono::steady_clock::now();
    const double time =
        std::chrono::duration<double, std::milli>(end - start).count();
    if (i < warm_up) {
      continue;  // warm-up iteration: not part of the statistics
    }
    time_min = (std::min)(time_min, time);
    time_max = (std::max)(time_max, time);
    time_avg += time;
  }
  time_avg /= loop_num;
  fprintf(stderr, "%20s min = %7.2f max = %7.2f avg = %7.2f\n", "picodet",
          time_min, time_max, time_avg);
  return 0;
}
// Prints the command-line help for this demo to stderr.
static void print_usage(const char *program) {
  fprintf(stderr, "usage: %s [mode] [path]. \n For webcam mode=0, path is "
                  "cam id; \n For image demo, mode=1, path=xxx/xxx/*.jpg; \n "
                  "For video, mode=2; \n For benchmark, mode=3 path=0.\n",
          program);
}

// Entry point: `argv[1]` selects the mode (0 webcam, 1 images, 2 video,
// 3 benchmark) and `argv[2]` is the camera id or path for that mode.
// Returns -1 for a wrong argument count or an unknown mode; otherwise the
// status reported by the selected demo.
int main(int argc, char **argv) {
  if (argc != 3) {
    print_usage(argv[0]);
    return -1;
  }
  std::cout << "start init model" << std::endl;
  auto detector = PicoDet("../weight/picodet_m_416.xml");
  std::cout << "success" << std::endl;

  const int mode = atoi(argv[1]);
  switch (mode) {
  case 0:
    return webcam_demo(detector, atoi(argv[2]));
  case 1:
    return image_demo(detector, argv[2]);
  case 2:
    return video_demo(detector, argv[2]);
  case 3:
    return benchmark(detector);
  default:
    print_usage(argv[0]);
    return -1;
  }
}
...@@ -15,218 +15,195 @@ ...@@ -15,218 +15,195 @@
#include "picodet_openvino.h"

#include <cstdint>
#include <cstring>
// Fast approximation of exp(x).
// The value (x * log2(e) + offset) * 2^23 is used directly as the bit pattern
// of a 32-bit float, so the integer part lands in the exponent field and the
// fraction in the mantissa. The offset 126.93490512 sits just below the
// exponent bias of 127 (presumably tuned to reduce the approximation error).
// The result is only approximate: fast_exp(0) is about 0.967.
// Arguments whose bit pattern would fall outside the non-negative float range
// are saturated: very negative x yields 0, very large x yields +infinity.
inline float fast_exp(float x) {
  const double scaled = (1 << 23) * (1.4426950409 * x + 126.93490512f);

  const uint32_t kInfinityBits = 0x7F800000u;
  uint32_t bits;
  if (!(scaled > 0.0)) {
    bits = 0;  // underflow (or NaN input): +0.0f
  } else if (scaled >= static_cast<double>(kInfinityBits)) {
    bits = kInfinityBits;  // overflow: +infinity instead of a sign-bit flip
  } else {
    bits = static_cast<uint32_t>(scaled);
  }

  // memcpy reinterprets the bits without reading an inactive union member.
  float result;
  std::memcpy(&result, &bits, sizeof(result));
  return result;
}
inline float sigmoid(float x) inline float sigmoid(float x) { return 1.0f / (1.0f + fast_exp(-x)); }
{
return 1.0f / (1.0f + fast_exp(-x));
}
// Softmax over `length` values: dst[i] = exp(src[i] - max) / sum, using the
// approximate fast_exp(). Subtracting the maximum keeps every exponent
// argument <= 0. `src` and `dst` must each hold `length` elements.
// Always returns 0; a non-positive `length` leaves `dst` untouched.
template <typename _Tp>
int activation_function_softmax(const _Tp *src, _Tp *dst, int length) {
  if (length <= 0) {
    return 0;  // std::max_element on an empty range returns the end pointer
  }
  const _Tp alpha = *std::max_element(src, src + length);
  _Tp denominator{0};

  for (int i = 0; i < length; ++i) {
    dst[i] = fast_exp(src[i] - alpha);
    denominator += dst[i];
  }

  for (int i = 0; i < length; ++i) {
    dst[i] /= denominator;
  }

  return 0;
}
// Loads the network at `model_path`, remembers the name of its first input,
// requests FP32 precision for every output, then loads the network on the
// "CPU" device and creates the inference request used by detect().
PicoDet::PicoDet(const char *model_path) {
  InferenceEngine::Core ie;
  InferenceEngine::CNNNetwork model = ie.ReadNetwork(model_path);

  // prepare input settings
  InferenceEngine::InputsDataMap inputs_map(model.getInputsInfo());
  if (inputs_map.empty()) {
    THROW_IE_EXCEPTION << "The network has no inputs";
  }
  input_name_ = inputs_map.begin()->first;

  // prepare output settings
  InferenceEngine::OutputsDataMap outputs_map(model.getOutputsInfo());
  for (auto &output_info : outputs_map) {
    output_info.second->setPrecision(InferenceEngine::Precision::FP32);
  }

  // get network
  network_ = ie.LoadNetwork(model, "CPU");
  infer_request_ = network_.CreateInferRequest();
}
// Nothing to release by hand: the destructor body is intentionally empty.
PicoDet::~PicoDet() {}
// Copies `image` into the network input `blob` as floats, converting from
// interleaved pixels (read as cv::Vec3b) to planar layout: all values of
// channel 0, then channel 1, then channel 2, each plane row by row.
// No scaling or mean subtraction is applied.
// Throws when `blob` is not a MemoryBlob or is too small for the image.
void PicoDet::preprocess(cv::Mat &image, InferenceEngine::Blob::Ptr &blob) {
  const int img_w = image.cols;
  const int img_h = image.rows;
  const int channels = 3;

  InferenceEngine::MemoryBlob::Ptr mblob =
      InferenceEngine::as<InferenceEngine::MemoryBlob>(blob);
  if (!mblob) {
    THROW_IE_EXCEPTION
        << "We expect blob to be inherited from MemoryBlob in matU8ToBlob, "
        << "but by fact we were not able to cast inputBlob to MemoryBlob";
  }

  // Refuse to write past the end of the blob when the image is larger than
  // the network input.
  const size_t plane_size = static_cast<size_t>(img_w) * img_h;
  if (mblob->size() < plane_size * channels) {
    THROW_IE_EXCEPTION << "Input image does not fit into the input blob";
  }

  auto mblobHolder = mblob->wmap();
  float *blob_data = mblobHolder.as<float *>();

  for (int c = 0; c < channels; c++) {
    float *plane = blob_data + c * plane_size;
    for (int h = 0; h < img_h; h++) {
      for (int w = 0; w < img_w; w++) {
        plane[static_cast<size_t>(h) * img_w + w] =
            (float)image.at<cv::Vec3b>(h, w)[c];
      }
    }
  }
}
// Runs one inference on `image` and returns the detections of all classes.
//   score_threshold - minimum class score for a cell to produce a box.
//   nms_threshold   - overlap threshold passed to nms() for each class.
// Every entry of heads_info_ is decoded into per-class candidate lists, each
// list is filtered with nms(), and the survivors are concatenated in class
// order.
std::vector<BoxInfo> PicoDet::detect(cv::Mat image, float score_threshold,
                                     float nms_threshold) {
  InferenceEngine::Blob::Ptr input_blob = infer_request_.GetBlob(input_name_);
  preprocess(image, input_blob);

  // do inference
  infer_request_.Infer();

  // get output
  std::vector<std::vector<BoxInfo>> results;
  results.resize(this->num_class_);

  for (const auto &head_info : this->heads_info_) {
    const InferenceEngine::Blob::Ptr dis_pred_blob =
        infer_request_.GetBlob(head_info.dis_layer);
    const InferenceEngine::Blob::Ptr cls_pred_blob =
        infer_request_.GetBlob(head_info.cls_layer);

    auto mdis_pred =
        InferenceEngine::as<InferenceEngine::MemoryBlob>(dis_pred_blob);
    auto mcls_pred =
        InferenceEngine::as<InferenceEngine::MemoryBlob>(cls_pred_blob);
    // Same guard as in preprocess(): the cast yields null for a blob that is
    // not a MemoryBlob.
    if (!mdis_pred || !mcls_pred) {
      THROW_IE_EXCEPTION << "Output blob is not a MemoryBlob";
    }

    // The holders keep the mapped memory valid while the pointers are used.
    auto mdis_pred_holder = mdis_pred->rmap();
    const float *dis_pred = mdis_pred_holder.as<const float *>();
    auto mcls_pred_holder = mcls_pred->rmap();
    const float *cls_pred = mcls_pred_holder.as<const float *>();

    this->decode_infer(cls_pred, dis_pred, head_info.stride, score_threshold,
                       results);
  }

  std::vector<BoxInfo> dets;
  for (auto &class_boxes : results) {
    this->nms(class_boxes, nms_threshold);
    dets.insert(dets.end(), class_boxes.begin(), class_boxes.end());
  }
  return dets;
}
void PicoDet::decode_infer(const float*& cls_pred, const float*& dis_pred, int stride, float threshold, std::vector<std::vector<BoxInfo>>& results) void PicoDet::decode_infer(const float *&cls_pred, const float *&dis_pred,
{ int stride, float threshold,
int feature_h = input_size_ / stride; std::vector<std::vector<BoxInfo>> &results) {
int feature_w = input_size_ / stride; int feature_h = ceil((float)input_size_ / stride);
for (int idx = 0; idx < feature_h * feature_w; idx++) int feature_w = ceil((float)input_size_ / stride);
{ for (int idx = 0; idx < feature_h * feature_w; idx++) {
int row = idx / feature_w; int row = idx / feature_w;
int col = idx % feature_w; int col = idx % feature_w;
float score = 0; float score = 0;
int cur_label = 0; int cur_label = 0;
for (int label = 0; label < num_class_; label++) for (int label = 0; label < num_class_; label++) {
{ if (cls_pred[idx * num_class_ + label] > score) {
if (cls_pred[idx * num_class_ +label] > score) score = cls_pred[idx * num_class_ + label];
{ cur_label = label;
score = cls_pred[idx * num_class_ + label]; }
cur_label = label;
}
}
if (score > threshold)
{
const float* bbox_pred = dis_pred + idx * (reg_max_ + 1) * 4;
results[cur_label].push_back(this->disPred2Bbox(bbox_pred, cur_label, score, col, row, stride));
}
} }
if (score > threshold) {
const float *bbox_pred = dis_pred + idx * (reg_max_ + 1) * 4;
results[cur_label].push_back(
this->disPred2Bbox(bbox_pred, cur_label, score, col, row, stride));
}
}
} }
// Converts the distance predictions of one cell into a box.
// `dfl_det` points at 4 consecutive groups of (reg_max_ + 1) logits, one
// group per box side. Each group is soft-maxed and reduced to its expected
// bin index, which is scaled by `stride` to give the distance from the cell
// centre to that side. The box is clamped to [0, input_size_].
//   label, score - copied into the returned BoxInfo.
//   x, y         - column / row of the cell on the feature map.
BoxInfo PicoDet::disPred2Bbox(const float *&dfl_det, int label, float score,
                              int x, int y, int stride) {
  const float ct_x = (x + 0.5) * stride;
  const float ct_y = (y + 0.5) * stride;

  const int bins = reg_max_ + 1;
  // One scratch buffer reused for all four sides; owned by the vector, so no
  // manual delete[] is needed.
  std::vector<float> side_probs(bins);
  float dis_pred[4];
  for (int i = 0; i < 4; i++) {
    activation_function_softmax(dfl_det + i * bins, side_probs.data(), bins);
    float dis = 0;
    for (int j = 0; j < bins; j++) {
      dis += j * side_probs[j];
    }
    dis *= stride;
    dis_pred[i] = dis;
  }

  const float xmin = (std::max)(ct_x - dis_pred[0], .0f);
  const float ymin = (std::max)(ct_y - dis_pred[1], .0f);
  const float xmax = (std::min)(ct_x + dis_pred[2], (float)this->input_size_);
  const float ymax = (std::min)(ct_y + dis_pred[3], (float)this->input_size_);
  return BoxInfo{xmin, ymin, xmax, ymax, score, label};
}
void PicoDet::nms(std::vector<BoxInfo>& input_boxes, float NMS_THRESH) void PicoDet::nms(std::vector<BoxInfo> &input_boxes, float NMS_THRESH) {
{ std::sort(input_boxes.begin(), input_boxes.end(),
std::sort(input_boxes.begin(), input_boxes.end(), [](BoxInfo a, BoxInfo b) { return a.score > b.score; }); [](BoxInfo a, BoxInfo b) { return a.score > b.score; });
std::vector<float> vArea(input_boxes.size()); std::vector<float> vArea(input_boxes.size());
for (int i = 0; i < int(input_boxes.size()); ++i) for (int i = 0; i < int(input_boxes.size()); ++i) {
{ vArea[i] = (input_boxes.at(i).x2 - input_boxes.at(i).x1 + 1) *
vArea[i] = (input_boxes.at(i).x2 - input_boxes.at(i).x1 + 1) (input_boxes.at(i).y2 - input_boxes.at(i).y1 + 1);
* (input_boxes.at(i).y2 - input_boxes.at(i).y1 + 1); }
} for (int i = 0; i < int(input_boxes.size()); ++i) {
for (int i = 0; i < int(input_boxes.size()); ++i) for (int j = i + 1; j < int(input_boxes.size());) {
{ float xx1 = (std::max)(input_boxes[i].x1, input_boxes[j].x1);
for (int j = i + 1; j < int(input_boxes.size());) float yy1 = (std::max)(input_boxes[i].y1, input_boxes[j].y1);
{ float xx2 = (std::min)(input_boxes[i].x2, input_boxes[j].x2);
float xx1 = (std::max)(input_boxes[i].x1, input_boxes[j].x1); float yy2 = (std::min)(input_boxes[i].y2, input_boxes[j].y2);
float yy1 = (std::max)(input_boxes[i].y1, input_boxes[j].y1); float w = (std::max)(float(0), xx2 - xx1 + 1);
float xx2 = (std::min)(input_boxes[i].x2, input_boxes[j].x2); float h = (std::max)(float(0), yy2 - yy1 + 1);
float yy2 = (std::min)(input_boxes[i].y2, input_boxes[j].y2); float inter = w * h;
float w = (std::max)(float(0), xx2 - xx1 + 1); float ovr = inter / (vArea[i] + vArea[j] - inter);
float h = (std::max)(float(0), yy2 - yy1 + 1); if (ovr >= NMS_THRESH) {
float inter = w * h; input_boxes.erase(input_boxes.begin() + j);
float ovr = inter / (vArea[i] + vArea[j] - inter); vArea.erase(vArea.begin() + j);
if (ovr >= NMS_THRESH) } else {
{ j++;
input_boxes.erase(input_boxes.begin() + j); }
vArea.erase(vArea.begin() + j);
}
else
{
j++;
}
}
} }
}
} }
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册