From 2580dc013ba6d568510ef8ea413e6629d8268358 Mon Sep 17 00:00:00 2001 From: Guanghua Yu <742925032@qq.com> Date: Wed, 5 Jan 2022 15:04:56 +0800 Subject: [PATCH] fix picodet cpp infer (#5065) --- deploy/cpp/include/picodet_postprocess.h | 27 +- deploy/cpp/src/picodet_postprocess.cc | 126 ++-- deploy/lite/src/picodet_postprocess.cc | 126 ++-- deploy/third_engine/demo_mnn/picodet_mnn.cpp | 370 ++++++------ deploy/third_engine/demo_ncnn/picodet.cpp | 289 +++++----- deploy/third_engine/demo_openvino/main.cpp | 543 ++++++++---------- .../demo_openvino/picodet_openvino.cpp | 359 ++++++------ 7 files changed, 863 insertions(+), 977 deletions(-) diff --git a/deploy/cpp/include/picodet_postprocess.h b/deploy/cpp/include/picodet_postprocess.h index 415ef69e5..c0705e85d 100644 --- a/deploy/cpp/include/picodet_postprocess.h +++ b/deploy/cpp/include/picodet_postprocess.h @@ -14,25 +14,24 @@ #pragma once -#include -#include -#include -#include +#include #include +#include #include +#include +#include +#include #include "include/utils.h" namespace PaddleDetection { -void PicoDetPostProcess(std::vector* results, - std::vector outs, - std::vector fpn_stride, - std::vector im_shape, - std::vector scale_factor, - float score_threshold = 0.3, - float nms_threshold = 0.5, - int num_class = 80, - int reg_max = 7); +void PicoDetPostProcess(std::vector *results, + std::vector outs, + std::vector fpn_stride, + std::vector im_shape, + std::vector scale_factor, + float score_threshold = 0.3, float nms_threshold = 0.5, + int num_class = 80, int reg_max = 7); -} // namespace PaddleDetection \ No newline at end of file +} // namespace PaddleDetection diff --git a/deploy/cpp/src/picodet_postprocess.cc b/deploy/cpp/src/picodet_postprocess.cc index cbe70d43f..7f40a2658 100644 --- a/deploy/cpp/src/picodet_postprocess.cc +++ b/deploy/cpp/src/picodet_postprocess.cc @@ -20,79 +20,76 @@ namespace PaddleDetection { float fast_exp(float x) { - union { - uint32_t i; - float f; - } v{}; - v.i = (1 << 23) * (1.4426950409 * x + 126.93490512f); - return v.f; + union { + uint32_t i; + float f; + } v{}; + v.i = (1 << 23) * (1.4426950409 * x + 126.93490512f); + return v.f; } template int activation_function_softmax(const _Tp *src, _Tp *dst, int length) { - const _Tp alpha = *std::max_element(src, src + length); - _Tp denominator{0}; + const _Tp alpha = *std::max_element(src, src + length); + _Tp denominator{0}; - for (int i = 0; i < length; ++i) { - dst[i] = fast_exp(src[i] - alpha); - denominator += dst[i]; - } + for (int i = 0; i < length; ++i) { + dst[i] = fast_exp(src[i] - alpha); + denominator += dst[i]; + } - for (int i = 0; i < length; ++i) { - dst[i] /= denominator; - } + for (int i = 0; i < length; ++i) { + dst[i] /= denominator; + } - return 0; + return 0; } // PicoDet decode -PaddleDetection::ObjectResult disPred2Bbox(const float *&dfl_det, int label, float score, - int x, int y, int stride, std::vector im_shape, - int reg_max) { - float ct_x = (x + 0.5) * stride; - float ct_y = (y + 0.5) * stride; - std::vector dis_pred; - dis_pred.resize(4); - for (int i = 0; i < 4; i++) { - float dis = 0; - float* dis_after_sm = new float[reg_max + 1]; - activation_function_softmax(dfl_det + i * (reg_max + 1), dis_after_sm, reg_max + 1); - for (int j = 0; j < reg_max + 1; j++) { - dis += j * dis_after_sm[j]; - } - dis *= stride; - dis_pred[i] = dis; - delete[] dis_after_sm; +PaddleDetection::ObjectResult +disPred2Bbox(const float *&dfl_det, int label, float score, int x, int y, + int stride, std::vector im_shape, int reg_max) { + float ct_x = (x + 0.5) * stride; + float ct_y = (y + 0.5) * stride; + std::vector dis_pred; + dis_pred.resize(4); + for (int i = 0; i < 4; i++) { + float dis = 0; + float *dis_after_sm = new float[reg_max + 1]; + activation_function_softmax(dfl_det + i * (reg_max + 1), dis_after_sm, + reg_max + 1); + for (int j = 0; j < reg_max + 1; j++) { + dis += j * dis_after_sm[j]; } - int xmin = (int)(std::max)(ct_x - dis_pred[0], .0f); - int ymin = (int)(std::max)(ct_y - dis_pred[1], .0f); - int xmax = (int)(std::min)(ct_x + dis_pred[2], (float)im_shape[0]); - int ymax = (int)(std::min)(ct_y + dis_pred[3], (float)im_shape[1]); + dis *= stride; + dis_pred[i] = dis; + delete[] dis_after_sm; + } + int xmin = (int)(std::max)(ct_x - dis_pred[0], .0f); + int ymin = (int)(std::max)(ct_y - dis_pred[1], .0f); + int xmax = (int)(std::min)(ct_x + dis_pred[2], (float)im_shape[0]); + int ymax = (int)(std::min)(ct_y + dis_pred[3], (float)im_shape[1]); - PaddleDetection::ObjectResult result_item; - result_item.rect = {xmin, ymin, xmax, ymax}; - result_item.class_id = label; - result_item.confidence = score; + PaddleDetection::ObjectResult result_item; + result_item.rect = {xmin, ymin, xmax, ymax}; + result_item.class_id = label; + result_item.confidence = score; - return result_item; + return result_item; } - -void PicoDetPostProcess(std::vector* results, - std::vector outs, - std::vector fpn_stride, - std::vector im_shape, - std::vector scale_factor, - float score_threshold, - float nms_threshold, - int num_class, - int reg_max) { +void PicoDetPostProcess(std::vector *results, + std::vector outs, + std::vector fpn_stride, + std::vector im_shape, + std::vector scale_factor, float score_threshold, + float nms_threshold, int num_class, int reg_max) { std::vector> bbox_results; bbox_results.resize(num_class); int in_h = im_shape[0], in_w = im_shape[1]; for (int i = 0; i < fpn_stride.size(); ++i) { - int feature_h = in_h / fpn_stride[i]; - int feature_w = in_w / fpn_stride[i]; + int feature_h = std::ceil((float)in_h / fpn_stride[i]); + int feature_w = std::ceil((float)in_w / fpn_stride[i]); for (int idx = 0; idx < feature_h * feature_w; idx++) { const float *scores = outs[i] + (idx * num_class); @@ -107,10 +104,11 @@ void PicoDetPostProcess(std::vector* results, } } if (score > score_threshold) { - const float *bbox_pred = outs[i + fpn_stride.size()] - + (idx * 4 * (reg_max + 1)); - bbox_results[cur_label].push_back(disPred2Bbox(bbox_pred, - cur_label, score, col, row, fpn_stride[i], im_shape, reg_max)); + const float *bbox_pred = + outs[i + fpn_stride.size()] + (idx * 4 * (reg_max + 1)); + bbox_results[cur_label].push_back( + disPred2Bbox(bbox_pred, cur_label, score, col, row, fpn_stride[i], + im_shape, reg_max)); } } } @@ -118,13 +116,13 @@ void PicoDetPostProcess(std::vector* results, PaddleDetection::nms(bbox_results[i], nms_threshold); for (auto box : bbox_results[i]) { - box.rect[0] = box.rect[0] / scale_factor[1]; - box.rect[2] = box.rect[2] / scale_factor[1]; - box.rect[1] = box.rect[1] / scale_factor[0]; - box.rect[3] = box.rect[3] / scale_factor[0]; - results->push_back(box); + box.rect[0] = box.rect[0] / scale_factor[1]; + box.rect[2] = box.rect[2] / scale_factor[1]; + box.rect[1] = box.rect[1] / scale_factor[0]; + box.rect[3] = box.rect[3] / scale_factor[0]; + results->push_back(box); } } } -} // namespace PaddleDetection +} // namespace PaddleDetection diff --git a/deploy/lite/src/picodet_postprocess.cc b/deploy/lite/src/picodet_postprocess.cc index cbe70d43f..32625249f 100644 --- a/deploy/lite/src/picodet_postprocess.cc +++ b/deploy/lite/src/picodet_postprocess.cc @@ -20,79 +20,76 @@ namespace PaddleDetection { float fast_exp(float x) { - union { - uint32_t i; - float f; - } v{}; - v.i = (1 << 23) * (1.4426950409 * x + 126.93490512f); - return v.f; + union { + uint32_t i; + float f; + } v{}; + v.i = (1 << 23) * (1.4426950409 * x + 126.93490512f); + return v.f; } template int activation_function_softmax(const _Tp *src, _Tp *dst, int length) { - const _Tp alpha = *std::max_element(src, src + length); - _Tp denominator{0}; + const _Tp alpha = *std::max_element(src, src + length); + _Tp denominator{0}; - for (int i = 0; i < length; ++i) { - dst[i] = fast_exp(src[i] - alpha); - denominator += dst[i]; - } + for (int i = 0; i < length; ++i) { + dst[i] = fast_exp(src[i] - alpha); + denominator += dst[i]; + } - for (int i = 0; i < length; ++i) { - dst[i] /= denominator; - } + for (int i = 0; i < length; ++i) { + dst[i] /= denominator; + } - return 0; + return 0; } // PicoDet decode -PaddleDetection::ObjectResult disPred2Bbox(const float *&dfl_det, int label, float score, - int x, int y, int stride, std::vector im_shape, - int reg_max) { - float ct_x = (x + 0.5) * stride; - float ct_y = (y + 0.5) * stride; - std::vector dis_pred; - dis_pred.resize(4); - for (int i = 0; i < 4; i++) { - float dis = 0; - float* dis_after_sm = new float[reg_max + 1]; - activation_function_softmax(dfl_det + i * (reg_max + 1), dis_after_sm, reg_max + 1); - for (int j = 0; j < reg_max + 1; j++) { - dis += j * dis_after_sm[j]; - } - dis *= stride; - dis_pred[i] = dis; - delete[] dis_after_sm; +PaddleDetection::ObjectResult +disPred2Bbox(const float *&dfl_det, int label, float score, int x, int y, + int stride, std::vector im_shape, int reg_max) { + float ct_x = (x + 0.5) * stride; + float ct_y = (y + 0.5) * stride; + std::vector dis_pred; + dis_pred.resize(4); + for (int i = 0; i < 4; i++) { + float dis = 0; + float *dis_after_sm = new float[reg_max + 1]; + activation_function_softmax(dfl_det + i * (reg_max + 1), dis_after_sm, + reg_max + 1); + for (int j = 0; j < reg_max + 1; j++) { + dis += j * dis_after_sm[j]; } - int xmin = (int)(std::max)(ct_x - dis_pred[0], .0f); - int ymin = (int)(std::max)(ct_y - dis_pred[1], .0f); - int xmax = (int)(std::min)(ct_x + dis_pred[2], (float)im_shape[0]); - int ymax = (int)(std::min)(ct_y + dis_pred[3], (float)im_shape[1]); + dis *= stride; + dis_pred[i] = dis; + delete[] dis_after_sm; + } + int xmin = (int)(std::max)(ct_x - dis_pred[0], .0f); + int ymin = (int)(std::max)(ct_y - dis_pred[1], .0f); + int xmax = (int)(std::min)(ct_x + dis_pred[2], (float)im_shape[0]); + int ymax = (int)(std::min)(ct_y + dis_pred[3], (float)im_shape[1]); - PaddleDetection::ObjectResult result_item; - result_item.rect = {xmin, ymin, xmax, ymax}; - result_item.class_id = label; - result_item.confidence = score; + PaddleDetection::ObjectResult result_item; + result_item.rect = {xmin, ymin, xmax, ymax}; + result_item.class_id = label; + result_item.confidence = score; - return result_item; + return result_item; } - -void PicoDetPostProcess(std::vector* results, - std::vector outs, - std::vector fpn_stride, - std::vector im_shape, - std::vector scale_factor, - float score_threshold, - float nms_threshold, - int num_class, - int reg_max) { +void PicoDetPostProcess(std::vector *results, + std::vector outs, + std::vector fpn_stride, + std::vector im_shape, + std::vector scale_factor, float score_threshold, + float nms_threshold, int num_class, int reg_max) { std::vector> bbox_results; bbox_results.resize(num_class); int in_h = im_shape[0], in_w = im_shape[1]; for (int i = 0; i < fpn_stride.size(); ++i) { - int feature_h = in_h / fpn_stride[i]; - int feature_w = in_w / fpn_stride[i]; + int feature_h = ceil((float)in_h / fpn_stride[i]); + int feature_w = ceil((float)in_w / fpn_stride[i]); for (int idx = 0; idx < feature_h * feature_w; idx++) { const float *scores = outs[i] + (idx * num_class); @@ -107,10 +104,11 @@ void PicoDetPostProcess(std::vector* results, } } if (score > score_threshold) { - const float *bbox_pred = outs[i + fpn_stride.size()] - + (idx * 4 * (reg_max + 1)); - bbox_results[cur_label].push_back(disPred2Bbox(bbox_pred, - cur_label, score, col, row, fpn_stride[i], im_shape, reg_max)); + const float *bbox_pred = + outs[i + fpn_stride.size()] + (idx * 4 * (reg_max + 1)); + bbox_results[cur_label].push_back( + disPred2Bbox(bbox_pred, cur_label, score, col, row, fpn_stride[i], + im_shape, reg_max)); } } } @@ -118,13 +116,13 @@ void PicoDetPostProcess(std::vector* results, PaddleDetection::nms(bbox_results[i], nms_threshold); for (auto box : bbox_results[i]) { - box.rect[0] = box.rect[0] / scale_factor[1]; - box.rect[2] = box.rect[2] / scale_factor[1]; - box.rect[1] = box.rect[1] / scale_factor[0]; - box.rect[3] = box.rect[3] / scale_factor[0]; - results->push_back(box); + box.rect[0] = box.rect[0] / scale_factor[1]; + box.rect[2] = box.rect[2] / scale_factor[1]; + box.rect[1] = box.rect[1] / scale_factor[0]; + box.rect[3] = box.rect[3] / scale_factor[0]; + results->push_back(box); } } } -} // namespace PaddleDetection +} // namespace PaddleDetection diff --git a/deploy/third_engine/demo_mnn/picodet_mnn.cpp b/deploy/third_engine/demo_mnn/picodet_mnn.cpp index 7326f14fa..d6cb9c9fd 100644 --- a/deploy/third_engine/demo_mnn/picodet_mnn.cpp +++ b/deploy/third_engine/demo_mnn/picodet_mnn.cpp @@ -17,223 +17,203 @@ using namespace std; -PicoDet::PicoDet(const std::string &mnn_path, - int input_width, int input_length, int num_thread_, - float score_threshold_, float nms_threshold_) -{ - num_thread = num_thread_; - in_w = input_width; - in_h = input_length; - score_threshold = score_threshold_; - nms_threshold = nms_threshold_; - - PicoDet_interpreter = std::shared_ptr(MNN::Interpreter::createFromFile(mnn_path.c_str())); - MNN::ScheduleConfig config; - config.numThread = num_thread; - MNN::BackendConfig backendConfig; - backendConfig.precision = (MNN::BackendConfig::PrecisionMode) 2; - config.backendConfig = &backendConfig; - - PicoDet_session = PicoDet_interpreter->createSession(config); - - input_tensor = PicoDet_interpreter->getSessionInput(PicoDet_session, nullptr); - +PicoDet::PicoDet(const std::string &mnn_path, int input_width, int input_length, + int num_thread_, float score_threshold_, + float nms_threshold_) { + num_thread = num_thread_; + in_w = input_width; + in_h = input_length; + score_threshold = score_threshold_; + nms_threshold = nms_threshold_; + + PicoDet_interpreter = std::shared_ptr( + MNN::Interpreter::createFromFile(mnn_path.c_str())); + MNN::ScheduleConfig config; + config.numThread = num_thread; + MNN::BackendConfig backendConfig; + backendConfig.precision = (MNN::BackendConfig::PrecisionMode)2; + config.backendConfig = &backendConfig; + + PicoDet_session = PicoDet_interpreter->createSession(config); + + input_tensor = PicoDet_interpreter->getSessionInput(PicoDet_session, nullptr); } -PicoDet::~PicoDet() -{ - PicoDet_interpreter->releaseModel(); - PicoDet_interpreter->releaseSession(PicoDet_session); +PicoDet::~PicoDet() { + PicoDet_interpreter->releaseModel(); + PicoDet_interpreter->releaseSession(PicoDet_session); } -int PicoDet::detect(cv::Mat &raw_image, std::vector &result_list) -{ - if (raw_image.empty()) { - std::cout << "image is empty ,please check!" << std::endl; - return -1; - } - - image_h = raw_image.rows; - image_w = raw_image.cols; - cv::Mat image; - cv::resize(raw_image, image, cv::Size(in_w, in_h)); - - PicoDet_interpreter->resizeTensor(input_tensor, {1, 3, in_h, in_w}); - PicoDet_interpreter->resizeSession(PicoDet_session); - std::shared_ptr pretreat( - MNN::CV::ImageProcess::create(MNN::CV::BGR, MNN::CV::BGR, mean_vals, 3, - norm_vals, 3)); - pretreat->convert(image.data, in_w, in_h, image.step[0], input_tensor); - - auto start = chrono::steady_clock::now(); - - // run network - PicoDet_interpreter->runSession(PicoDet_session); - - // get output data - std::vector> results; - results.resize(num_class); - - for (const auto &head_info : heads_info) - { - MNN::Tensor *tensor_scores = PicoDet_interpreter->getSessionOutput(PicoDet_session, head_info.cls_layer.c_str()); - MNN::Tensor *tensor_boxes = PicoDet_interpreter->getSessionOutput(PicoDet_session, head_info.dis_layer.c_str()); - - MNN::Tensor tensor_scores_host(tensor_scores, tensor_scores->getDimensionType()); - tensor_scores->copyToHostTensor(&tensor_scores_host); - - MNN::Tensor tensor_boxes_host(tensor_boxes, tensor_boxes->getDimensionType()); - tensor_boxes->copyToHostTensor(&tensor_boxes_host); - - decode_infer(&tensor_scores_host, &tensor_boxes_host, head_info.stride, score_threshold, results); - } - - auto end = chrono::steady_clock::now(); - chrono::duration elapsed = end - start; - cout << "inference time:" << elapsed.count() << " s, "; - - for (int i = 0; i < (int)results.size(); i++) - { - nms(results[i], nms_threshold); - - for (auto box : results[i]) - { - box.x1 = box.x1 / in_w * image_w; - box.x2 = box.x2 / in_w * image_w; - box.y1 = box.y1 / in_h * image_h; - box.y2 = box.y2 / in_h * image_h; - result_list.push_back(box); - } +int PicoDet::detect(cv::Mat &raw_image, std::vector &result_list) { + if (raw_image.empty()) { + std::cout << "image is empty ,please check!" << std::endl; + return -1; + } + + image_h = raw_image.rows; + image_w = raw_image.cols; + cv::Mat image; + cv::resize(raw_image, image, cv::Size(in_w, in_h)); + + PicoDet_interpreter->resizeTensor(input_tensor, {1, 3, in_h, in_w}); + PicoDet_interpreter->resizeSession(PicoDet_session); + std::shared_ptr pretreat(MNN::CV::ImageProcess::create( + MNN::CV::BGR, MNN::CV::BGR, mean_vals, 3, norm_vals, 3)); + pretreat->convert(image.data, in_w, in_h, image.step[0], input_tensor); + + auto start = chrono::steady_clock::now(); + + // run network + PicoDet_interpreter->runSession(PicoDet_session); + + // get output data + std::vector> results; + results.resize(num_class); + + for (const auto &head_info : heads_info) { + MNN::Tensor *tensor_scores = PicoDet_interpreter->getSessionOutput( + PicoDet_session, head_info.cls_layer.c_str()); + MNN::Tensor *tensor_boxes = PicoDet_interpreter->getSessionOutput( + PicoDet_session, head_info.dis_layer.c_str()); + + MNN::Tensor tensor_scores_host(tensor_scores, + tensor_scores->getDimensionType()); + tensor_scores->copyToHostTensor(&tensor_scores_host); + + MNN::Tensor tensor_boxes_host(tensor_boxes, + tensor_boxes->getDimensionType()); + tensor_boxes->copyToHostTensor(&tensor_boxes_host); + + decode_infer(&tensor_scores_host, &tensor_boxes_host, head_info.stride, + score_threshold, results); + } + + auto end = chrono::steady_clock::now(); + chrono::duration elapsed = end - start; + cout << "inference time:" << elapsed.count() << " s, "; + + for (int i = 0; i < (int)results.size(); i++) { + nms(results[i], nms_threshold); + + for (auto box : results[i]) { + box.x1 = box.x1 / in_w * image_w; + box.x2 = box.x2 / in_w * image_w; + box.y1 = box.y1 / in_h * image_h; + box.y2 = box.y2 / in_h * image_h; + result_list.push_back(box); } - cout << "detect " << result_list.size() << " objects" << endl; + } + cout << "detect " << result_list.size() << " objects" << endl; - return 0; + return 0; } -void PicoDet::decode_infer(MNN::Tensor *cls_pred, MNN::Tensor *dis_pred, int stride, float threshold, std::vector> &results) -{ - int feature_h = in_h / stride; - int feature_w = in_w / stride; - - for (int idx = 0; idx < feature_h * feature_w; idx++) - { - const float *scores = cls_pred->host() + (idx * num_class); - int row = idx / feature_w; - int col = idx % feature_w; - float score = 0; - int cur_label = 0; - for (int label = 0; label < num_class; label++) - { - if (scores[label] > score) - { - score = scores[label]; - cur_label = label; - } - } - if (score > threshold) - { - const float *bbox_pred = dis_pred->host() + (idx * 4 * (reg_max + 1)); - results[cur_label].push_back(disPred2Bbox(bbox_pred, cur_label, score, col, row, stride)); - } +void PicoDet::decode_infer(MNN::Tensor *cls_pred, MNN::Tensor *dis_pred, + int stride, float threshold, + std::vector> &results) { + int feature_h = ceil((float)in_h / stride); + int feature_w = ceil((float)in_w / stride); + + for (int idx = 0; idx < feature_h * feature_w; idx++) { + const float *scores = cls_pred->host() + (idx * num_class); + int row = idx / feature_w; + int col = idx % feature_w; + float score = 0; + int cur_label = 0; + for (int label = 0; label < num_class; label++) { + if (scores[label] > score) { + score = scores[label]; + cur_label = label; + } } + if (score > threshold) { + const float *bbox_pred = + dis_pred->host() + (idx * 4 * (reg_max + 1)); + results[cur_label].push_back( + disPred2Bbox(bbox_pred, cur_label, score, col, row, stride)); + } + } } -BoxInfo PicoDet::disPred2Bbox(const float *&dfl_det, int label, float score, int x, int y, int stride) -{ - float ct_x = (x + 0.5) * stride; - float ct_y = (y + 0.5) * stride; - std::vector dis_pred; - dis_pred.resize(4); - for (int i = 0; i < 4; i++) - { - float dis = 0; - float *dis_after_sm = new float[reg_max + 1]; - activation_function_softmax(dfl_det + i * (reg_max + 1), dis_after_sm, reg_max + 1); - for (int j = 0; j < reg_max + 1; j++) - { - dis += j * dis_after_sm[j]; - } - dis *= stride; - dis_pred[i] = dis; - delete[] dis_after_sm; +BoxInfo PicoDet::disPred2Bbox(const float *&dfl_det, int label, float score, + int x, int y, int stride) { + float ct_x = (x + 0.5) * stride; + float ct_y = (y + 0.5) * stride; + std::vector dis_pred; + dis_pred.resize(4); + for (int i = 0; i < 4; i++) { + float dis = 0; + float *dis_after_sm = new float[reg_max + 1]; + activation_function_softmax(dfl_det + i * (reg_max + 1), dis_after_sm, + reg_max + 1); + for (int j = 0; j < reg_max + 1; j++) { + dis += j * dis_after_sm[j]; } - float xmin = (std::max)(ct_x - dis_pred[0], .0f); - float ymin = (std::max)(ct_y - dis_pred[1], .0f); - float xmax = (std::min)(ct_x + dis_pred[2], (float)in_w); - float ymax = (std::min)(ct_y + dis_pred[3], (float)in_h); - return BoxInfo{xmin, ymin, xmax, ymax, score, label}; + dis *= stride; + dis_pred[i] = dis; + delete[] dis_after_sm; + } + float xmin = (std::max)(ct_x - dis_pred[0], .0f); + float ymin = (std::max)(ct_y - dis_pred[1], .0f); + float xmax = (std::min)(ct_x + dis_pred[2], (float)in_w); + float ymax = (std::min)(ct_y + dis_pred[3], (float)in_h); + return BoxInfo{xmin, ymin, xmax, ymax, score, label}; } -void PicoDet::nms(std::vector &input_boxes, float NMS_THRESH) -{ - std::sort(input_boxes.begin(), input_boxes.end(), [](BoxInfo a, BoxInfo b) { return a.score > b.score; }); - std::vector vArea(input_boxes.size()); - for (int i = 0; i < int(input_boxes.size()); ++i) - { - vArea[i] = (input_boxes.at(i).x2 - input_boxes.at(i).x1 + 1) * (input_boxes.at(i).y2 - input_boxes.at(i).y1 + 1); - } - for (int i = 0; i < int(input_boxes.size()); ++i) - { - for (int j = i + 1; j < int(input_boxes.size());) - { - float xx1 = (std::max)(input_boxes[i].x1, input_boxes[j].x1); - float yy1 = (std::max)(input_boxes[i].y1, input_boxes[j].y1); - float xx2 = (std::min)(input_boxes[i].x2, input_boxes[j].x2); - float yy2 = (std::min)(input_boxes[i].y2, input_boxes[j].y2); - float w = (std::max)(float(0), xx2 - xx1 + 1); - float h = (std::max)(float(0), yy2 - yy1 + 1); - float inter = w * h; - float ovr = inter / (vArea[i] + vArea[j] - inter); - if (ovr >= NMS_THRESH) - { - input_boxes.erase(input_boxes.begin() + j); - vArea.erase(vArea.begin() + j); - } - else - { - j++; - } - } +void PicoDet::nms(std::vector &input_boxes, float NMS_THRESH) { + std::sort(input_boxes.begin(), input_boxes.end(), + [](BoxInfo a, BoxInfo b) { return a.score > b.score; }); + std::vector vArea(input_boxes.size()); + for (int i = 0; i < int(input_boxes.size()); ++i) { + vArea[i] = (input_boxes.at(i).x2 - input_boxes.at(i).x1 + 1) * + (input_boxes.at(i).y2 - input_boxes.at(i).y1 + 1); + } + for (int i = 0; i < int(input_boxes.size()); ++i) { + for (int j = i + 1; j < int(input_boxes.size());) { + float xx1 = (std::max)(input_boxes[i].x1, input_boxes[j].x1); + float yy1 = (std::max)(input_boxes[i].y1, input_boxes[j].y1); + float xx2 = (std::min)(input_boxes[i].x2, input_boxes[j].x2); + float yy2 = (std::min)(input_boxes[i].y2, input_boxes[j].y2); + float w = (std::max)(float(0), xx2 - xx1 + 1); + float h = (std::max)(float(0), yy2 - yy1 + 1); + float inter = w * h; + float ovr = inter / (vArea[i] + vArea[j] - inter); + if (ovr >= NMS_THRESH) { + input_boxes.erase(input_boxes.begin() + j); + vArea.erase(vArea.begin() + j); + } else { + j++; + } } + } } -string PicoDet::get_label_str(int label) -{ - return labels[label]; -} +string PicoDet::get_label_str(int label) { return labels[label]; } -inline float fast_exp(float x) -{ - union - { - uint32_t i; - float f; - } v{}; - v.i = (1 << 23) * (1.4426950409 * x + 126.93490512f); - return v.f; +inline float fast_exp(float x) { + union { + uint32_t i; + float f; + } v{}; + v.i = (1 << 23) * (1.4426950409 * x + 126.93490512f); + return v.f; } -inline float sigmoid(float x) -{ - return 1.0f / (1.0f + fast_exp(-x)); -} +inline float sigmoid(float x) { return 1.0f / (1.0f + fast_exp(-x)); } template -int activation_function_softmax(const _Tp *src, _Tp *dst, int length) -{ - const _Tp alpha = *std::max_element(src, src + length); - _Tp denominator{0}; - - for (int i = 0; i < length; ++i) - { - dst[i] = fast_exp(src[i] - alpha); - denominator += dst[i]; - } +int activation_function_softmax(const _Tp *src, _Tp *dst, int length) { + const _Tp alpha = *std::max_element(src, src + length); + _Tp denominator{0}; - for (int i = 0; i < length; ++i) - { - dst[i] /= denominator; - } + for (int i = 0; i < length; ++i) { + dst[i] = fast_exp(src[i] - alpha); + denominator += dst[i]; + } + + for (int i = 0; i < length; ++i) { + dst[i] /= denominator; + } - return 0; + return 0; } diff --git a/deploy/third_engine/demo_ncnn/picodet.cpp b/deploy/third_engine/demo_ncnn/picodet.cpp index 9ed6bca06..c4dec46b2 100644 --- a/deploy/third_engine/demo_ncnn/picodet.cpp +++ b/deploy/third_engine/demo_ncnn/picodet.cpp @@ -17,186 +17,169 @@ #include #include -inline float fast_exp(float x) -{ - union { - uint32_t i; - float f; - } v{}; - v.i = (1 << 23) * (1.4426950409 * x + 126.93490512f); - return v.f; +inline float fast_exp(float x) { + union { + uint32_t i; + float f; + } v{}; + v.i = (1 << 23) * (1.4426950409 * x + 126.93490512f); + return v.f; } -inline float sigmoid(float x) -{ - return 1.0f / (1.0f + fast_exp(-x)); -} +inline float sigmoid(float x) { return 1.0f / (1.0f + fast_exp(-x)); } -template -int activation_function_softmax(const _Tp* src, _Tp* dst, int length) -{ - const _Tp alpha = *std::max_element(src, src + length); - _Tp denominator{ 0 }; +template +int activation_function_softmax(const _Tp *src, _Tp *dst, int length) { + const _Tp alpha = *std::max_element(src, src + length); + _Tp denominator{0}; - for (int i = 0; i < length; ++i) { - dst[i] = fast_exp(src[i] - alpha); - denominator += dst[i]; - } + for (int i = 0; i < length; ++i) { + dst[i] = fast_exp(src[i] - alpha); + denominator += dst[i]; + } - for (int i = 0; i < length; ++i) { - dst[i] /= denominator; - } + for (int i = 0; i < length; ++i) { + dst[i] /= denominator; + } - return 0; + return 0; } bool PicoDet::hasGPU = false; -PicoDet* PicoDet::detector = nullptr; +PicoDet *PicoDet::detector = nullptr; -PicoDet::PicoDet(const char* param, const char* bin, bool useGPU) -{ - this->Net = new ncnn::Net(); +PicoDet::PicoDet(const char *param, const char *bin, bool useGPU) { + this->Net = new ncnn::Net(); #if NCNN_VULKAN - this->hasGPU = ncnn::get_gpu_count() > 0; + this->hasGPU = ncnn::get_gpu_count() > 0; #endif - this->Net->opt.use_vulkan_compute = this->hasGPU && useGPU; - this->Net->opt.use_fp16_arithmetic = true; - this->Net->load_param(param); - this->Net->load_model(bin); + this->Net->opt.use_vulkan_compute = this->hasGPU && useGPU; + this->Net->opt.use_fp16_arithmetic = true; + this->Net->load_param(param); + this->Net->load_model(bin); } -PicoDet::~PicoDet() -{ - delete this->Net; -} +PicoDet::~PicoDet() { delete this->Net; } -void PicoDet::preprocess(cv::Mat& image, ncnn::Mat& in) -{ - int img_w = image.cols; - int img_h = image.rows; - in = ncnn::Mat::from_pixels(image.data, ncnn::Mat::PIXEL_BGR, img_w, img_h); - const float mean_vals[3] = { 103.53f, 116.28f, 123.675f }; - const float norm_vals[3] = { 0.017429f, 0.017507f, 0.017125f }; - in.substract_mean_normalize(mean_vals, norm_vals); +void PicoDet::preprocess(cv::Mat &image, ncnn::Mat &in) { + int img_w = image.cols; + int img_h = image.rows; + in = ncnn::Mat::from_pixels(image.data, ncnn::Mat::PIXEL_BGR, img_w, img_h); + const float mean_vals[3] = {103.53f, 116.28f, 123.675f}; + const float norm_vals[3] = {0.017429f, 0.017507f, 0.017125f}; + in.substract_mean_normalize(mean_vals, norm_vals); } -std::vector PicoDet::detect(cv::Mat image, float score_threshold, float nms_threshold) -{ - ncnn::Mat input; - preprocess(image, input); - auto ex = this->Net->create_extractor(); - ex.set_light_mode(false); - ex.set_num_threads(4); +std::vector PicoDet::detect(cv::Mat image, float score_threshold, + float nms_threshold) { + ncnn::Mat input; + preprocess(image, input); + auto ex = this->Net->create_extractor(); + ex.set_light_mode(false); + ex.set_num_threads(4); #if NCNN_VULKAN - ex.set_vulkan_compute(this->hasGPU); + ex.set_vulkan_compute(this->hasGPU); #endif - ex.input("image", input); //picodet - - std::vector> results; - results.resize(this->num_class); - - for (const auto& head_info : this->heads_info) - { - ncnn::Mat dis_pred; - ncnn::Mat cls_pred; - ex.extract(head_info.dis_layer.c_str(), dis_pred); - ex.extract(head_info.cls_layer.c_str(), cls_pred); - this->decode_infer(cls_pred, dis_pred, head_info.stride, score_threshold, results); - } - - std::vector dets; - for (int i = 0; i < (int)results.size(); i++) - { - this->nms(results[i], nms_threshold); - - for (auto box : results[i]) - { - dets.push_back(box); - } + ex.input("image", input); // picodet + + std::vector> results; + results.resize(this->num_class); + + for (const auto &head_info : this->heads_info) { + ncnn::Mat dis_pred; + ncnn::Mat cls_pred; + ex.extract(head_info.dis_layer.c_str(), dis_pred); + ex.extract(head_info.cls_layer.c_str(), cls_pred); + this->decode_infer(cls_pred, dis_pred, head_info.stride, score_threshold, + results); + } + + std::vector dets; + for (int i = 0; i < (int)results.size(); i++) { + this->nms(results[i], nms_threshold); + + for (auto box : results[i]) { + dets.push_back(box); } - return dets; + } + return dets; } -void PicoDet::decode_infer(ncnn::Mat& cls_pred, ncnn::Mat& dis_pred, int stride, float threshold, std::vector>& results) -{ - int feature_h = this->input_size[1] / stride; - int feature_w = this->input_size[0] / stride; - - for (int idx = 0; idx < feature_h * feature_w; idx++) - { - const float* scores = cls_pred.row(idx); - int row = idx / feature_w; - int col = idx % feature_w; - float score = 0; - int cur_label = 0; - for (int label = 0; label < this->num_class; label++) - { - if (scores[label] > score) - { - score = scores[label]; - cur_label = label; - } - } - if (score > threshold) - { - const float* bbox_pred = dis_pred.row(idx); - results[cur_label].push_back(this->disPred2Bbox(bbox_pred, cur_label, score, col, row, stride)); - } - +void PicoDet::decode_infer(ncnn::Mat &cls_pred, ncnn::Mat &dis_pred, int stride, + float threshold, + std::vector> &results) { + int feature_h = ceil((float)this->input_size[1] / stride); + int feature_w = ceil((float)this->input_size[0] / stride); + + for (int idx = 0; idx < feature_h * feature_w; idx++) { + const float *scores = cls_pred.row(idx); + int row = idx / feature_w; + int col = idx % feature_w; + float score = 0; + int cur_label = 0; + for (int label = 0; label < this->num_class; label++) { + if (scores[label] > score) { + score = scores[label]; + cur_label = label; + } } + if (score > threshold) { + const float *bbox_pred = dis_pred.row(idx); + results[cur_label].push_back( + this->disPred2Bbox(bbox_pred, cur_label, score, col, row, stride)); + } + } } -BoxInfo PicoDet::disPred2Bbox(const float*& dfl_det, int label, float score, int x, int y, int stride) -{ - float ct_x = (x + 0.5) * stride; - float ct_y = (y + 0.5) * stride; - std::vector dis_pred; - dis_pred.resize(4); - for (int i = 0; i < 4; i++) - { - float dis = 0; - float* dis_after_sm = new float[this->reg_max + 1]; - activation_function_softmax(dfl_det + i * (this->reg_max + 1), dis_after_sm, this->reg_max + 1); - for (int j = 0; j < this->reg_max + 1; j++) - { - dis += j * dis_after_sm[j]; - } - dis *= stride; - dis_pred[i] = dis; - delete[] dis_after_sm; +BoxInfo PicoDet::disPred2Bbox(const float *&dfl_det, int label, float score, + int x, int y, int stride) { + float ct_x = (x + 0.5) * stride; + float ct_y = (y + 0.5) * stride; + std::vector dis_pred; + dis_pred.resize(4); + for (int i = 0; i < 4; i++) { + float dis = 0; + float *dis_after_sm = new float[this->reg_max + 1]; + activation_function_softmax(dfl_det + i * (this->reg_max + 1), dis_after_sm, + this->reg_max + 1); + for (int j = 0; j < this->reg_max + 1; j++) { + dis += j * dis_after_sm[j]; } - float xmin = (std::max)(ct_x - dis_pred[0], .0f); - float ymin = (std::max)(ct_y - dis_pred[1], .0f); - float xmax = (std::min)(ct_x + dis_pred[2], (float)this->input_size[0]); - float ymax = (std::min)(ct_y + dis_pred[3], (float)this->input_size[1]); - return BoxInfo { xmin, ymin, xmax, ymax, score, label }; + dis *= stride; + dis_pred[i] = dis; + delete[] dis_after_sm; + } + float xmin = (std::max)(ct_x - dis_pred[0], .0f); + float ymin = (std::max)(ct_y - dis_pred[1], .0f); + float xmax = (std::min)(ct_x + dis_pred[2], (float)this->input_size[0]); + float ymax = (std::min)(ct_y + dis_pred[3], (float)this->input_size[1]); + return BoxInfo{xmin, ymin, xmax, ymax, score, label}; } -void PicoDet::nms(std::vector& input_boxes, float NMS_THRESH) -{ - std::sort(input_boxes.begin(), input_boxes.end(), [](BoxInfo a, BoxInfo b) { return a.score > b.score; }); - std::vector vArea(input_boxes.size()); - for (int i = 0; i < int(input_boxes.size()); ++i) { - vArea[i] = (input_boxes.at(i).x2 - input_boxes.at(i).x1 + 1) - * (input_boxes.at(i).y2 - input_boxes.at(i).y1 + 1); - } - for (int i = 0; i < int(input_boxes.size()); ++i) { - for (int j = i + 1; j < int(input_boxes.size());) { - float xx1 = (std::max)(input_boxes[i].x1, input_boxes[j].x1); - float yy1 = (std::max)(input_boxes[i].y1, input_boxes[j].y1); - float xx2 = (std::min)(input_boxes[i].x2, input_boxes[j].x2); - float yy2 = (std::min)(input_boxes[i].y2, input_boxes[j].y2); - float w = (std::max)(float(0), xx2 - xx1 + 1); - float h = (std::max)(float(0), yy2 - yy1 + 1); - float inter = w * h; - float ovr = inter / (vArea[i] + vArea[j] - inter); - if (ovr >= NMS_THRESH) { - input_boxes.erase(input_boxes.begin() + j); - vArea.erase(vArea.begin() + j); - } - else { - j++; - } - } +void PicoDet::nms(std::vector &input_boxes, float NMS_THRESH) { + std::sort(input_boxes.begin(), input_boxes.end(), + [](BoxInfo a, BoxInfo b) { return a.score > b.score; }); + std::vector vArea(input_boxes.size()); + for (int i = 0; i < int(input_boxes.size()); ++i) { + vArea[i] = (input_boxes.at(i).x2 - input_boxes.at(i).x1 + 1) * + (input_boxes.at(i).y2 - input_boxes.at(i).y1 + 1); + } + for (int i = 0; i < int(input_boxes.size()); ++i) { + for (int j = i + 1; j < int(input_boxes.size());) { + float xx1 = (std::max)(input_boxes[i].x1, input_boxes[j].x1); + float yy1 = (std::max)(input_boxes[i].y1, input_boxes[j].y1); + float xx2 = (std::min)(input_boxes[i].x2, input_boxes[j].x2); + float yy2 = (std::min)(input_boxes[i].y2, input_boxes[j].y2); + float w = (std::max)(float(0), xx2 - xx1 + 1); + float h = (std::max)(float(0), yy2 - yy1 + 1); + float inter = w * h; + float ovr = inter / (vArea[i] + vArea[j] - inter); + if (ovr >= NMS_THRESH) { + input_boxes.erase(input_boxes.begin() + j); + vArea.erase(vArea.begin() + j); + } else { + j++; + } } + } } diff --git a/deploy/third_engine/demo_openvino/main.cpp b/deploy/third_engine/demo_openvino/main.cpp index b96a618fd..e24b6070f 100644 --- a/deploy/third_engine/demo_openvino/main.cpp +++ b/deploy/third_engine/demo_openvino/main.cpp @@ -14,338 +14,289 @@ // reference from https://github.com/RangiLyu/nanodet #include "picodet_openvino.h" +#include #include #include #include -#include #define image_size 416 struct object_rect { - int x; - int y; - int width; - int height; + int x; + int y; + int width; + int height; }; -int resize_uniform(cv::Mat& src, cv::Mat& dst, cv::Size dst_size, object_rect& effect_area) -{ - int w = src.cols; - int h = src.rows; - int dst_w = dst_size.width; - int dst_h = dst_size.height; - dst = cv::Mat(cv::Size(dst_w, dst_h), CV_8UC3, cv::Scalar(0)); +int resize_uniform(cv::Mat &src, cv::Mat &dst, cv::Size dst_size, + object_rect &effect_area) { + int w = src.cols; + int h = src.rows; + int dst_w = dst_size.width; + int dst_h = dst_size.height; + dst = cv::Mat(cv::Size(dst_w, dst_h), CV_8UC3, cv::Scalar(0)); - float ratio_src = w * 1.0 / h; - float ratio_dst = dst_w * 1.0 / dst_h; + float ratio_src = w * 1.0 / h; + float ratio_dst = dst_w * 1.0 / dst_h; - int tmp_w = 0; - int tmp_h = 0; - if (ratio_src > ratio_dst) { - tmp_w = dst_w; - tmp_h = floor((dst_w * 1.0 / w) * h); - } - else if (ratio_src < ratio_dst) { - tmp_h = dst_h; - tmp_w = floor((dst_h * 1.0 / h) * w); - } - else { - cv::resize(src, dst, dst_size); - effect_area.x = 0; - effect_area.y = 0; - effect_area.width = dst_w; - effect_area.height = dst_h; - return 0; - } - cv::Mat tmp; - cv::resize(src, tmp, cv::Size(tmp_w, tmp_h)); + int tmp_w = 0; + int tmp_h = 0; + if (ratio_src > ratio_dst) { + tmp_w = dst_w; + tmp_h = floor((dst_w * 1.0 / w) * h); + } else if (ratio_src < ratio_dst) { + tmp_h = dst_h; + tmp_w = floor((dst_h * 1.0 / h) * w); + } else { + cv::resize(src, dst, dst_size); + effect_area.x = 0; + effect_area.y = 0; + effect_area.width = dst_w; + effect_area.height = dst_h; + return 0; + } + cv::Mat tmp; + cv::resize(src, tmp, cv::Size(tmp_w, tmp_h)); - if (tmp_w != dst_w) { - int index_w = floor((dst_w - tmp_w) / 2.0); - for (int i = 0; i < dst_h; i++) { - memcpy(dst.data + i * dst_w * 3 + index_w * 3, tmp.data + i * tmp_w * 3, tmp_w * 3); - } - effect_area.x = index_w; - effect_area.y = 0; - effect_area.width = tmp_w; - effect_area.height = tmp_h; + if (tmp_w != dst_w) { + int index_w = floor((dst_w - tmp_w) / 2.0); + for (int i = 0; i < dst_h; i++) { + memcpy(dst.data + i * dst_w * 3 + index_w * 3, tmp.data + i * tmp_w * 3, + tmp_w * 3); } - else if (tmp_h != dst_h) { - int index_h = floor((dst_h - tmp_h) / 2.0); - memcpy(dst.data + index_h * dst_w * 3, tmp.data, tmp_w * tmp_h * 3); - effect_area.x = 0; - effect_area.y = index_h; - effect_area.width = tmp_w; - effect_area.height = tmp_h; - } - else { - printf("error\n"); - } - return 0; + effect_area.x = index_w; + effect_area.y = 0; + effect_area.width = tmp_w; + effect_area.height = tmp_h; + } else if (tmp_h != dst_h) { + int index_h = floor((dst_h - tmp_h) / 2.0); + memcpy(dst.data + index_h * dst_w * 3, tmp.data, tmp_w * tmp_h * 3); + effect_area.x = 0; + effect_area.y = index_h; + effect_area.width = tmp_w; + effect_area.height = tmp_h; + } else { + printf("error\n"); + } + return 0; } -const int color_list[80][3] = -{ - {216 , 82 , 24}, - {236 ,176 , 31}, - {125 , 46 ,141}, - {118 ,171 , 47}, - { 76 ,189 ,237}, - {238 , 19 , 46}, - { 76 , 76 , 76}, - {153 ,153 ,153}, - {255 , 0 , 0}, - {255 ,127 , 0}, - {190 ,190 , 0}, - { 0 ,255 , 0}, - { 0 , 0 ,255}, - {170 , 0 ,255}, - { 84 , 84 , 0}, - { 84 ,170 , 0}, - { 84 ,255 , 0}, - {170 , 84 , 0}, - {170 ,170 , 0}, - {170 ,255 , 0}, - {255 , 84 , 0}, - {255 ,170 , 0}, - {255 ,255 , 0}, - { 0 , 84 ,127}, - { 0 ,170 ,127}, - { 0 ,255 ,127}, - { 84 , 0 ,127}, - { 84 , 84 ,127}, - { 84 ,170 ,127}, - { 84 ,255 ,127}, - {170 , 0 ,127}, - {170 , 84 ,127}, - {170 ,170 ,127}, - {170 ,255 ,127}, - {255 , 0 ,127}, - {255 , 84 ,127}, - {255 ,170 ,127}, - {255 ,255 ,127}, - { 0 , 84 ,255}, - { 0 ,170 ,255}, - { 0 ,255 ,255}, - { 84 , 0 ,255}, - { 84 , 84 ,255}, - { 84 ,170 ,255}, - { 84 ,255 ,255}, - {170 , 0 ,255}, - {170 , 84 ,255}, - {170 ,170 ,255}, - {170 ,255 ,255}, - {255 , 0 ,255}, - {255 , 84 ,255}, - {255 ,170 ,255}, - { 42 , 0 , 0}, - { 84 , 0 , 0}, - {127 , 0 , 0}, - {170 , 0 , 0}, - {212 , 0 , 0}, - {255 , 0 , 0}, - { 0 , 42 , 0}, - { 0 , 84 , 0}, - { 0 ,127 , 0}, - { 0 ,170 , 0}, - { 0 ,212 , 0}, - { 0 ,255 , 0}, - { 0 , 0 , 42}, - { 0 , 0 , 84}, - { 0 , 0 ,127}, - { 0 , 0 ,170}, - { 0 , 0 ,212}, - { 0 , 0 ,255}, - { 0 , 0 , 0}, - { 36 , 36 , 36}, - { 72 , 72 , 72}, - {109 ,109 ,109}, - {145 ,145 ,145}, - {182 ,182 ,182}, - {218 ,218 ,218}, - { 0 ,113 ,188}, - { 80 ,182 ,188}, - {127 ,127 , 0}, +const int color_list[80][3] = { + {216, 82, 24}, {236, 176, 31}, {125, 46, 141}, {118, 171, 47}, + {76, 189, 237}, {238, 19, 46}, {76, 76, 76}, {153, 153, 153}, + {255, 0, 0}, {255, 127, 0}, {190, 190, 0}, {0, 255, 0}, + {0, 0, 255}, {170, 0, 255}, {84, 84, 0}, {84, 170, 0}, + {84, 255, 0}, {170, 84, 0}, {170, 170, 0}, {170, 255, 0}, + {255, 84, 0}, {255, 170, 0}, {255, 255, 0}, {0, 84, 127}, + {0, 170, 127}, {0, 255, 127}, {84, 0, 127}, {84, 84, 127}, + {84, 170, 127}, {84, 255, 127}, {170, 0, 127}, {170, 84, 127}, + {170, 170, 127}, {170, 255, 127}, {255, 0, 127}, {255, 84, 127}, + {255, 170, 127}, {255, 255, 127}, {0, 84, 255}, {0, 170, 255}, + {0, 255, 255}, {84, 0, 255}, {84, 84, 255}, {84, 170, 255}, + {84, 255, 255}, {170, 0, 255}, {170, 84, 255}, {170, 170, 255}, + {170, 255, 255}, {255, 0, 255}, {255, 84, 255}, {255, 170, 255}, + {42, 0, 0}, {84, 0, 0}, {127, 0, 0}, {170, 0, 0}, + {212, 0, 0}, {255, 0, 0}, {0, 42, 0}, {0, 84, 0}, + {0, 127, 0}, {0, 170, 0}, {0, 212, 0}, {0, 255, 0}, + {0, 0, 42}, {0, 0, 84}, {0, 0, 127}, {0, 0, 170}, + {0, 0, 212}, {0, 0, 255}, {0, 0, 0}, {36, 36, 36}, + {72, 72, 72}, {109, 109, 109}, {145, 145, 145}, {182, 182, 182}, + {218, 218, 218}, {0, 113, 188}, {80, 182, 188}, {127, 127, 0}, }; -void draw_bboxes(const cv::Mat& bgr, const std::vector& bboxes, object_rect effect_roi) -{ - static const char* class_names[] = { "person", "bicycle", "car", "motorcycle", "airplane", "bus", - "train", "truck", "boat", "traffic light", "fire hydrant", - "stop sign", "parking meter", "bench", "bird", "cat", "dog", - "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", - "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", - "skis", "snowboard", "sports ball", "kite", "baseball bat", - "baseball glove", "skateboard", "surfboard", "tennis racket", - "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", - "banana", "apple", "sandwich", "orange", "broccoli", "carrot", - "hot dog", "pizza", "donut", "cake", "chair", "couch", - "potted plant", "bed", "dining table", "toilet", "tv", "laptop", - "mouse", "remote", "keyboard", "cell phone", "microwave", "oven", - "toaster", "sink", "refrigerator", "book", "clock", "vase", - "scissors", "teddy bear", "hair drier", "toothbrush" - }; +void draw_bboxes(const cv::Mat &bgr, const std::vector &bboxes, + object_rect effect_roi) { + static const char *class_names[] = { + "person", "bicycle", "car", + "motorcycle", "airplane", "bus", + "train", "truck", "boat", + "traffic light", "fire hydrant", "stop sign", + "parking meter", "bench", "bird", + "cat", "dog", "horse", + "sheep", "cow", "elephant", + "bear", "zebra", "giraffe", + "backpack", "umbrella", "handbag", + "tie", "suitcase", "frisbee", + "skis", "snowboard", "sports ball", + "kite", "baseball bat", "baseball glove", + "skateboard", "surfboard", "tennis racket", + "bottle", "wine glass", "cup", + "fork", "knife", "spoon", + "bowl", "banana", "apple", + "sandwich", "orange", "broccoli", + "carrot", "hot dog", "pizza", + "donut", "cake", "chair", + "couch", "potted plant", "bed", + "dining table", "toilet", "tv", + "laptop", "mouse", "remote", + "keyboard", "cell phone", "microwave", + "oven", "toaster", "sink", + "refrigerator", "book", "clock", + "vase", "scissors", "teddy bear", + "hair drier", "toothbrush"}; - cv::Mat image = bgr.clone(); - int src_w = image.cols; - int src_h = image.rows; - int dst_w = effect_roi.width; - int dst_h = effect_roi.height; - float width_ratio = (float)src_w / (float)dst_w; - float height_ratio = (float)src_h / (float)dst_h; + cv::Mat image = bgr.clone(); + int src_w = image.cols; + int src_h = image.rows; + int dst_w = effect_roi.width; + int dst_h = effect_roi.height; + float width_ratio = (float)src_w / (float)dst_w; + float height_ratio = (float)src_h / (float)dst_h; + for (size_t i = 0; i < bboxes.size(); i++) { + const BoxInfo &bbox = bboxes[i]; + cv::Scalar color = + cv::Scalar(color_list[bbox.label][0], color_list[bbox.label][1], + color_list[bbox.label][2]); + cv::rectangle(image, + cv::Rect(cv::Point((bbox.x1 - effect_roi.x) * width_ratio, + (bbox.y1 - effect_roi.y) * height_ratio), + cv::Point((bbox.x2 - effect_roi.x) * width_ratio, + (bbox.y2 - effect_roi.y) * height_ratio)), + color); - for (size_t i = 0; i < bboxes.size(); i++) - { - const BoxInfo& bbox = bboxes[i]; - cv::Scalar color = cv::Scalar(color_list[bbox.label][0], color_list[bbox.label][1], color_list[bbox.label][2]); - cv::rectangle(image, cv::Rect(cv::Point((bbox.x1 - effect_roi.x) * width_ratio, (bbox.y1 - effect_roi.y) * height_ratio), - cv::Point((bbox.x2 - effect_roi.x) * width_ratio, (bbox.y2 - effect_roi.y) * height_ratio)), color); + char text[256]; + sprintf(text, "%s %.1f%%", class_names[bbox.label], bbox.score * 100); + int baseLine = 0; + cv::Size label_size = + cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.4, 1, &baseLine); + int x = (bbox.x1 - effect_roi.x) * width_ratio; + int y = + (bbox.y1 - effect_roi.y) * height_ratio - label_size.height - baseLine; + if (y < 0) + y = 0; + if (x + label_size.width > image.cols) + x = image.cols - label_size.width; - char text[256]; - sprintf(text, "%s %.1f%%", class_names[bbox.label], bbox.score * 100); - int baseLine = 0; - cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.4, 1, &baseLine); - int x = (bbox.x1 - effect_roi.x) * width_ratio; - int y = (bbox.y1 - effect_roi.y) * height_ratio - label_size.height - baseLine; - if (y < 0) - y = 0; - if (x + label_size.width > image.cols) - x = image.cols - label_size.width; + cv::rectangle(image, cv::Rect(cv::Point(x, y), + cv::Size(label_size.width, + label_size.height + baseLine)), + color, -1); + cv::putText(image, text, cv::Point(x, y + label_size.height), + cv::FONT_HERSHEY_SIMPLEX, 0.4, cv::Scalar(255, 255, 255)); + } - cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)), - color, -1); - - cv::putText(image, text, cv::Point(x, y + label_size.height), - cv::FONT_HERSHEY_SIMPLEX, 0.4, cv::Scalar(255, 255, 255)); - } - - cv::imwrite("../predict.jpg",image); + cv::imwrite("../predict.jpg", image); } +int image_demo(PicoDet &detector, const char *imagepath) { + std::vector filenames; + cv::glob(imagepath, filenames, false); -int image_demo(PicoDet& detector, const char* imagepath) -{ - std::vector filenames; - cv::glob(imagepath, filenames, false); - - for (auto img_name : filenames) - { - cv::Mat image = cv::imread(img_name); - if (image.empty()) - { - return -1; - } - object_rect effect_roi; - cv::Mat resized_img; - resize_uniform(image, resized_img, cv::Size(image_size, image_size), effect_roi); - auto results = detector.detect(resized_img, 0.4, 0.5); - draw_bboxes(image, results, effect_roi); + for (auto img_name : filenames) { + cv::Mat image = cv::imread(img_name); + if (image.empty()) { + return -1; } - return 0; + object_rect effect_roi; + cv::Mat resized_img; + resize_uniform(image, resized_img, cv::Size(image_size, image_size), + effect_roi); + auto results = detector.detect(resized_img, 0.4, 0.5); + draw_bboxes(image, results, effect_roi); + } + return 0; } -int webcam_demo(PicoDet& detector, int cam_id) -{ - cv::Mat image; - cv::VideoCapture cap(cam_id); - - while (true) - { - cap >> image; - object_rect effect_roi; - cv::Mat resized_img; - resize_uniform(image, resized_img, cv::Size(image_size, image_size), effect_roi); - auto results = detector.detect(resized_img, 0.4, 0.5); - draw_bboxes(image, results, effect_roi); - cv::waitKey(1); - } - return 0; +int webcam_demo(PicoDet &detector, int cam_id) { + cv::Mat image; + cv::VideoCapture cap(cam_id); + while (true) { + cap >> image; + object_rect effect_roi; + cv::Mat resized_img; + resize_uniform(image, resized_img, cv::Size(image_size, image_size), + effect_roi); + auto results = detector.detect(resized_img, 0.4, 0.5); + draw_bboxes(image, results, effect_roi); + cv::waitKey(1); + } + return 0; } -int video_demo(PicoDet& detector, const char* path) -{ - cv::Mat image; - cv::VideoCapture cap(path); +int video_demo(PicoDet &detector, const char *path) { + cv::Mat image; + cv::VideoCapture cap(path); - while (true) - { - cap >> image; - object_rect effect_roi; - cv::Mat resized_img; - resize_uniform(image, resized_img, cv::Size(image_size, image_size), effect_roi); - auto results = detector.detect(resized_img, 0.4, 0.5); - draw_bboxes(image, results, effect_roi); - cv::waitKey(1); - } - return 0; + while (true) { + cap >> image; + object_rect effect_roi; + cv::Mat resized_img; + resize_uniform(image, resized_img, cv::Size(image_size, image_size), + effect_roi); + auto results = detector.detect(resized_img, 0.4, 0.5); + draw_bboxes(image, results, effect_roi); + cv::waitKey(1); + } + return 0; } -int benchmark(PicoDet& detector) -{ - int loop_num = 100; - int warm_up = 8; +int benchmark(PicoDet &detector) { + int loop_num = 100; + int warm_up = 8; - double time_min = DBL_MAX; - double time_max = -DBL_MAX; - double time_avg = 0; - cv::Mat image(image_size, image_size, CV_8UC3, cv::Scalar(1, 1, 1)); + double time_min = DBL_MAX; + double time_max = -DBL_MAX; + double time_avg = 0; + cv::Mat image(image_size, image_size, CV_8UC3, cv::Scalar(1, 1, 1)); - for (int i = 0; i < warm_up + loop_num; i++) - { - auto start = std::chrono::steady_clock::now(); - std::vector results; - results = detector.detect(image, 0.4, 0.5); - auto end = std::chrono::steady_clock::now(); - double time = std::chrono::duration(end - start).count(); - if (i >= warm_up) - { - time_min = (std::min)(time_min, time); - time_max = (std::max)(time_max, time); - time_avg += time; - } + for (int i = 0; i < warm_up + loop_num; i++) { + auto start = std::chrono::steady_clock::now(); + std::vector results; + results = detector.detect(image, 0.4, 0.5); + auto end = std::chrono::steady_clock::now(); + double time = + std::chrono::duration(end - start).count(); + if (i >= warm_up) { + time_min = (std::min)(time_min, time); + time_max = (std::max)(time_max, time); + time_avg += time; } - time_avg /= loop_num; - fprintf(stderr, "%20s min = %7.2f max = %7.2f avg = %7.2f\n", "picodet", time_min, time_max, time_avg); - return 0; + } + time_avg /= loop_num; + fprintf(stderr, "%20s min = %7.2f max = %7.2f avg = %7.2f\n", "picodet", + time_min, time_max, time_avg); + return 0; } +int main(int argc, char **argv) { + if (argc != 3) { + fprintf(stderr, "usage: %s [mode] [path]. \n For webcam mode=0, path is " + "cam id; \n For image demo, mode=1, path=xxx/xxx/*.jpg; \n " + "For video, mode=2; \n For benchmark, mode=3 path=0.\n", + argv[0]); + return -1; + } + std::cout << "start init model" << std::endl; + auto detector = PicoDet("../weight/picodet_m_416.xml"); + std::cout << "success" << std::endl; -int main(int argc, char** argv) -{ - if (argc != 3) - { - fprintf(stderr, "usage: %s [mode] [path]. \n For webcam mode=0, path is cam id; \n For image demo, mode=1, path=xxx/xxx/*.jpg; \n For video, mode=2; \n For benchmark, mode=3 path=0.\n", argv[0]); - return -1; - } - std::cout<<"start init model"< -int activation_function_softmax(const _Tp* src, _Tp* dst, int length) -{ - const _Tp alpha = *std::max_element(src, src + length); - _Tp denominator{ 0 }; - - for (int i = 0; i < length; ++i) - { - dst[i] = fast_exp(src[i] - alpha); - denominator += dst[i]; - } - - for (int i = 0; i < length; ++i) - { - dst[i] /= denominator; - } +inline float sigmoid(float x) { return 1.0f / (1.0f + fast_exp(-x)); } - return 0; -} +template +int activation_function_softmax(const _Tp *src, _Tp *dst, int length) { + const _Tp alpha = *std::max_element(src, src + length); + _Tp denominator{0}; -PicoDet::PicoDet(const char* model_path) -{ - InferenceEngine::Core ie; - InferenceEngine::CNNNetwork model = ie.ReadNetwork(model_path); - // prepare input settings - InferenceEngine::InputsDataMap inputs_map(model.getInputsInfo()); - input_name_ = inputs_map.begin()->first; - InferenceEngine::InputInfo::Ptr input_info = inputs_map.begin()->second; - //prepare output settings - InferenceEngine::OutputsDataMap outputs_map(model.getOutputsInfo()); - for (auto &output_info : outputs_map) - { - output_info.second->setPrecision(InferenceEngine::Precision::FP32); - } + for (int i = 0; i < length; ++i) { + dst[i] = fast_exp(src[i] - alpha); + denominator += dst[i]; + } - //get network - network_ = ie.LoadNetwork(model, "CPU"); - infer_request_ = network_.CreateInferRequest(); + for (int i = 0; i < length; ++i) { + dst[i] /= denominator; + } + return 0; } -PicoDet::~PicoDet() -{ +PicoDet::PicoDet(const char *model_path) { + InferenceEngine::Core ie; + InferenceEngine::CNNNetwork model = ie.ReadNetwork(model_path); + // prepare input settings + InferenceEngine::InputsDataMap inputs_map(model.getInputsInfo()); + input_name_ = inputs_map.begin()->first; + InferenceEngine::InputInfo::Ptr input_info = inputs_map.begin()->second; + // prepare output settings + InferenceEngine::OutputsDataMap outputs_map(model.getOutputsInfo()); + for (auto &output_info : outputs_map) { + output_info.second->setPrecision(InferenceEngine::Precision::FP32); + } + + // get network + network_ = ie.LoadNetwork(model, "CPU"); + infer_request_ = network_.CreateInferRequest(); } -void PicoDet::preprocess(cv::Mat& image, InferenceEngine::Blob::Ptr& blob) -{ - int img_w = image.cols; - int img_h = image.rows; - int channels = 3; - - InferenceEngine::MemoryBlob::Ptr mblob = InferenceEngine::as(blob); - if (!mblob) - { - THROW_IE_EXCEPTION << "We expect blob to be inherited from MemoryBlob in matU8ToBlob, " - << "but by fact we were not able to cast inputBlob to MemoryBlob"; - } - auto mblobHolder = mblob->wmap(); - float *blob_data = mblobHolder.as(); - - for (size_t c = 0; c < channels; c++) - { - for (size_t h = 0; h < img_h; h++) - { - for (size_t w = 0; w < img_w; w++) - { - blob_data[c * img_w * img_h + h * img_w + w] = - (float)image.at(h, w)[c]; - } - } +PicoDet::~PicoDet() {} + +void PicoDet::preprocess(cv::Mat &image, InferenceEngine::Blob::Ptr &blob) { + int img_w = image.cols; + int img_h = image.rows; + int channels = 3; + + InferenceEngine::MemoryBlob::Ptr mblob = + InferenceEngine::as(blob); + if (!mblob) { + THROW_IE_EXCEPTION + << "We expect blob to be inherited from MemoryBlob in matU8ToBlob, " + << "but by fact we were not able to cast inputBlob to MemoryBlob"; + } + auto mblobHolder = mblob->wmap(); + float *blob_data = mblobHolder.as(); + + for (size_t c = 0; c < channels; c++) { + for (size_t h = 0; h < img_h; h++) { + for (size_t w = 0; w < img_w; w++) { + blob_data[c * img_w * img_h + h * img_w + w] = + (float)image.at(h, w)[c]; + } } + } } -std::vector PicoDet::detect(cv::Mat image, float score_threshold, float nms_threshold) -{ - InferenceEngine::Blob::Ptr input_blob = infer_request_.GetBlob(input_name_); - preprocess(image, input_blob); - - // do inference - infer_request_.Infer(); - - // get output - std::vector> results; - results.resize(this->num_class_); - - for (const auto& head_info : this->heads_info_) - { - const InferenceEngine::Blob::Ptr dis_pred_blob = infer_request_.GetBlob(head_info.dis_layer); - const InferenceEngine::Blob::Ptr cls_pred_blob = infer_request_.GetBlob(head_info.cls_layer); - - auto mdis_pred = InferenceEngine::as(dis_pred_blob); - auto mdis_pred_holder = mdis_pred->rmap(); - const float *dis_pred = mdis_pred_holder.as(); - - auto mcls_pred = InferenceEngine::as(cls_pred_blob); - auto mcls_pred_holder = mcls_pred->rmap(); - const float *cls_pred = mcls_pred_holder.as(); - this->decode_infer(cls_pred, dis_pred, head_info.stride, score_threshold, results); - } - - std::vector dets; - for (int i = 0; i < (int)results.size(); i++) - { - this->nms(results[i], nms_threshold); - - for (auto& box : results[i]) - { - dets.push_back(box); - } +std::vector PicoDet::detect(cv::Mat image, float score_threshold, + float nms_threshold) { + InferenceEngine::Blob::Ptr input_blob = infer_request_.GetBlob(input_name_); + preprocess(image, input_blob); + + // do inference + infer_request_.Infer(); + + // get output + std::vector> results; + results.resize(this->num_class_); + + for (const auto &head_info : this->heads_info_) { + const InferenceEngine::Blob::Ptr dis_pred_blob = + infer_request_.GetBlob(head_info.dis_layer); + const InferenceEngine::Blob::Ptr cls_pred_blob = + infer_request_.GetBlob(head_info.cls_layer); + + auto mdis_pred = + InferenceEngine::as(dis_pred_blob); + auto mdis_pred_holder = mdis_pred->rmap(); + const float *dis_pred = mdis_pred_holder.as(); + + auto mcls_pred = + InferenceEngine::as(cls_pred_blob); + auto mcls_pred_holder = mcls_pred->rmap(); + const float *cls_pred = mcls_pred_holder.as(); + this->decode_infer(cls_pred, dis_pred, head_info.stride, score_threshold, + results); + } + + std::vector dets; + for (int i = 0; i < (int)results.size(); i++) { + this->nms(results[i], nms_threshold); + + for (auto &box : results[i]) { + dets.push_back(box); } - return dets; + } + return dets; } -void PicoDet::decode_infer(const float*& cls_pred, const float*& dis_pred, int stride, float threshold, std::vector>& results) -{ - int feature_h = input_size_ / stride; - int feature_w = input_size_ / stride; - for (int idx = 0; idx < feature_h * feature_w; idx++) - { - int row = idx / feature_w; - int col = idx % feature_w; - float score = 0; - int cur_label = 0; - - for (int label = 0; label < num_class_; label++) - { - if (cls_pred[idx * num_class_ +label] > score) - { - score = cls_pred[idx * num_class_ + label]; - cur_label = label; - } - } - if (score > threshold) - { - const float* bbox_pred = dis_pred + idx * (reg_max_ + 1) * 4; - results[cur_label].push_back(this->disPred2Bbox(bbox_pred, cur_label, score, col, row, stride)); - } - +void PicoDet::decode_infer(const float *&cls_pred, const float *&dis_pred, + int stride, float threshold, + std::vector> &results) { + int feature_h = ceil((float)input_size_ / stride); + int feature_w = ceil((float)input_size_ / stride); + for (int idx = 0; idx < feature_h * feature_w; idx++) { + int row = idx / feature_w; + int col = idx % feature_w; + float score = 0; + int cur_label = 0; + + for (int label = 0; label < num_class_; label++) { + if (cls_pred[idx * num_class_ + label] > score) { + score = cls_pred[idx * num_class_ + label]; + cur_label = label; + } } + if (score > threshold) { + const float *bbox_pred = dis_pred + idx * (reg_max_ + 1) * 4; + results[cur_label].push_back( + this->disPred2Bbox(bbox_pred, cur_label, score, col, row, stride)); + } + } } -BoxInfo PicoDet::disPred2Bbox(const float*& dfl_det, int label, float score, int x, int y, int stride) -{ - float ct_x = (x + 0.5) * stride; - float ct_y = (y + 0.5) * stride; - std::vector dis_pred; - dis_pred.resize(4); - for (int i = 0; i < 4; i++) - { - float dis = 0; - float* dis_after_sm = new float[reg_max_ + 1]; - activation_function_softmax(dfl_det + i * (reg_max_ + 1), dis_after_sm, reg_max_ + 1); - for (int j = 0; j < reg_max_ + 1; j++) - { - dis += j * dis_after_sm[j]; - } - dis *= stride; - dis_pred[i] = dis; - delete[] dis_after_sm; +BoxInfo PicoDet::disPred2Bbox(const float *&dfl_det, int label, float score, + int x, int y, int stride) { + float ct_x = (x + 0.5) * stride; + float ct_y = (y + 0.5) * stride; + std::vector dis_pred; + dis_pred.resize(4); + for (int i = 0; i < 4; i++) { + float dis = 0; + float *dis_after_sm = new float[reg_max_ + 1]; + activation_function_softmax(dfl_det + i * (reg_max_ + 1), dis_after_sm, + reg_max_ + 1); + for (int j = 0; j < reg_max_ + 1; j++) { + dis += j * dis_after_sm[j]; } - float xmin = (std::max)(ct_x - dis_pred[0], .0f); - float ymin = (std::max)(ct_y - dis_pred[1], .0f); - float xmax = (std::min)(ct_x + dis_pred[2], (float)this->input_size_); - float ymax = (std::min)(ct_y + dis_pred[3], (float)this->input_size_); - return BoxInfo { xmin, ymin, xmax, ymax, score, label }; + dis *= stride; + dis_pred[i] = dis; + delete[] dis_after_sm; + } + float xmin = (std::max)(ct_x - dis_pred[0], .0f); + float ymin = (std::max)(ct_y - dis_pred[1], .0f); + float xmax = (std::min)(ct_x + dis_pred[2], (float)this->input_size_); + float ymax = (std::min)(ct_y + dis_pred[3], (float)this->input_size_); + return BoxInfo{xmin, ymin, xmax, ymax, score, label}; } -void PicoDet::nms(std::vector& input_boxes, float NMS_THRESH) -{ - std::sort(input_boxes.begin(), input_boxes.end(), [](BoxInfo a, BoxInfo b) { return a.score > b.score; }); - std::vector vArea(input_boxes.size()); - for (int i = 0; i < int(input_boxes.size()); ++i) - { - vArea[i] = (input_boxes.at(i).x2 - input_boxes.at(i).x1 + 1) - * (input_boxes.at(i).y2 - input_boxes.at(i).y1 + 1); - } - for (int i = 0; i < int(input_boxes.size()); ++i) - { - for (int j = i + 1; j < int(input_boxes.size());) - { - float xx1 = (std::max)(input_boxes[i].x1, input_boxes[j].x1); - float yy1 = (std::max)(input_boxes[i].y1, input_boxes[j].y1); - float xx2 = (std::min)(input_boxes[i].x2, input_boxes[j].x2); - float yy2 = (std::min)(input_boxes[i].y2, input_boxes[j].y2); - float w = (std::max)(float(0), xx2 - xx1 + 1); - float h = (std::max)(float(0), yy2 - yy1 + 1); - float inter = w * h; - float ovr = inter / (vArea[i] + vArea[j] - inter); - if (ovr >= NMS_THRESH) - { - input_boxes.erase(input_boxes.begin() + j); - vArea.erase(vArea.begin() + j); - } - else - { - j++; - } - } +void PicoDet::nms(std::vector &input_boxes, float NMS_THRESH) { + std::sort(input_boxes.begin(), input_boxes.end(), + [](BoxInfo a, BoxInfo b) { return a.score > b.score; }); + std::vector vArea(input_boxes.size()); + for (int i = 0; i < int(input_boxes.size()); ++i) { + vArea[i] = (input_boxes.at(i).x2 - input_boxes.at(i).x1 + 1) * + (input_boxes.at(i).y2 - input_boxes.at(i).y1 + 1); + } + for (int i = 0; i < int(input_boxes.size()); ++i) { + for (int j = i + 1; j < int(input_boxes.size());) { + float xx1 = (std::max)(input_boxes[i].x1, input_boxes[j].x1); + float yy1 = (std::max)(input_boxes[i].y1, input_boxes[j].y1); + float xx2 = (std::min)(input_boxes[i].x2, input_boxes[j].x2); + float yy2 = (std::min)(input_boxes[i].y2, input_boxes[j].y2); + float w = (std::max)(float(0), xx2 - xx1 + 1); + float h = (std::max)(float(0), yy2 - yy1 + 1); + float inter = w * h; + float ovr = inter / (vArea[i] + vArea[j] - inter); + if (ovr >= NMS_THRESH) { + input_boxes.erase(input_boxes.begin() + j); + vArea.erase(vArea.begin() + j); + } else { + j++; + } } + } } -- GitLab