未验证 提交 35c9da7a 编写于 作者: J JYChen 提交者: GitHub

fix cpp inference trt error when bs>1 (#5518)

上级 fb504439
...@@ -15,16 +15,15 @@ ...@@ -15,16 +15,15 @@
// for setprecision // for setprecision
#include <chrono> #include <chrono>
#include <iomanip> #include <iomanip>
#include "include/object_detector.h"
using namespace paddle_infer; #include "include/object_detector.h"
namespace PaddleDetection { namespace PaddleDetection {
// Load Model and create model predictor // Load Model and create model predictor
void ObjectDetector::LoadModel(const std::string& model_dir, void ObjectDetector::LoadModel(const std::string &model_dir,
const int batch_size, const int batch_size,
const std::string& run_mode) { const std::string &run_mode) {
paddle_infer::Config config; paddle_infer::Config config;
std::string prog_file = model_dir + OS_PATH_SEP + "model.pdmodel"; std::string prog_file = model_dir + OS_PATH_SEP + "model.pdmodel";
std::string params_file = model_dir + OS_PATH_SEP + "model.pdiparams"; std::string params_file = model_dir + OS_PATH_SEP + "model.pdiparams";
...@@ -42,27 +41,22 @@ void ObjectDetector::LoadModel(const std::string& model_dir, ...@@ -42,27 +41,22 @@ void ObjectDetector::LoadModel(const std::string& model_dir,
} else if (run_mode == "trt_int8") { } else if (run_mode == "trt_int8") {
precision = paddle_infer::Config::Precision::kInt8; precision = paddle_infer::Config::Precision::kInt8;
} else { } else {
printf( printf("run_mode should be 'paddle', 'trt_fp32', 'trt_fp16' or "
"run_mode should be 'paddle', 'trt_fp32', 'trt_fp16' or " "'trt_int8'");
"'trt_int8'");
} }
// set tensorrt // set tensorrt
config.EnableTensorRtEngine(1 << 30, config.EnableTensorRtEngine(1 << 30, batch_size, this->min_subgraph_size_,
batch_size, precision, false, this->trt_calib_mode_);
this->min_subgraph_size_,
precision,
false,
this->trt_calib_mode_);
// set use dynamic shape // set use dynamic shape
if (this->use_dynamic_shape_) { if (this->use_dynamic_shape_) {
// set DynamicShsape for image tensor // set DynamicShape for image tensor
const std::vector<int> min_input_shape = { const std::vector<int> min_input_shape = {
1, 3, this->trt_min_shape_, this->trt_min_shape_}; batch_size, 3, this->trt_min_shape_, this->trt_min_shape_};
const std::vector<int> max_input_shape = { const std::vector<int> max_input_shape = {
1, 3, this->trt_max_shape_, this->trt_max_shape_}; batch_size, 3, this->trt_max_shape_, this->trt_max_shape_};
const std::vector<int> opt_input_shape = { const std::vector<int> opt_input_shape = {
1, 3, this->trt_opt_shape_, this->trt_opt_shape_}; batch_size, 3, this->trt_opt_shape_, this->trt_opt_shape_};
const std::map<std::string, std::vector<int>> map_min_input_shape = { const std::map<std::string, std::vector<int>> map_min_input_shape = {
{"image", min_input_shape}}; {"image", min_input_shape}};
const std::map<std::string, std::vector<int>> map_max_input_shape = { const std::map<std::string, std::vector<int>> map_max_input_shape = {
...@@ -70,8 +64,8 @@ void ObjectDetector::LoadModel(const std::string& model_dir, ...@@ -70,8 +64,8 @@ void ObjectDetector::LoadModel(const std::string& model_dir,
const std::map<std::string, std::vector<int>> map_opt_input_shape = { const std::map<std::string, std::vector<int>> map_opt_input_shape = {
{"image", opt_input_shape}}; {"image", opt_input_shape}};
config.SetTRTDynamicShapeInfo( config.SetTRTDynamicShapeInfo(map_min_input_shape, map_max_input_shape,
map_min_input_shape, map_max_input_shape, map_opt_input_shape); map_opt_input_shape);
std::cout << "TensorRT dynamic shape enabled" << std::endl; std::cout << "TensorRT dynamic shape enabled" << std::endl;
} }
} }
...@@ -96,12 +90,11 @@ void ObjectDetector::LoadModel(const std::string& model_dir, ...@@ -96,12 +90,11 @@ void ObjectDetector::LoadModel(const std::string& model_dir,
} }
// Visualization MaskDetector results // Visualization MaskDetector results
cv::Mat VisualizeResult( cv::Mat
const cv::Mat& img, VisualizeResult(const cv::Mat &img,
const std::vector<PaddleDetection::ObjectResult>& results, const std::vector<PaddleDetection::ObjectResult> &results,
const std::vector<std::string>& lables, const std::vector<std::string> &lables,
const std::vector<int>& colormap, const std::vector<int> &colormap, const bool is_rbox = false) {
const bool is_rbox = false) {
cv::Mat vis_img = img.clone(); cv::Mat vis_img = img.clone();
for (int i = 0; i < results.size(); ++i) { for (int i = 0; i < results.size(); ++i) {
// Configure color and text size // Configure color and text size
...@@ -142,24 +135,18 @@ cv::Mat VisualizeResult( ...@@ -142,24 +135,18 @@ cv::Mat VisualizeResult(
origin.y = results[i].rect[1]; origin.y = results[i].rect[1];
// Configure text background // Configure text background
cv::Rect text_back = cv::Rect(results[i].rect[0], cv::Rect text_back =
results[i].rect[1] - text_size.height, cv::Rect(results[i].rect[0], results[i].rect[1] - text_size.height,
text_size.width, text_size.width, text_size.height);
text_size.height);
// Draw text, and background // Draw text, and background
cv::rectangle(vis_img, text_back, roi_color, -1); cv::rectangle(vis_img, text_back, roi_color, -1);
cv::putText(vis_img, cv::putText(vis_img, text, origin, font_face, font_scale,
text, cv::Scalar(255, 255, 255), thickness);
origin,
font_face,
font_scale,
cv::Scalar(255, 255, 255),
thickness);
} }
return vis_img; return vis_img;
} }
void ObjectDetector::Preprocess(const cv::Mat& ori_im) { void ObjectDetector::Preprocess(const cv::Mat &ori_im) {
// Clone the image : keep the original mat for postprocess // Clone the image : keep the original mat for postprocess
cv::Mat im = ori_im.clone(); cv::Mat im = ori_im.clone();
cv::cvtColor(im, im, cv::COLOR_BGR2RGB); cv::cvtColor(im, im, cv::COLOR_BGR2RGB);
...@@ -168,9 +155,8 @@ void ObjectDetector::Preprocess(const cv::Mat& ori_im) { ...@@ -168,9 +155,8 @@ void ObjectDetector::Preprocess(const cv::Mat& ori_im) {
void ObjectDetector::Postprocess( void ObjectDetector::Postprocess(
const std::vector<cv::Mat> mats, const std::vector<cv::Mat> mats,
std::vector<PaddleDetection::ObjectResult>* result, std::vector<PaddleDetection::ObjectResult> *result,
std::vector<int> bbox_num, std::vector<int> bbox_num, std::vector<float> output_data_,
std::vector<float> output_data_,
bool is_rbox = false) { bool is_rbox = false) {
result->clear(); result->clear();
int start_idx = 0; int start_idx = 0;
...@@ -226,12 +212,11 @@ void ObjectDetector::Postprocess( ...@@ -226,12 +212,11 @@ void ObjectDetector::Postprocess(
} }
void ObjectDetector::Predict(const std::vector<cv::Mat> imgs, void ObjectDetector::Predict(const std::vector<cv::Mat> imgs,
const double threshold, const double threshold, const int warmup,
const int warmup,
const int repeats, const int repeats,
std::vector<PaddleDetection::ObjectResult>* result, std::vector<PaddleDetection::ObjectResult> *result,
std::vector<int>* bbox_num, std::vector<int> *bbox_num,
std::vector<double>* times) { std::vector<double> *times) {
auto preprocess_start = std::chrono::steady_clock::now(); auto preprocess_start = std::chrono::steady_clock::now();
int batch_size = imgs.size(); int batch_size = imgs.size();
...@@ -239,7 +224,7 @@ void ObjectDetector::Predict(const std::vector<cv::Mat> imgs, ...@@ -239,7 +224,7 @@ void ObjectDetector::Predict(const std::vector<cv::Mat> imgs,
std::vector<float> in_data_all; std::vector<float> in_data_all;
std::vector<float> im_shape_all(batch_size * 2); std::vector<float> im_shape_all(batch_size * 2);
std::vector<float> scale_factor_all(batch_size * 2); std::vector<float> scale_factor_all(batch_size * 2);
std::vector<const float*> output_data_list_; std::vector<const float *> output_data_list_;
std::vector<int> out_bbox_num_data_; std::vector<int> out_bbox_num_data_;
// in_net img for each batch // in_net img for each batch
...@@ -255,9 +240,8 @@ void ObjectDetector::Predict(const std::vector<cv::Mat> imgs, ...@@ -255,9 +240,8 @@ void ObjectDetector::Predict(const std::vector<cv::Mat> imgs,
scale_factor_all[bs_idx * 2] = inputs_.scale_factor_[0]; scale_factor_all[bs_idx * 2] = inputs_.scale_factor_[0];
scale_factor_all[bs_idx * 2 + 1] = inputs_.scale_factor_[1]; scale_factor_all[bs_idx * 2 + 1] = inputs_.scale_factor_[1];
// TODO: reduce cost time in_data_all.insert(in_data_all.end(), inputs_.im_data_.begin(),
in_data_all.insert( inputs_.im_data_.end());
in_data_all.end(), inputs_.im_data_.begin(), inputs_.im_data_.end());
// collect in_net img // collect in_net img
in_net_img_all[bs_idx] = inputs_.in_net_im_; in_net_img_all[bs_idx] = inputs_.in_net_im_;
...@@ -276,10 +260,10 @@ void ObjectDetector::Predict(const std::vector<cv::Mat> imgs, ...@@ -276,10 +260,10 @@ void ObjectDetector::Predict(const std::vector<cv::Mat> imgs,
pad_img.convertTo(pad_img, CV_32FC3); pad_img.convertTo(pad_img, CV_32FC3);
std::vector<float> pad_data; std::vector<float> pad_data;
pad_data.resize(rc * rh * rw); pad_data.resize(rc * rh * rw);
float* base = pad_data.data(); float *base = pad_data.data();
for (int i = 0; i < rc; ++i) { for (int i = 0; i < rc; ++i) {
cv::extractChannel( cv::extractChannel(pad_img,
pad_img, cv::Mat(rh, rw, CV_32FC1, base + i * rh * rw), i); cv::Mat(rh, rw, CV_32FC1, base + i * rh * rw), i);
} }
in_data_all.insert(in_data_all.end(), pad_data.begin(), pad_data.end()); in_data_all.insert(in_data_all.end(), pad_data.begin(), pad_data.end());
} }
...@@ -290,7 +274,7 @@ void ObjectDetector::Predict(const std::vector<cv::Mat> imgs, ...@@ -290,7 +274,7 @@ void ObjectDetector::Predict(const std::vector<cv::Mat> imgs,
auto preprocess_end = std::chrono::steady_clock::now(); auto preprocess_end = std::chrono::steady_clock::now();
// Prepare input tensor // Prepare input tensor
auto input_names = predictor_->GetInputNames(); auto input_names = predictor_->GetInputNames();
for (const auto& tensor_name : input_names) { for (const auto &tensor_name : input_names) {
auto in_tensor = predictor_->GetInputHandle(tensor_name); auto in_tensor = predictor_->GetInputHandle(tensor_name);
if (tensor_name == "image") { if (tensor_name == "image") {
int rh = inputs_.in_net_shape_[0]; int rh = inputs_.in_net_shape_[0];
...@@ -320,8 +304,8 @@ void ObjectDetector::Predict(const std::vector<cv::Mat> imgs, ...@@ -320,8 +304,8 @@ void ObjectDetector::Predict(const std::vector<cv::Mat> imgs,
for (int j = 0; j < output_names.size(); j++) { for (int j = 0; j < output_names.size(); j++) {
auto output_tensor = predictor_->GetOutputHandle(output_names[j]); auto output_tensor = predictor_->GetOutputHandle(output_names[j]);
std::vector<int> output_shape = output_tensor->shape(); std::vector<int> output_shape = output_tensor->shape();
int out_num = std::accumulate( int out_num = std::accumulate(output_shape.begin(), output_shape.end(), 1,
output_shape.begin(), output_shape.end(), 1, std::multiplies<int>()); std::multiplies<int>());
if (output_tensor->type() == paddle_infer::DataType::INT32) { if (output_tensor->type() == paddle_infer::DataType::INT32) {
out_bbox_num_data_.resize(out_num); out_bbox_num_data_.resize(out_num);
output_tensor->CopyToCpu(out_bbox_num_data_.data()); output_tensor->CopyToCpu(out_bbox_num_data_.data());
...@@ -344,8 +328,8 @@ void ObjectDetector::Predict(const std::vector<cv::Mat> imgs, ...@@ -344,8 +328,8 @@ void ObjectDetector::Predict(const std::vector<cv::Mat> imgs,
for (int j = 0; j < output_names.size(); j++) { for (int j = 0; j < output_names.size(); j++) {
auto output_tensor = predictor_->GetOutputHandle(output_names[j]); auto output_tensor = predictor_->GetOutputHandle(output_names[j]);
std::vector<int> output_shape = output_tensor->shape(); std::vector<int> output_shape = output_tensor->shape();
int out_num = std::accumulate( int out_num = std::accumulate(output_shape.begin(), output_shape.end(), 1,
output_shape.begin(), output_shape.end(), 1, std::multiplies<int>()); std::multiplies<int>());
output_shape_list.push_back(output_shape); output_shape_list.push_back(output_shape);
if (output_tensor->type() == paddle_infer::DataType::INT32) { if (output_tensor->type() == paddle_infer::DataType::INT32) {
out_bbox_num_data_.resize(out_num); out_bbox_num_data_.resize(out_num);
...@@ -371,22 +355,15 @@ void ObjectDetector::Predict(const std::vector<cv::Mat> imgs, ...@@ -371,22 +355,15 @@ void ObjectDetector::Predict(const std::vector<cv::Mat> imgs,
if (i == config_.fpn_stride_.size()) { if (i == config_.fpn_stride_.size()) {
reg_max = output_shape_list[i][2] / 4 - 1; reg_max = output_shape_list[i][2] / 4 - 1;
} }
float* buffer = new float[out_tensor_list[i].size()]; float *buffer = new float[out_tensor_list[i].size()];
memcpy(buffer, memcpy(buffer, &out_tensor_list[i][0],
&out_tensor_list[i][0],
out_tensor_list[i].size() * sizeof(float)); out_tensor_list[i].size() * sizeof(float));
output_data_list_.push_back(buffer); output_data_list_.push_back(buffer);
} }
PaddleDetection::PicoDetPostProcess( PaddleDetection::PicoDetPostProcess(
result, result, output_data_list_, config_.fpn_stride_, inputs_.im_shape_,
output_data_list_, inputs_.scale_factor_, config_.nms_info_["score_threshold"].as<float>(),
config_.fpn_stride_, config_.nms_info_["nms_threshold"].as<float>(), num_class, reg_max);
inputs_.im_shape_,
inputs_.scale_factor_,
config_.nms_info_["score_threshold"].as<float>(),
config_.nms_info_["nms_threshold"].as<float>(),
num_class,
reg_max);
bbox_num->push_back(result->size()); bbox_num->push_back(result->size());
} else { } else {
is_rbox = output_shape_list[0][output_shape_list[0].size() - 1] % 10 == 0; is_rbox = output_shape_list[0][output_shape_list[0].size() - 1] % 10 == 0;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册