From 35c9da7a61d18a0bd9892b4ac96fc86a0371ff5f Mon Sep 17 00:00:00 2001 From: JYChen Date: Wed, 30 Mar 2022 17:37:29 +0800 Subject: [PATCH] fix cpp inference trt error when bs>1 (#5518) --- deploy/cpp/src/object_detector.cc | 115 ++++++++++++------------------ 1 file changed, 46 insertions(+), 69 deletions(-) diff --git a/deploy/cpp/src/object_detector.cc b/deploy/cpp/src/object_detector.cc index a99fcd515..e455c90aa 100644 --- a/deploy/cpp/src/object_detector.cc +++ b/deploy/cpp/src/object_detector.cc @@ -15,16 +15,15 @@ // for setprecision #include #include -#include "include/object_detector.h" -using namespace paddle_infer; +#include "include/object_detector.h" namespace PaddleDetection { // Load Model and create model predictor -void ObjectDetector::LoadModel(const std::string& model_dir, +void ObjectDetector::LoadModel(const std::string &model_dir, const int batch_size, - const std::string& run_mode) { + const std::string &run_mode) { paddle_infer::Config config; std::string prog_file = model_dir + OS_PATH_SEP + "model.pdmodel"; std::string params_file = model_dir + OS_PATH_SEP + "model.pdiparams"; @@ -42,27 +41,22 @@ void ObjectDetector::LoadModel(const std::string& model_dir, } else if (run_mode == "trt_int8") { precision = paddle_infer::Config::Precision::kInt8; } else { - printf( - "run_mode should be 'paddle', 'trt_fp32', 'trt_fp16' or " - "'trt_int8'"); + printf("run_mode should be 'paddle', 'trt_fp32', 'trt_fp16' or " + "'trt_int8'"); } // set tensorrt - config.EnableTensorRtEngine(1 << 30, - batch_size, - this->min_subgraph_size_, - precision, - false, - this->trt_calib_mode_); + config.EnableTensorRtEngine(1 << 30, batch_size, this->min_subgraph_size_, + precision, false, this->trt_calib_mode_); // set use dynamic shape if (this->use_dynamic_shape_) { - // set DynamicShsape for image tensor + // set DynamicShape for image tensor const std::vector min_input_shape = { - 1, 3, this->trt_min_shape_, this->trt_min_shape_}; + batch_size, 3, this->trt_min_shape_, this->trt_min_shape_}; const std::vector max_input_shape = { - 1, 3, this->trt_max_shape_, this->trt_max_shape_}; + batch_size, 3, this->trt_max_shape_, this->trt_max_shape_}; const std::vector opt_input_shape = { - 1, 3, this->trt_opt_shape_, this->trt_opt_shape_}; + batch_size, 3, this->trt_opt_shape_, this->trt_opt_shape_}; const std::map> map_min_input_shape = { {"image", min_input_shape}}; const std::map> map_max_input_shape = { @@ -70,8 +64,8 @@ void ObjectDetector::LoadModel(const std::string& model_dir, const std::map> map_opt_input_shape = { {"image", opt_input_shape}}; - config.SetTRTDynamicShapeInfo( - map_min_input_shape, map_max_input_shape, map_opt_input_shape); + config.SetTRTDynamicShapeInfo(map_min_input_shape, map_max_input_shape, + map_opt_input_shape); std::cout << "TensorRT dynamic shape enabled" << std::endl; } } @@ -96,12 +90,11 @@ void ObjectDetector::LoadModel(const std::string& model_dir, } // Visualiztion MaskDetector results -cv::Mat VisualizeResult( - const cv::Mat& img, - const std::vector& results, - const std::vector& lables, - const std::vector& colormap, - const bool is_rbox = false) { +cv::Mat +VisualizeResult(const cv::Mat &img, + const std::vector &results, + const std::vector &lables, + const std::vector &colormap, const bool is_rbox = false) { cv::Mat vis_img = img.clone(); for (int i = 0; i < results.size(); ++i) { // Configure color and text size @@ -142,24 +135,18 @@ cv::Mat VisualizeResult( origin.y = results[i].rect[1]; // Configure text background - cv::Rect text_back = cv::Rect(results[i].rect[0], - results[i].rect[1] - text_size.height, - text_size.width, - text_size.height); + cv::Rect text_back = + cv::Rect(results[i].rect[0], results[i].rect[1] - text_size.height, + text_size.width, text_size.height); // Draw text, and background cv::rectangle(vis_img, text_back, roi_color, -1); - cv::putText(vis_img, - text, - origin, - font_face, - font_scale, - cv::Scalar(255, 255, 255), - thickness); + cv::putText(vis_img, text, origin, font_face, font_scale, + cv::Scalar(255, 255, 255), thickness); } return vis_img; } -void ObjectDetector::Preprocess(const cv::Mat& ori_im) { +void ObjectDetector::Preprocess(const cv::Mat &ori_im) { // Clone the image : keep the original mat for postprocess cv::Mat im = ori_im.clone(); cv::cvtColor(im, im, cv::COLOR_BGR2RGB); @@ -168,9 +155,8 @@ void ObjectDetector::Preprocess(const cv::Mat& ori_im) { void ObjectDetector::Postprocess( const std::vector mats, - std::vector* result, - std::vector bbox_num, - std::vector output_data_, + std::vector *result, + std::vector bbox_num, std::vector output_data_, bool is_rbox = false) { result->clear(); int start_idx = 0; @@ -226,12 +212,11 @@ void ObjectDetector::Postprocess( } void ObjectDetector::Predict(const std::vector imgs, - const double threshold, - const int warmup, + const double threshold, const int warmup, const int repeats, - std::vector* result, - std::vector* bbox_num, - std::vector* times) { + std::vector *result, + std::vector *bbox_num, + std::vector *times) { auto preprocess_start = std::chrono::steady_clock::now(); int batch_size = imgs.size(); @@ -239,7 +224,7 @@ void ObjectDetector::Predict(const std::vector imgs, std::vector in_data_all; std::vector im_shape_all(batch_size * 2); std::vector scale_factor_all(batch_size * 2); - std::vector output_data_list_; + std::vector output_data_list_; std::vector out_bbox_num_data_; // in_net img for each batch @@ -255,9 +240,8 @@ void ObjectDetector::Predict(const std::vector imgs, scale_factor_all[bs_idx * 2] = inputs_.scale_factor_[0]; scale_factor_all[bs_idx * 2 + 1] = inputs_.scale_factor_[1]; - // TODO: reduce cost time - in_data_all.insert( - in_data_all.end(), inputs_.im_data_.begin(), inputs_.im_data_.end()); + in_data_all.insert(in_data_all.end(), inputs_.im_data_.begin(), + inputs_.im_data_.end()); // collect in_net img in_net_img_all[bs_idx] = inputs_.in_net_im_; @@ -276,10 +260,10 @@ void ObjectDetector::Predict(const std::vector imgs, pad_img.convertTo(pad_img, CV_32FC3); std::vector pad_data; pad_data.resize(rc * rh * rw); - float* base = pad_data.data(); + float *base = pad_data.data(); for (int i = 0; i < rc; ++i) { - cv::extractChannel( - pad_img, cv::Mat(rh, rw, CV_32FC1, base + i * rh * rw), i); + cv::extractChannel(pad_img, + cv::Mat(rh, rw, CV_32FC1, base + i * rh * rw), i); } in_data_all.insert(in_data_all.end(), pad_data.begin(), pad_data.end()); } @@ -290,7 +274,7 @@ void ObjectDetector::Predict(const std::vector imgs, auto preprocess_end = std::chrono::steady_clock::now(); // Prepare input tensor auto input_names = predictor_->GetInputNames(); - for (const auto& tensor_name : input_names) { + for (const auto &tensor_name : input_names) { auto in_tensor = predictor_->GetInputHandle(tensor_name); if (tensor_name == "image") { int rh = inputs_.in_net_shape_[0]; @@ -320,8 +304,8 @@ void ObjectDetector::Predict(const std::vector imgs, for (int j = 0; j < output_names.size(); j++) { auto output_tensor = predictor_->GetOutputHandle(output_names[j]); std::vector output_shape = output_tensor->shape(); - int out_num = std::accumulate( - output_shape.begin(), output_shape.end(), 1, std::multiplies()); + int out_num = std::accumulate(output_shape.begin(), output_shape.end(), 1, + std::multiplies()); if (output_tensor->type() == paddle_infer::DataType::INT32) { out_bbox_num_data_.resize(out_num); output_tensor->CopyToCpu(out_bbox_num_data_.data()); @@ -344,8 +328,8 @@ void ObjectDetector::Predict(const std::vector imgs, for (int j = 0; j < output_names.size(); j++) { auto output_tensor = predictor_->GetOutputHandle(output_names[j]); std::vector output_shape = output_tensor->shape(); - int out_num = std::accumulate( - output_shape.begin(), output_shape.end(), 1, std::multiplies()); + int out_num = std::accumulate(output_shape.begin(), output_shape.end(), 1, + std::multiplies()); output_shape_list.push_back(output_shape); if (output_tensor->type() == paddle_infer::DataType::INT32) { out_bbox_num_data_.resize(out_num); @@ -371,22 +355,15 @@ void ObjectDetector::Predict(const std::vector imgs, if (i == config_.fpn_stride_.size()) { reg_max = output_shape_list[i][2] / 4 - 1; } - float* buffer = new float[out_tensor_list[i].size()]; - memcpy(buffer, - &out_tensor_list[i][0], + float *buffer = new float[out_tensor_list[i].size()]; + memcpy(buffer, &out_tensor_list[i][0], out_tensor_list[i].size() * sizeof(float)); output_data_list_.push_back(buffer); } PaddleDetection::PicoDetPostProcess( - result, - output_data_list_, - config_.fpn_stride_, - inputs_.im_shape_, - inputs_.scale_factor_, - config_.nms_info_["score_threshold"].as(), - config_.nms_info_["nms_threshold"].as(), - num_class, - reg_max); + result, output_data_list_, config_.fpn_stride_, inputs_.im_shape_, + inputs_.scale_factor_, config_.nms_info_["score_threshold"].as(), + config_.nms_info_["nms_threshold"].as(), num_class, reg_max); bbox_num->push_back(result->size()); } else { is_rbox = output_shape_list[0][output_shape_list[0].size() - 1] % 10 == 0; -- GitLab