Unverified commit 24c01d29, authored by qingqing01, committed by GitHub

Add TensorRT support in cpp deployment (#704)

* Add TensorRT support in cpp deployment
* Change putText style
Parent 4b17659c
# Linux Platform Compilation Guide
## Notes
This document has been tested on the `Linux` platform with `GCC 4.8.5` and `GCC 4.9.4`. To build with a newer G++ version, you need to recompile the Paddle inference library first; see [Build the Paddle inference library from source](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/advanced_guide/inference_deployment/inference/build_and_install_lib_cn.html).
## Prerequisites
* G++ 4.8.2 ~ 4.9.4
@@ -19,7 +19,7 @@
### Step2: Download the PaddlePaddle C++ inference library fluid_inference
The PaddlePaddle C++ inference library provides different prebuilt packages for different `CPU` and `CUDA` versions; download the one that matches your environment: [C++ inference library download list](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/advanced_guide/inference_deployment/inference/build_and_install_lib_cn.html)
After downloading and extracting, the `/root/projects/fluid_inference` directory contains:
@@ -39,6 +39,8 @@ fluid_inference
The `cmake` build command lives in `scripts/build.sh`; adjust the main parameters to your environment. The key options are described below:
```
# Whether to use the GPU (i.e., whether to use CUDA)
WITH_GPU=OFF
# Use MKL or OpenBLAS
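# When WITH_GPU=ON, TensorRT support can additionally be switched on; the
# related WITH_TENSORRT and TENSORRT_DIR options are shown further below.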
@@ -93,6 +95,7 @@ make
| image_path | Path of the image file to predict |
| video_path | Path of the video file to predict |
| use_gpu | Whether to predict on the GPU; valid values are 0 or 1 (default: 0) |
| run_mode | Only effective when using the GPU; defaults to fluid, options: fluid/trt_fp32/trt_fp16 |
**Note**: if both `video_path` and `image_path` are set, the program predicts only on `video_path`.
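For example, a hypothetical invocation running TensorRT FP16 on the GPU (paths are placeholders): `./main --model_dir=/PATH/TO/INFERENCE_MODEL/ --image_path=/PATH/TO/INPUT/IMAGE/ --use_gpu=1 --run_mode=trt_fp16`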
......
@@ -54,17 +54,21 @@ cv::Mat VisualizeResult(const cv::Mat& img,
class ObjectDetector {
public:
  explicit ObjectDetector(const std::string& model_dir, bool use_gpu = false,
                          const std::string& run_mode = "fluid") {
config_.load_config(model_dir);
threshold_ = config_.draw_threshold_;
preprocessor_.Init(config_.preprocess_info_, config_.arch_);
    LoadModel(model_dir, use_gpu, config_.min_subgraph_size_, 1, run_mode);
}
// Load Paddle inference model
void LoadModel(
const std::string& model_dir,
      bool use_gpu,
      const int min_subgraph_size,
      const int batch_size = 1,
      const std::string& run_mode = "fluid");
// Run predictor
void Predict(
......
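Since the added `run_mode` parameter defaults to `"fluid"`, existing call sites keep compiling unchanged. A minimal construction sketch (the header name and model path are placeholders):

```
#include "object_detector.h"  // assumed header name for this fragment

int main() {
  // Select the TensorRT FP16 engine; "fluid" would use the plain GPU/CPU path.
  PaddleDetection::ObjectDetector det("/path/to/inference_model",
                                      /*use_gpu=*/true,
                                      /*run_mode=*/"trt_fp16");
  return 0;
}
```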
@@ -4,7 +4,7 @@ WITH_GPU=OFF
WITH_MKL=ON
# Whether to integrate TensorRT (effective only when WITH_GPU=ON)
WITH_TENSORRT=OFF
# Path to TensorRT
TENSORRT_DIR=/path/to/TensorRT/
# Path to the Paddle inference library
PADDLE_DIR=/path/to/fluid_inference/
......
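Presumably `TENSORRT_DIR` points at the root of the extracted TensorRT package, i.e. the directory containing its `include/` and `lib/` subdirectories; adjust it to wherever TensorRT is installed.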
@@ -25,6 +25,7 @@ DEFINE_string(model_dir, "", "Path of inference model");
DEFINE_string(image_path, "", "Path of input image");
DEFINE_string(video_path, "", "Path of input video");
DEFINE_bool(use_gpu, false, "Inferring with GPU or CPU");
DEFINE_string(run_mode, "fluid", "Mode of running (fluid/trt_fp32/trt_fp16)");
void PredictVideo(const std::string& video_path,
PaddleDetection::ObjectDetector* det) {
@@ -93,7 +94,10 @@ void PredictImage(const std::string& image_path,
auto colormap = PaddleDetection::GenerateColorMap(labels.size());
cv::Mat vis_img = PaddleDetection::VisualizeResult(
im, result, labels, colormap);
cv::imwrite("output.jpeg", vis_img);
std::vector<int> compression_params;
compression_params.push_back(CV_IMWRITE_JPEG_QUALITY);
compression_params.push_back(95);
cv::imwrite("output.jpeg", vis_img, compression_params);
printf("Visualized output saved as output.jpeg\n");
}
@@ -102,13 +106,19 @@ int main(int argc, char** argv) {
google::ParseCommandLineFlags(&argc, &argv, true);
if (FLAGS_model_dir.empty()
|| (FLAGS_image_path.empty() && FLAGS_video_path.empty())) {
std::cout << "Usage: ./main --model_dir=/PATH/TO/INFERENCE_MODEL/ "
std::cout << "Usage: ./main --model_dir=/PATH/TO/INFERENCE_MODEL/ "
<< "--image_path=/PATH/TO/INPUT/IMAGE/" << std::endl;
return -1;
}
if (!(FLAGS_run_mode == "fluid" || FLAGS_run_mode == "trt_fp32"
|| FLAGS_run_mode == "trt_fp16")) {
std::cout << "run_mode should be 'fluid', 'trt_fp32' or 'trt_fp16'.";
return -1;
}
// Load model and create an object detector
PaddleDetection::ObjectDetector det(FLAGS_model_dir, FLAGS_use_gpu,
                                    FLAGS_run_mode);
// Do inference on input video or image
if (!FLAGS_video_path.empty()) {
PredictVideo(FLAGS_video_path, &det);
......
@@ -17,15 +17,38 @@
namespace PaddleDetection {
// Load Model and create model predictor
void ObjectDetector::LoadModel(const std::string& model_dir,
                               bool use_gpu,
                               const int min_subgraph_size,
                               const int batch_size,
                               const std::string& run_mode) {
paddle::AnalysisConfig config;
std::string prog_file = model_dir + OS_PATH_SEP + "__model__";
std::string params_file = model_dir + OS_PATH_SEP + "__params__";
config.SetModel(prog_file, params_file);
if (use_gpu) {
config.EnableUseGpu(100, 0);
if (run_mode != "fluid") {
auto precision = paddle::AnalysisConfig::Precision::kFloat32;
if (run_mode == "trt_fp16") {
precision = paddle::AnalysisConfig::Precision::kHalf;
} else if (run_mode == "trt_int8") {
precision = paddle::AnalysisConfig::Precision::kInt8;
} else {
if (run_mode != "trt_32") {
printf("run_mode should be 'fluid', 'trt_fp32' or 'trt_fp16'");
}
}
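    // EnableTensorRtEngine(workspace_size, max_batch_size, min_subgraph_size,
    //                      precision, use_static, use_calib_mode); the final
    // flag enables TensorRT's INT8 calibration path for trt_int8 runs.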
config.EnableTensorRtEngine(
1 << 10,
batch_size,
min_subgraph_size,
precision,
false,
run_mode == "trt_int8");
}
} else {
config.DisableGpu();
}
config.SwitchUseFeedFetchOps(false);
config.SwitchSpecifyInputNames(true);
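The tail of `LoadModel` is elided above; in the Paddle 1.x C++ API, a finished `AnalysisConfig` is typically turned into a predictor roughly like this (a sketch under that assumption, not the elided code itself):

```
// Sketch: build the predictor from the configured AnalysisConfig;
// predictor_ is assumed to be a std::unique_ptr<paddle::PaddlePredictor> member.
predictor_ = paddle::CreatePaddlePredictor(config);
```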
@@ -51,6 +74,7 @@ cv::Mat VisualizeResult(const cv::Mat& img,
int c2 = colormap[3 * results[i].class_id + 1];
int c3 = colormap[3 * results[i].class_id + 2];
cv::Scalar roi_color = cv::Scalar(c1, c2, c3);
text += " ";
text += std::to_string(static_cast<int>(results[i].confidence * 100)) + "%";
int font_face = cv::FONT_HERSHEY_COMPLEX_SMALL;
double font_scale = 0.5f;
@@ -60,12 +84,6 @@ cv::Mat VisualizeResult(const cv::Mat& img,
font_scale,
thickness,
nullptr);
cv::Point origin;
origin.x = roi.x;
origin.y = roi.y;
@@ -83,7 +101,7 @@ cv::Mat VisualizeResult(const cv::Mat& img,
text,
origin,
font_face,
font_scale,
cv::Scalar(255, 255, 255),
thickness);
}
......
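Net effect of the `putText` style change: labels are now rendered at the fixed `font_scale` of 0.5 instead of being rescaled to the ROI width, and a space now separates the class name from the confidence percentage.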