Unverified commit c1e86b56, authored by Double_V, committed by GitHub

Merge pull request #1587 from LDOUBLEV/trt_cpp

add TensorRT prediction to the cpp_infer demo
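The change threads two new flags, use_tensorrt and use_fp16, from tools/config.txt through the DBDetector, Classifier, and CRNNRecognizer constructors into each LoadModel, where they guard Paddle Inference's TensorRT engine. All three LoadModel bodies add the same guarded call; a commented sketch of that pattern follows (the parameter names in the comments come from the Paddle Inference EnableTensorRtEngine signature, which the diff itself does not spell out):

    // Pattern repeated in each LoadModel hunk below.
    // EnableTensorRtEngine(workspace_size, max_batch_size, min_subgraph_size,
    //                      precision, use_static, use_calib_mode)
    if (this->use_tensorrt_) {
      config.EnableTensorRtEngine(
          1 << 20,  // workspace_size: TensorRT workspace in bytes (1 MiB)
          10,       // max_batch_size
          3,        // min_subgraph_size: smaller subgraphs stay on Paddle ops
          this->use_fp16_ ? paddle_infer::Config::Precision::kHalf
                          : paddle_infer::Config::Precision::kFloat32,
          false,    // use_static: do not serialize the optimized engine
          false);   // use_calib_mode: no INT8 calibration
    }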
@@ -62,6 +62,10 @@ public:
     this->cls_thresh = stod(config_map_["cls_thresh"]);
     this->visualize = bool(stoi(config_map_["visualize"]));
+    this->use_tensorrt = bool(stoi(config_map_["use_tensorrt"]));
+    this->use_fp16 = bool(stoi(config_map_["use_fp16"]));
   }
   bool use_gpu = false;
@@ -96,6 +100,10 @@ public:
   bool visualize = true;
+  bool use_tensorrt = false;
+  bool use_fp16 = false;
   void PrintConfigInfo();
 private:
...
@@ -39,7 +39,8 @@ public:
   explicit Classifier(const std::string &model_dir, const bool &use_gpu,
                       const int &gpu_id, const int &gpu_mem,
                       const int &cpu_math_library_num_threads,
-                      const bool &use_mkldnn, const double &cls_thresh) {
+                      const bool &use_mkldnn, const double &cls_thresh,
+                      const bool &use_tensorrt, const bool &use_fp16) {
     this->use_gpu_ = use_gpu;
     this->gpu_id_ = gpu_id;
     this->gpu_mem_ = gpu_mem;
@@ -47,6 +48,8 @@ public:
     this->use_mkldnn_ = use_mkldnn;
     this->cls_thresh = cls_thresh;
+    this->use_tensorrt_ = use_tensorrt;
+    this->use_fp16_ = use_fp16;
     LoadModel(model_dir);
   }
@@ -69,7 +72,8 @@ private:
   std::vector<float> mean_ = {0.5f, 0.5f, 0.5f};
   std::vector<float> scale_ = {1 / 0.5f, 1 / 0.5f, 1 / 0.5f};
   bool is_scale_ = true;
+  bool use_tensorrt_ = false;
+  bool use_fp16_ = false;
   // pre-process
   ClsResizeImg resize_op_;
   Normalize normalize_op_;
...
@@ -45,7 +45,8 @@ public:
                      const double &det_db_thresh,
                      const double &det_db_box_thresh,
                      const double &det_db_unclip_ratio,
-                     const bool &visualize) {
+                     const bool &visualize, const bool &use_tensorrt,
+                     const bool &use_fp16) {
     this->use_gpu_ = use_gpu;
     this->gpu_id_ = gpu_id;
     this->gpu_mem_ = gpu_mem;
@@ -59,6 +60,8 @@ public:
     this->det_db_unclip_ratio_ = det_db_unclip_ratio;
     this->visualize_ = visualize;
+    this->use_tensorrt_ = use_tensorrt;
+    this->use_fp16_ = use_fp16;
     LoadModel(model_dir);
   }
@@ -85,6 +88,8 @@ private:
   double det_db_unclip_ratio_ = 2.0;
   bool visualize_ = true;
+  bool use_tensorrt_ = false;
+  bool use_fp16_ = false;
   std::vector<float> mean_ = {0.485f, 0.456f, 0.406f};
   std::vector<float> scale_ = {1 / 0.229f, 1 / 0.224f, 1 / 0.225f};
...
@@ -41,12 +41,15 @@ public:
   explicit CRNNRecognizer(const std::string &model_dir, const bool &use_gpu,
                           const int &gpu_id, const int &gpu_mem,
                           const int &cpu_math_library_num_threads,
-                          const bool &use_mkldnn, const string &label_path) {
+                          const bool &use_mkldnn, const string &label_path,
+                          const bool &use_tensorrt, const bool &use_fp16) {
     this->use_gpu_ = use_gpu;
     this->gpu_id_ = gpu_id;
     this->gpu_mem_ = gpu_mem;
     this->cpu_math_library_num_threads_ = cpu_math_library_num_threads;
     this->use_mkldnn_ = use_mkldnn;
+    this->use_tensorrt_ = use_tensorrt;
+    this->use_fp16_ = use_fp16;
     this->label_list_ = Utility::ReadDict(label_path);
     this->label_list_.insert(this->label_list_.begin(),
@@ -76,7 +79,8 @@ private:
   std::vector<float> mean_ = {0.5f, 0.5f, 0.5f};
   std::vector<float> scale_ = {1 / 0.5f, 1 / 0.5f, 1 / 0.5f};
   bool is_scale_ = true;
+  bool use_tensorrt_ = false;
+  bool use_fp16_ = false;
   // pre-process
   CrnnResizeImg resize_op_;
   Normalize normalize_op_;
...
@@ -54,18 +54,20 @@ int main(int argc, char **argv) {
                      config.gpu_mem, config.cpu_math_library_num_threads,
                      config.use_mkldnn, config.max_side_len, config.det_db_thresh,
                      config.det_db_box_thresh, config.det_db_unclip_ratio,
-                     config.visualize);
+                     config.visualize, config.use_tensorrt, config.use_fp16);
   Classifier *cls = nullptr;
   if (config.use_angle_cls == true) {
     cls = new Classifier(config.cls_model_dir, config.use_gpu, config.gpu_id,
                          config.gpu_mem, config.cpu_math_library_num_threads,
-                         config.use_mkldnn, config.cls_thresh);
+                         config.use_mkldnn, config.cls_thresh,
+                         config.use_tensorrt, config.use_fp16);
   }
   CRNNRecognizer rec(config.rec_model_dir, config.use_gpu, config.gpu_id,
                      config.gpu_mem, config.cpu_math_library_num_threads,
-                     config.use_mkldnn, config.char_list_file);
+                     config.use_mkldnn, config.char_list_file,
+                     config.use_tensorrt, config.use_fp16);
   auto start = std::chrono::system_clock::now();
   std::vector<std::vector<std::vector<int>>> boxes;
...
@@ -76,12 +76,19 @@ void Classifier::LoadModel(const std::string &model_dir) {
   if (this->use_gpu_) {
     config.EnableUseGpu(this->gpu_mem_, this->gpu_id_);
+    if (this->use_tensorrt_) {
+      config.EnableTensorRtEngine(
+          1 << 20, 10, 3,
+          this->use_fp16_ ? paddle_infer::Config::Precision::kHalf
+                          : paddle_infer::Config::Precision::kFloat32,
+          false, false);
+    }
   } else {
     config.DisableGpu();
     if (this->use_mkldnn_) {
       config.EnableMKLDNN();
     }
     config.SetCpuMathLibraryNumThreads(this->cpu_math_library_num_threads_);
   }
   // false for zero copy tensor
...
@@ -24,10 +24,13 @@ void DBDetector::LoadModel(const std::string &model_dir) {
   if (this->use_gpu_) {
     config.EnableUseGpu(this->gpu_mem_, this->gpu_id_);
-    // config.EnableTensorRtEngine(
-    //     1 << 20, 1, 3,
-    //     AnalysisConfig::Precision::kFloat32,
-    //     false, false);
+    if (this->use_tensorrt_) {
+      config.EnableTensorRtEngine(
+          1 << 20, 10, 3,
+          this->use_fp16_ ? paddle_infer::Config::Precision::kHalf
+                          : paddle_infer::Config::Precision::kFloat32,
+          false, false);
+    }
   } else {
     config.DisableGpu();
     if (this->use_mkldnn_) {
...
@@ -99,6 +99,13 @@ void CRNNRecognizer::LoadModel(const std::string &model_dir) {
   if (this->use_gpu_) {
     config.EnableUseGpu(this->gpu_mem_, this->gpu_id_);
+    if (this->use_tensorrt_) {
+      config.EnableTensorRtEngine(
+          1 << 20, 10, 3,
+          this->use_fp16_ ? paddle_infer::Config::Precision::kHalf
+                          : paddle_infer::Config::Precision::kFloat32,
+          false, false);
+    }
   } else {
     config.DisableGpu();
     if (this->use_mkldnn_) {
...
@@ -24,3 +24,7 @@ char_list_file ../../ppocr/utils/ppocr_keys_v1.txt
 # show the detection results
 visualize 1
+
+# use_tensorrt
+use_tensorrt 0
+use_fp16 0
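With these keys in place, TensorRT and FP16 inference can be toggled without recompiling. A hypothetical run (the ocr_system binary and image path are assumed from the demo's usual layout, not stated in this diff):

    # tools/config.txt
    use_tensorrt 1
    use_fp16 1

    # then run the demo as usual
    ./build/ocr_system ./tools/config.txt ./doc/imgs/12.jpg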