Commit 2a2f5756 authored by sjtubinlong

C++ infer: support tensorrt fp16/fp32/int8

Parent ca2a239a
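In short: the commit threads a new DEPLOY.TRT_MODE string ("FP32", "FP16" or "INT8") from the deploy YAML through the config parser into paddle::AnalysisConfig, enabling the TensorRT subgraph engine at the matching precision. A minimal end-to-end sketch of that flow, assuming the paddle_inference_api.h header of this Paddle generation; the helper name make_trt_predictor and the include path are illustrative, not from the commit:

#include <map>
#include <memory>
#include <string>
#include "paddle/include/paddle_inference_api.h"  // location varies by Paddle release

// Mirrors the TRT_MAP added in the predictor header below.
static const std::map<std::string, paddle::AnalysisConfig::Precision> kTrtMap = {
    {"FP32", paddle::AnalysisConfig::Precision::kFloat32},
    {"FP16", paddle::AnalysisConfig::Precision::kHalf},
    {"INT8", paddle::AnalysisConfig::Precision::kInt8},
};

std::unique_ptr<paddle::PaddlePredictor> make_trt_predictor(
        const std::string& prog_file, const std::string& param_file,
        const std::string& trt_mode, int batch_size) {
    paddle::AnalysisConfig config;
    config.SetModel(prog_file, param_file);
    config.EnableUseGpu(100, 0);  // 100 MB initial GPU memory pool, device 0
    auto it = kTrtMap.find(trt_mode);
    if (it != kTrtMap.end()) {
        // Only INT8 needs a calibration pass; FP32/FP16 run directly.
        bool use_calib = (it->second == paddle::AnalysisConfig::Precision::kInt8);
        config.EnableTensorRtEngine(1 << 30, batch_size, 40,
                                    it->second, false, use_calib);
    }
    return paddle::CreatePaddlePredictor<paddle::AnalysisConfig>(config);
}

An unrecognized or empty TRT_MODE simply skips EnableTensorRtEngine, so plain GPU inference remains the default.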
......@@ -195,8 +195,8 @@ endif(NOT WIN32)
if(WITH_GPU)
if(NOT WIN32)
if (USE_TENSORRT)
-set(DEPS ${DEPS} ${PADDLE_DIR}/third_party/install/tensorrt/lib/libnvinfer${CMAKE_STATIC_LIBRARY_SUFFIX})
-set(DEPS ${DEPS} ${PADDLE_DIR}/third_party/install/tensorrt/lib/libnvinfer_plugin${CMAKE_STATIC_LIBRARY_SUFFIX})
+set(DEPS ${DEPS} ${PADDLE_DIR}/third_party/install/tensorrt/lib/libnvinfer${CMAKE_SHARED_LIBRARY_SUFFIX})
+set(DEPS ${DEPS} ${PADDLE_DIR}/third_party/install/tensorrt/lib/libnvinfer_plugin${CMAKE_SHARED_LIBRARY_SUFFIX})
endif()
set(DEPS ${DEPS} ${CUDA_LIB}/libcudart${CMAKE_SHARED_LIBRARY_SUFFIX})
set(DEPS ${DEPS} ${CUDNN_LIB}/libcudnn${CMAKE_SHARED_LIBRARY_SUFFIX})
......
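A likely motive for the suffix change: Linux TensorRT distributions ship the runtime as shared objects (libnvinfer.so, libnvinfer_plugin.so), while the static archives, where provided at all, carry a different _static naming, so the old CMAKE_STATIC_LIBRARY_SUFFIX lines resolved to files that do not exist under third_party/install/tensorrt/lib.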
......@@ -36,7 +36,6 @@ namespace PaddleSolution {
const auto& model_dir = _model_config._model_path;
const auto& model_filename = _model_config._model_file_name;
const auto& params_filename = _model_config._param_file_name;
-// load paddle model file
if (_model_config._predictor_mode == "NATIVE") {
paddle::NativeConfig config;
......@@ -52,6 +51,12 @@ namespace PaddleSolution {
paddle::AnalysisConfig config;
if (use_gpu) {
config.EnableUseGpu(100, 0);
+if (TRT_MAP.find(_model_config._trt_mode) != TRT_MAP.end()) {
+    auto precision = TRT_MAP[_model_config._trt_mode];
+    bool use_cab = (precision == paddle::AnalysisConfig::Precision::kInt8);
+    config.EnableTensorRtEngine(1 << 30, _model_config._batch_size, 40,
+                                precision, false, use_cab);
+}
}
auto prog_file = utils::path_join(model_dir, model_filename);
auto param_file = utils::path_join(model_dir, params_filename);
......
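For readers of the hunk above, an annotated copy of the call, with parameter meanings as documented for paddle::AnalysisConfig in Paddle releases of this period (the names in the comments come from that API, not from the diff; use_cab evidently abbreviates the use_calib_mode flag):

config.EnableTensorRtEngine(
    1 << 30,                    // workspace_size: up to 1 GiB of GPU scratch for TRT
    _model_config._batch_size,  // max_batch_size the engine is built for
    40,                         // min_subgraph_size: smaller subgraphs stay in Paddle
    precision,                  // kFloat32 / kHalf / kInt8 from TRT_MAP
    false,                      // use_static: do not serialize engines to disk
    use_cab);                   // use_calib_mode: run INT8 calibration when true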
......@@ -55,5 +55,10 @@ class Predictor {
PaddleSolution::PaddleSegModelConfigPaser _model_config;
std::shared_ptr<PaddleSolution::ImagePreProcessor> _preprocessor;
std::unique_ptr<paddle::PaddlePredictor> _main_predictor;
+std::map<std::string, paddle::AnalysisConfig::Precision> TRT_MAP = {
+    {"FP32", paddle::AnalysisConfig::Precision::kFloat32},
+    {"FP16", paddle::AnalysisConfig::Precision::kHalf},
+    {"INT8", paddle::AnalysisConfig::Precision::kInt8}
+};
};
} // namespace PaddleSolution
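A small design note on TRT_MAP: the lookup in the predictor hunk further above calls find() and then operator[], which traverses the map twice, and operator[] additionally forces TRT_MAP to stay non-const. Reusing the iterator is a behavior-preserving alternative (sketch, same semantics as the committed code):

auto it = TRT_MAP.find(_model_config._trt_mode);
if (it != TRT_MAP.end()) {
    auto precision = it->second;
    bool use_cab = (precision == paddle::AnalysisConfig::Precision::kInt8);
    config.EnableTensorRtEngine(1 << 30, _model_config._batch_size, 40,
                                precision, false, use_cab);
}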
......@@ -46,6 +46,7 @@ class PaddleSegModelConfigPaser {
_model_file_name.clear();
_model_path.clear();
_param_file_name.clear();
+_trt_mode.clear();
}
std::string process_parenthesis(const std::string& str) {
......@@ -180,6 +181,12 @@ class PaddleSegModelConfigPaser {
} else {
_use_pr = 0;
}
+// 16. trt_mode
+if (config["DEPLOY"]["TRT_MODE"].IsDefined()) {
+    _trt_mode = config["DEPLOY"]["TRT_MODE"].as<std::string>();
+} else {
+    _trt_mode = "";
+}
return true;
}
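To make the new key concrete, a self-contained yaml-cpp check of the parsing path above; the DEPLOY section shape is inferred from the members documented in this class, so the sibling keys are assumptions and only TRT_MODE comes from this commit:

#include <iostream>
#include <string>
#include "yaml-cpp/yaml.h"

int main() {
    const char* deploy_yaml =
        "DEPLOY:\n"
        "  PREDICTOR_MODE: ANALYSIS\n"  // assumed sibling key
        "  BATCH_SIZE: 1\n"             // documented as DEPLOY.BATCH_SIZE below
        "  TRT_MODE: FP16\n";           // the new key: FP32 / FP16 / INT8
    YAML::Node config = YAML::Load(deploy_yaml);
    std::string trt_mode;
    if (config["DEPLOY"]["TRT_MODE"].IsDefined()) {
        trt_mode = config["DEPLOY"]["TRT_MODE"].as<std::string>();
    }
    std::cout << "TRT_MODE = " << trt_mode << std::endl;  // prints: TRT_MODE = FP16
    return 0;
}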
......@@ -246,8 +253,10 @@ class PaddleSegModelConfigPaser {
std::string _predictor_mode;
// DEPLOY.BATCH_SIZE
int _batch_size;
-// USE_PR: OP Optimized model
+// DEPLOY.USE_PR: OP Optimized model
int _use_pr;
+// DEPLOY.TRT_MODE: TRT Precision
+std::string _trt_mode;
};
} // namespace PaddleSolution