diff --git a/inference/CMakeLists.txt b/inference/CMakeLists.txt
index d168639f6aac124308f276459594302eec19ef11..cd7a767888d9a8a8c85f3ac3fd02fa2ce459f613 100644
--- a/inference/CMakeLists.txt
+++ b/inference/CMakeLists.txt
@@ -92,7 +92,7 @@ if (WIN32)
         add_definitions(-DSTATIC_LIB)
     endif()
 else()
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -o2 -std=c++11")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -o2 -fopenmp -std=c++11")
     set(CMAKE_STATIC_LIBRARY_PREFIX "")
 endif()
 
@@ -195,8 +195,8 @@ endif(NOT WIN32)
 if(WITH_GPU)
   if(NOT WIN32)
     if (USE_TENSORRT)
-      set(DEPS ${DEPS} ${PADDLE_DIR}/third_party/install/tensorrt/lib/libnvinfer${CMAKE_STATIC_LIBRARY_SUFFIX})
-      set(DEPS ${DEPS} ${PADDLE_DIR}/third_party/install/tensorrt/lib/libnvinfer_plugin${CMAKE_STATIC_LIBRARY_SUFFIX})
+      set(DEPS ${DEPS} ${PADDLE_DIR}/third_party/install/tensorrt/lib/libnvinfer${CMAKE_SHARED_LIBRARY_SUFFIX})
+      set(DEPS ${DEPS} ${PADDLE_DIR}/third_party/install/tensorrt/lib/libnvinfer_plugin${CMAKE_SHARED_LIBRARY_SUFFIX})
     endif()
     set(DEPS ${DEPS} ${CUDA_LIB}/libcudart${CMAKE_SHARED_LIBRARY_SUFFIX})
     set(DEPS ${DEPS} ${CUDNN_LIB}/libcudnn${CMAKE_SHARED_LIBRARY_SUFFIX})
diff --git a/inference/README.md b/inference/README.md
index 71c6c5d300e1fb3529d4e0c9382d1b7f762c3882..4850cbc47d76e4c88619f881e30ff4c751a1b8e2 100644
--- a/inference/README.md
+++ b/inference/README.md
@@ -123,7 +123,10 @@ DEPLOY:
     RESIZE_MAX_SIZE: 1333
     # Number of input tensors.
     FEEDS_SIZE: 3
-
+    # Whether to enable TensorRT.
+    USE_TRT: 0
+    # Precision used when TensorRT is enabled; FP16, FP32 and INT8 are supported.
+    TRT_MODE: FP16
 ```
 
 Set the `MODEL_PATH` field to the directory containing the model files you downloaded and extracted in the **previous step**. For more details on the configuration fields, see the [deployment configuration reference](./docs/configuration.md).
diff --git a/inference/conf/detection_rcnn.yaml b/inference/conf/detection_rcnn.yaml
index a53698d7fedaaaec790318dade1621ea578eb0b6..f6b24b068ba1b291c78cb9f983b59c27dddd20eb 100644
--- a/inference/conf/detection_rcnn.yaml
+++ b/inference/conf/detection_rcnn.yaml
@@ -13,6 +13,8 @@ DEPLOY:
     CHANNELS : 3
     PRE_PROCESSOR: "DetectionPreProcessor"
     PREDICTOR_MODE: "ANALYSIS"
-    BATCH_SIZE : 1 
+    BATCH_SIZE : 1
     RESIZE_MAX_SIZE: 1333
     FEEDS_SIZE: 3
+    USE_TRT : 0
+    TRT_MODE : FP32
diff --git a/inference/conf/detection_rcnn_fpn.yaml b/inference/conf/detection_rcnn_fpn.yaml
index 9d6635ef8c2b29fb0ca9318d1ec08f1f7be037f7..520471b99bead43e2f9e37b3b08245c393f58258 100644
--- a/inference/conf/detection_rcnn_fpn.yaml
+++ b/inference/conf/detection_rcnn_fpn.yaml
@@ -17,3 +17,5 @@ DEPLOY:
     RESIZE_MAX_SIZE: 1333
     FEEDS_SIZE: 3
     COARSEST_STRIDE: 32
+    USE_TRT : 0
+    TRT_MODE : FP32
diff --git a/inference/conf/yolov3_mobilenet.yaml b/inference/conf/yolov3_mobilenet.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..86dea413fce40d125e3e15cce01d64d55e546880
--- /dev/null
+++ b/inference/conf/yolov3_mobilenet.yaml
@@ -0,0 +1,20 @@
+DEPLOY:
+    USE_GPU: 1
+    MODEL_PATH: "/root/projects/models/yolov3_mobilenet_v1/"
+    MODEL_FILENAME: "__model__"
+    PARAMS_FILENAME: "__params__"
+    EVAL_CROP_SIZE: (320, 320)
+    RESIZE_TYPE: "UNPADDING"
+    TARGET_SHORT_SIZE : 256
+    MEAN: [0.4647, 0.4647, 0.4647]
+    STD: [0.0834, 0.0834, 0.0834]
+    IMAGE_TYPE: "rgb"
+    NUM_CLASSES: 1
+    CHANNELS : 3
+    PRE_PROCESSOR: "DetectionPreProcessor"
+    PREDICTOR_MODE: "ANALYSIS"
+    BATCH_SIZE : 3
+    RESIZE_MAX_SIZE: -1
+    FEEDS_SIZE: 2
+    USE_TRT : 1
+    TRT_MODE : "FP16"
diff --git a/inference/images/detection_rcnn/000000014439.jpg b/inference/images/detection_rcnn/000000014439.jpg
index 0abbdab06eb5950b93908cc91adfa640e8a3ac78..5428bb6b1bb8e58a5f19d107e535ba1e6fa3e658 100644
Binary files a/inference/images/detection_rcnn/000000014439.jpg and b/inference/images/detection_rcnn/000000014439.jpg differ
diff --git a/inference/images/detection_rcnn/000000014439_640x640.jpg b/inference/images/detection_rcnn/000000014439_640x640.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..e1e134ba1bc3596e456b40a1dc0299caa6d65384
Binary files /dev/null and b/inference/images/detection_rcnn/000000014439_640x640.jpg differ
diff --git a/inference/predictor/detection_predictor.cpp b/inference/predictor/detection_predictor.cpp
index e47125c5e3eff159ea6f43740906f58e08dc78aa..9c1791e4104cdd7e69a666b4e1940f26ce1bb142 100644
--- a/inference/predictor/detection_predictor.cpp
+++ b/inference/predictor/detection_predictor.cpp
@@ -32,17 +32,18 @@ namespace PaddleSolution {
         int max_h = -1;
         int max_w = -1;
         for (int i = 0; i < batch_size; ++i) {
-            max_h = (max_h > resize_heights[i])? max_h:resize_heights[i];
-            max_w = (max_w > resize_widths[i])? max_w:resize_widths[i];
+            max_h = (max_h > resize_heights[i])? max_h : resize_heights[i];
+            max_w = (max_w > resize_widths[i])? max_w : resize_widths[i];
         }
+
         max_h = static_cast<int>(ceil(static_cast<float>(max_h)
-            / static_cast<float>(coarsest_stride)) * coarsest_stride);
+                / static_cast<float>(coarsest_stride)) * coarsest_stride);
         max_w = static_cast<int>(ceil(static_cast<float>(max_w)
-            / static_cast<float>(coarsest_stride)) * coarsest_stride);
-        std::cout << "max_w: " << max_w << " max_h: " << max_h << std::endl;
+                / static_cast<float>(coarsest_stride)) * coarsest_stride);
         input_buffer.insert(input_buffer.end(),
                             batch_size * channels * max_h * max_w, 0);
         // flatten tensor and padding
+        #pragma omp parallel for
         for (int i = 0; i < lod_buffer.size(); ++i) {
             float *input_buffer_ptr = input_buffer.data()
                                       + i * channels * max_h * max_w;
@@ -121,6 +122,8 @@
         }
 
         bool use_gpu = _model_config._use_gpu;
+        bool enable_trt = _model_config._enable_trt & use_gpu;
+        auto trt_precision = _model_config._trt_precision;
         const auto& model_dir = _model_config._model_path;
         const auto& model_filename = _model_config._model_file_name;
         const auto& params_filename = _model_config._param_file_name;
@@ -136,11 +139,17 @@
             config.use_gpu = use_gpu;
             config.device = 0;
             _main_predictor = paddle::CreatePaddlePredictor(config);
+
         } else if (_model_config._predictor_mode == "ANALYSIS") {
             paddle::AnalysisConfig config;
             if (use_gpu) {
                 config.EnableUseGpu(100, 0);
             }
+            if (enable_trt) {
+                auto use_cab = (trt_precision == paddle::AnalysisConfig::Precision::kInt8);
+                config.EnableTensorRtEngine(1 << 20, _model_config._batch_size, 40,
+                                            trt_precision, false, use_cab);
+            }
             auto prog_file = utils::path_join(model_dir, model_filename);
             auto param_file = utils::path_join(model_dir, params_filename);
             config.SetModel(prog_file, param_file);
@@ -288,7 +297,6 @@
             }
             feeds.push_back(im_size_tensor);
             _outputs.clear();
-            auto t1 = std::chrono::high_resolution_clock::now();
 
             if (!_main_predictor->Run(feeds, &_outputs, batch_size)) {
 #ifdef _WIN32
@@ -376,7 +384,6 @@
                 std::cout << "Failed to preprocess!"
                           << std::endl;
                 return -1;
             }
-            // flatten tensor
             padding_minibatch(lod_buffer, input_buffer,
                               resize_heights, resize_widths, channels,
@@ -423,7 +430,6 @@
             im_size_tensor->Reshape({batch_size, 2});
             im_size_tensor->copy_from_cpu(image_size.data());
         }
-        auto t1 = std::chrono::high_resolution_clock::now();
 
         _main_predictor->ZeroCopyRun();
         auto t2 = std::chrono::high_resolution_clock::now();
diff --git a/inference/preprocessor/preprocessor_detection.cpp b/inference/preprocessor/preprocessor_detection.cpp
index 15d2dc3c4b6255b8639d3296da7be4d22764f475..d60a625947d041cb663e97ce2057c093eeb540b4 100644
--- a/inference/preprocessor/preprocessor_detection.cpp
+++ b/inference/preprocessor/preprocessor_detection.cpp
@@ -62,16 +62,22 @@ bool DetectionPreProcessor::single_process(const std::string& fname,
     int rw = im.cols;
     int rh = im.rows;
     float im_scale_ratio;
-    utils::scaling(_config->_resize_type, rw, rh, _config->_resize[0],
-                   _config->_resize[1], _config->_target_short_size,
-                   _config->_resize_max_size, im_scale_ratio);
+    if (!_config->_enable_trt) {
+        utils::scaling(_config->_resize_type, rw, rh, _config->_resize[0],
+                       _config->_resize[1], _config->_target_short_size,
+                       _config->_resize_max_size, im_scale_ratio);
+    } else {
+        // TensorRT 5 only supports fixed-shape input
+        rw = _config->_resize[0];
+        rh = _config->_resize[1];
+    }
     cv::Size resize_size(rw, rh);
     *resize_w = rw;
     *resize_h = rh;
     *scale_ratio = im_scale_ratio;
     if (*ori_h != rh || *ori_w != rw) {
         cv::Mat im_temp;
-        if (_config->_resize_type == utils::SCALE_TYPE::UNPADDING) {
+        if (_config->_enable_trt || _config->_resize_type == utils::SCALE_TYPE::UNPADDING) {
             cv::resize(im, im_temp, resize_size, 0, 0, cv::INTER_LINEAR);
         } else if (_config->_resize_type == utils::SCALE_TYPE::RANGE_SCALING) {
             cv::resize(im, im_temp, cv::Size(), im_scale_ratio,
@@ -85,6 +91,7 @@ bool DetectionPreProcessor::single_process(const std::string& fname,
 
     float* pmean = _config->_mean.data();
     float* pscale = _config->_std.data();
+    #pragma omp parallel for
     for (int h = 0; h < rh; ++h) {
         const uchar* uptr = im.ptr<uchar>(h);
         const float* fptr = im.ptr<float>(h);
diff --git a/inference/utils/conf_parser.h b/inference/utils/conf_parser.h
index 97461a6e92eee1373cec0ff22e9f9387b77766d3..a4055b2faae5d67a67f8971a96c83f915d70c7c0 100644
--- a/inference/utils/conf_parser.h
+++ b/inference/utils/conf_parser.h
@@ -18,6 +18,7 @@
 #include
 #include
 #include
+#include
 
 namespace PaddleSolution {
 
@@ -30,6 +31,7 @@ class PaddleModelConfigPaser {
           _channels(0),
           _use_gpu(0),
           _batch_size(1),
+          _enable_trt(false),
           _target_short_size(0),
           _model_file_name("__model__"),
           _param_file_name("__params__"),
@@ -58,6 +60,7 @@
         _resize_max_size = 0;
         _feeds_size = 1;
         _coarsest_stride = 1;
+        _enable_trt = false;
     }
 
     std::string process_parenthesis(const std::string& str) {
@@ -214,6 +217,34 @@
         if (config["DEPLOY"]["COARSEST_STRIDE"].IsDefined()) {
             _coarsest_stride = config["DEPLOY"]["COARSEST_STRIDE"].as<int>();
         }
+        // 20. enable_trt
+        if (config["DEPLOY"]["USE_TRT"].IsDefined()) {
+            _enable_trt = config["DEPLOY"]["USE_TRT"].as<int>();
+            _enable_trt &= _use_gpu;
+        } else {
+            _enable_trt = false;
+        }
+        if (_enable_trt) {
+            std::string trt_mode = "";
+            if (config["DEPLOY"]["TRT_MODE"].IsDefined()) {
+                trt_mode = config["DEPLOY"]["TRT_MODE"].as<std::string>();
+            } else {
+                trt_mode = "FP32";
+            }
+
+            if (trt_mode == "FP16") {
+                _trt_precision = paddle::AnalysisConfig::Precision::kHalf;
+            } else if (trt_mode == "FP32") {
+                _trt_precision = paddle::AnalysisConfig::Precision::kFloat32;
+            } else if (trt_mode == "INT8") {
+                _trt_precision = paddle::AnalysisConfig::Precision::kInt8;
+            } else {
+                _enable_trt = false;
+            }
+        }
+        if (_predictor_mode == "NATIVE") {
+            _enable_trt = false;
+        }
 
         return true;
     }
@@ -293,5 +324,9 @@
     std::string _predictor_mode;
     // DEPLOY.BATCH_SIZE
     int _batch_size;
+    // whether to enable TensorRT (DEPLOY.USE_TRT)
+    bool _enable_trt;
+    // TensorRT precision (DEPLOY.TRT_MODE)
+    paddle::AnalysisConfig::Precision _trt_precision;
 };
 }  // namespace PaddleSolution