Commit 2c6fa076 authored by Bin Long, committed by qingqing01

Support C++ TRT inference (#188)

* C++ inference: support TensorRT
* Update README.md
* Add a YOLOv3 demo yaml
Parent 47741382
@@ -92,7 +92,7 @@ if (WIN32)
         add_definitions(-DSTATIC_LIB)
     endif()
 else()
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -o2 -std=c++11")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -o2 -fopenmp -std=c++11")
     set(CMAKE_STATIC_LIBRARY_PREFIX "")
 endif()
@@ -195,8 +195,8 @@ endif(NOT WIN32)
 if(WITH_GPU)
     if(NOT WIN32)
         if (USE_TENSORRT)
-            set(DEPS ${DEPS} ${PADDLE_DIR}/third_party/install/tensorrt/lib/libnvinfer${CMAKE_STATIC_LIBRARY_SUFFIX})
-            set(DEPS ${DEPS} ${PADDLE_DIR}/third_party/install/tensorrt/lib/libnvinfer_plugin${CMAKE_STATIC_LIBRARY_SUFFIX})
+            set(DEPS ${DEPS} ${PADDLE_DIR}/third_party/install/tensorrt/lib/libnvinfer${CMAKE_SHARED_LIBRARY_SUFFIX})
+            set(DEPS ${DEPS} ${PADDLE_DIR}/third_party/install/tensorrt/lib/libnvinfer_plugin${CMAKE_SHARED_LIBRARY_SUFFIX})
         endif()
         set(DEPS ${DEPS} ${CUDA_LIB}/libcudart${CMAKE_SHARED_LIBRARY_SUFFIX})
         set(DEPS ${DEPS} ${CUDNN_LIB}/libcudnn${CMAKE_SHARED_LIBRARY_SUFFIX})
......
@@ -123,7 +123,10 @@ DEPLOY:
     RESIZE_MAX_SIZE: 1333
     # Number of input tensors.
     FEEDS_SIZE: 3
+    # Whether to enable TensorRT
+    USE_TRT: 0
+    # Precision used when TensorRT is enabled; FP16, FP32 and INT8 are supported
+    TRT_MODE: FP16
 ```
 Set the `MODEL_PATH` field to the directory containing the model files you downloaded and extracted in the **previous step**. For more details on the configuration fields, see the document [Deployment configuration file reference](./docs/configuration.md).
......
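To actually turn the engine on for a model you deploy, the two new fields interact with the existing GPU and predictor settings (see the config parser changes further down): TensorRT only takes effect when `USE_GPU` is 1 and `PREDICTOR_MODE` is `ANALYSIS`. A minimal sketch of an enabled configuration (the path and values are illustrative, not from this commit):

```yaml
DEPLOY:
    USE_GPU: 1                         # USE_TRT is ANDed with USE_GPU by the parser
    PREDICTOR_MODE: "ANALYSIS"         # NATIVE mode forces TRT back off
    MODEL_PATH: "/path/to/your/model/" # illustrative path
    USE_TRT: 1
    TRT_MODE: FP16                     # one of FP32 / FP16 / INT8
```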
@@ -16,3 +16,5 @@ DEPLOY:
     BATCH_SIZE : 1
     RESIZE_MAX_SIZE: 1333
     FEEDS_SIZE: 3
+    USE_TRT : 0
+    TRT_MODE : FP32
@@ -17,3 +17,5 @@ DEPLOY:
     RESIZE_MAX_SIZE: 1333
     FEEDS_SIZE: 3
     COARSEST_STRIDE: 32
+    USE_TRT : 0
+    TRT_MODE : FP32
DEPLOY:
    USE_GPU: 1
    MODEL_PATH: "/root/projects/models/yolov3_mobilenet_v1/"
    MODEL_FILENAME: "__model__"
    PARAMS_FILENAME: "__params__"
    EVAL_CROP_SIZE: (320, 320)
    RESIZE_TYPE: "UNPADDING"
    TARGET_SHORT_SIZE : 256
    MEAN: [0.4647, 0.4647, 0.4647]
    STD: [0.0834, 0.0834, 0.0834]
    IMAGE_TYPE: "rgb"
    NUM_CLASSES: 1
    CHANNELS : 3
    PRE_PROCESSOR: "DetectionPreProcessor"
    PREDICTOR_MODE: "ANALYSIS"
    BATCH_SIZE : 3
    RESIZE_MAX_SIZE: -1
    FEEDS_SIZE: 2
    USE_TRT : 1
    TRT_MODE : "FP16"
@@ -32,17 +32,18 @@ namespace PaddleSolution {
     int max_h = -1;
     int max_w = -1;
     for (int i = 0; i < batch_size; ++i) {
-        max_h = (max_h > resize_heights[i])? max_h:resize_heights[i];
-        max_w = (max_w > resize_widths[i])? max_w:resize_widths[i];
+        max_h = (max_h > resize_heights[i])? max_h : resize_heights[i];
+        max_w = (max_w > resize_widths[i])? max_w : resize_widths[i];
     }
     max_h = static_cast<int>(ceil(static_cast<float>(max_h)
             / static_cast<float>(coarsest_stride)) * coarsest_stride);
     max_w = static_cast<int>(ceil(static_cast<float>(max_w)
             / static_cast<float>(coarsest_stride)) * coarsest_stride);
+    std::cout << "max_w: " << max_w << " max_h: " << max_h << std::endl;
     input_buffer.insert(input_buffer.end(),
                         batch_size * channels * max_h * max_w, 0);
     // flatten tensor and padding
+    #pragma omp parallel for
     for (int i = 0; i < lod_buffer.size(); ++i) {
         float *input_buffer_ptr = input_buffer.data()
                                   + i * channels * max_h * max_w;
@@ -121,6 +122,8 @@ namespace PaddleSolution {
     }
     bool use_gpu = _model_config._use_gpu;
+    bool enable_trt = _model_config._enable_trt & use_gpu;
+    auto trt_precision = _model_config._trt_precision;
     const auto& model_dir = _model_config._model_path;
     const auto& model_filename = _model_config._model_file_name;
     const auto& params_filename = _model_config._param_file_name;
@@ -136,11 +139,17 @@ namespace PaddleSolution {
         config.use_gpu = use_gpu;
         config.device = 0;
         _main_predictor = paddle::CreatePaddlePredictor(config);
     } else if (_model_config._predictor_mode == "ANALYSIS") {
         paddle::AnalysisConfig config;
         if (use_gpu) {
             config.EnableUseGpu(100, 0);
         }
+        if (enable_trt) {
+            auto use_cab = (trt_precision == paddle::AnalysisConfig::Precision::kInt8);
+            config.EnableTensorRtEngine(1 << 20, _model_config._batch_size, 40,
+                                        trt_precision, false, use_cab);
+        }
         auto prog_file = utils::path_join(model_dir, model_filename);
         auto param_file = utils::path_join(model_dir, params_filename);
         config.SetModel(prog_file, param_file);
@@ -288,7 +297,6 @@ namespace PaddleSolution {
     }
     feeds.push_back(im_size_tensor);
     _outputs.clear();
     auto t1 = std::chrono::high_resolution_clock::now();
     if (!_main_predictor->Run(feeds, &_outputs, batch_size)) {
 #ifdef _WIN32
@@ -376,7 +384,6 @@ namespace PaddleSolution {
         std::cout << "Failed to preprocess!" << std::endl;
         return -1;
     }
     // flatten tensor
     padding_minibatch(lod_buffer, input_buffer, resize_heights,
                       resize_widths, channels,
@@ -423,7 +430,6 @@ namespace PaddleSolution {
         im_size_tensor->Reshape({batch_size, 2});
         im_size_tensor->copy_from_cpu(image_size.data());
     }
     auto t1 = std::chrono::high_resolution_clock::now();
     _main_predictor->ZeroCopyRun();
     auto t2 = std::chrono::high_resolution_clock::now();
......
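The `EnableTensorRtEngine` call added above passes, in order, the TensorRT workspace size (1 MB here), the maximum batch size, the minimum subgraph size that gets offloaded to TRT (40), the precision, `use_static`, and `use_calib_mode` (derived from whether INT8 was requested). For reference, a stand-alone sketch of the same setup, assuming the Paddle 1.x C++ `AnalysisConfig` API used in this commit; the helper name `make_trt_predictor` and its parameters are illustrative only:

```cpp
#include <paddle_inference_api.h>

#include <memory>
#include <string>

// Build an analysis predictor with the TensorRT subgraph engine enabled,
// mirroring the arguments used in the diff above.
std::unique_ptr<paddle::PaddlePredictor> make_trt_predictor(
        const std::string& prog_file, const std::string& param_file,
        int batch_size) {
    paddle::AnalysisConfig config;
    config.EnableUseGpu(100, 0);  // 100 MB initial GPU memory pool, device 0
    // workspace = 1 MB, max batch, min subgraph size = 40, FP16 precision,
    // use_static = false, use_calib_mode = false (only INT8 needs calibration).
    config.EnableTensorRtEngine(1 << 20, batch_size, 40,
                                paddle::AnalysisConfig::Precision::kHalf,
                                false, false);
    config.SetModel(prog_file, param_file);
    return paddle::CreatePaddlePredictor(config);
}
```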
@@ -62,16 +62,22 @@ bool DetectionPreProcessor::single_process(const std::string& fname,
     int rw = im.cols;
     int rh = im.rows;
     float im_scale_ratio;
+    if (!_config->_enable_trt) {
     utils::scaling(_config->_resize_type, rw, rh, _config->_resize[0],
                    _config->_resize[1], _config->_target_short_size,
                    _config->_resize_max_size, im_scale_ratio);
+    } else {
+        // TensorRT 5 only support fixed shape input
+        rw = _config->_resize[0];
+        rh = _config->_resize[1];
+    }
     cv::Size resize_size(rw, rh);
     *resize_w = rw;
     *resize_h = rh;
     *scale_ratio = im_scale_ratio;
     if (*ori_h != rh || *ori_w != rw) {
         cv::Mat im_temp;
-        if (_config->_resize_type == utils::SCALE_TYPE::UNPADDING) {
+        if (_config->_enable_trt || _config->_resize_type == utils::SCALE_TYPE::UNPADDING) {
             cv::resize(im, im_temp, resize_size, 0, 0, cv::INTER_LINEAR);
         } else if (_config->_resize_type == utils::SCALE_TYPE::RANGE_SCALING) {
             cv::resize(im, im_temp, cv::Size(), im_scale_ratio,
@@ -85,6 +91,7 @@ bool DetectionPreProcessor::single_process(const std::string& fname,
     float* pmean = _config->_mean.data();
     float* pscale = _config->_std.data();
+    #pragma omp parallel for
     for (int h = 0; h < rh; ++h) {
         const uchar* uptr = im.ptr<uchar>(h);
         const float* fptr = im.ptr<float>(h);
......
@@ -18,6 +18,7 @@
 #include <vector>
 #include <string>
 #include <map>
+#include <paddle_inference_api.h>
 namespace PaddleSolution {
@@ -30,6 +31,7 @@ class PaddleModelConfigPaser {
         _channels(0),
         _use_gpu(0),
         _batch_size(1),
+        _enable_trt(false),
         _target_short_size(0),
         _model_file_name("__model__"),
         _param_file_name("__params__"),
@@ -58,6 +60,7 @@ class PaddleModelConfigPaser {
         _resize_max_size = 0;
         _feeds_size = 1;
         _coarsest_stride = 1;
+        _enable_trt = false;
     }
     std::string process_parenthesis(const std::string& str) {
@@ -214,6 +217,34 @@ class PaddleModelConfigPaser {
         if (config["DEPLOY"]["COARSEST_STRIDE"].IsDefined()) {
             _coarsest_stride = config["DEPLOY"]["COARSEST_STRIDE"].as<int>();
         }
+        // 20. enable_trt
+        if (config["DEPLOY"]["USE_TRT"].IsDefined()) {
+            _enable_trt = config["DEPLOY"]["USE_TRT"].as<int>();
+            _enable_trt &= _use_gpu;
+        } else {
+            _enable_trt = false;
+        }
+        if (_enable_trt) {
+            std::string trt_mode = "";
+            if (config["DEPLOY"]["TRT_MODE"].IsDefined()) {
+                trt_mode = config["DEPLOY"]["TRT_MODE"].as<std::string>();
+            } else {
+                trt_mode = "FP32";
+            }
+            if (trt_mode == "FP16") {
+                _trt_precision = paddle::AnalysisConfig::Precision::kHalf;
+            } else if (trt_mode == "FP32") {
+                _trt_precision = paddle::AnalysisConfig::Precision::kFloat32;
+            } else if (trt_mode == "INT8") {
+                _trt_precision = paddle::AnalysisConfig::Precision::kInt8;
+            } else {
+                _enable_trt = false;
+            }
+        }
+        if (_predictor_mode == "NATIVE") {
+            _enable_trt = false;
+        }
         return true;
     }
@@ -293,5 +324,9 @@ class PaddleModelConfigPaser {
     std::string _predictor_mode;
     // DEPLOY.BATCH_SIZE
     int _batch_size;
+    // bool enable_trt
+    bool _enable_trt;
+    // TRT Precision
+    paddle::AnalysisConfig::Precision _trt_precision;
 };
 }  // namespace PaddleSolution
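Putting the parsing rules above together: `USE_TRT` is read as an int and ANDed with `USE_GPU`, a missing `TRT_MODE` defaults to FP32, an unrecognized value disables TRT entirely, and `PREDICTOR_MODE: NATIVE` always wins and turns TRT off. As an illustrative example (values are not from this commit), the fragment below ends up calling `EnableTensorRtEngine` with `Precision::kInt8` and calibration enabled in the predictor code above:

```yaml
DEPLOY:
    USE_GPU: 1
    PREDICTOR_MODE: "ANALYSIS"
    USE_TRT: 1
    TRT_MODE: INT8   # maps to Precision::kInt8, so use_cab becomes true
```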