Unverified · Commit 01cb2ee8, authored by Guanghua Yu, committed by GitHub

[cherry-pick]fix batch_size when trt infer (#3104)

* fix batch_size when trt infer
Parent ab6d3c53
@@ -162,6 +162,7 @@ CUDNN_LIB=/usr/lib/aarch64-linux-gnu/
| --use_gpu | Whether to use GPU for inference; supported values are 0 or 1 (default: 0) |
| --gpu_id | GPU device id used for inference (default: 0) |
| --run_mode | Run mode when using GPU; defaults to fluid, options: fluid/trt_fp32/trt_fp16/trt_int8 |
+ | --batch_size | Batch size for inference; effective when `image_dir` is specified |
| --run_benchmark | Whether to run inference repeatedly for benchmark timing |
| --output_dir | Directory where output images are saved (default: output) |
| --use_mkldnn | Whether to enable MKLDNN acceleration for CPU inference |
......
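For reference, a directory-batched TensorRT run with the new flag might look like the following sketch (model and image paths are placeholders, and the binary name assumes the `main` target built by this demo):

```bash
# Hypothetical invocation: --batch_size only takes effect because --image_dir is set.
./main --model_dir=./yolov3_infer_model \
       --image_dir=./demo_images \
       --use_gpu=1 \
       --run_mode=trt_fp16 \
       --batch_size=4
```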
@@ -104,6 +104,7 @@ make
| --use_gpu | Whether to use GPU for inference; supported values are 0 or 1 (default: 0) |
| --gpu_id | GPU device id used for inference (default: 0) |
| --run_mode | Run mode when using GPU; defaults to fluid, options: fluid/trt_fp32/trt_fp16/trt_int8 |
+ | --batch_size | Batch size for inference; effective when `image_dir` is specified |
| --run_benchmark | Whether to run inference repeatedly for benchmark timing |
| --output_dir | Directory where output images are saved (default: output) |
| --use_mkldnn | Whether to enable MKLDNN acceleration for CPU inference |
......
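The same flag also feeds the benchmark path (see the `PrintBenchmarkLog` change below, which now logs the real batch size). A hedged example combining it with `--run_benchmark`, with placeholder paths:

```bash
# Repeated inference for benchmark timing; the logged batch_size now reflects this flag.
./main --model_dir=./yolov3_infer_model \
       --image_dir=./demo_images \
       --use_gpu=1 \
       --run_mode=trt_fp32 \
       --batch_size=8 \
       --run_benchmark=1
```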
@@ -54,7 +54,7 @@ cd D:\projects\PaddleDetection\deploy\cpp
2. Use CMake to generate the project files
- The compile parameters are described below (entries marked with * are required only when using the **GPU version** of the inference library; keep CUDA library versions aligned, **use CUDA 9.0 or 10.0, not 9.2, 10.1, or similar**):
+ The compile parameters are described below (entries marked with `*` are required only when using the **GPU version** of the inference library; keep CUDA library versions aligned, **use CUDA 9.0 or 10.0, not 9.2, 10.1, or similar**):
| Parameter | Description |
| ---- | ---- |
@@ -99,6 +99,7 @@ cd D:\projects\PaddleDetection\deploy\cpp\out\build\x64-Release
| --use_gpu | Whether to use GPU for inference; supported values are 0 or 1 (default: 0) |
| --gpu_id | GPU device id used for inference (default: 0) |
| --run_mode | Run mode when using GPU; defaults to fluid, options: fluid/trt_fp32/trt_fp16/trt_int8 |
+ | --batch_size | Batch size for inference; effective when `image_dir` is specified |
| --run_benchmark | Whether to run inference repeatedly for benchmark timing |
| --output_dir | Directory where output images are saved (default: output) |
| --use_mkldnn | Whether to enable MKLDNN acceleration for CPU inference |
......
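On Windows the flags are identical; a hypothetical invocation from the Release output directory (paths and the executable name are assumed from the build steps above):

```bash
main.exe --model_dir=D:\models\yolov3_infer_model --image_dir=D:\demo_images --use_gpu=1 --run_mode=trt_fp16 --batch_size=4
```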
@@ -62,6 +62,7 @@ class ObjectDetector {
      bool use_mkldnn=false,
      int cpu_threads=1,
      const std::string& run_mode="fluid",
+     const int batch_size=1,
      const int gpu_id=0,
      bool use_dynamic_shape=false,
      const int trt_min_shape=1,
@@ -83,7 +84,7 @@ class ObjectDetector {
    threshold_ = config_.draw_threshold_;
    image_shape_ = config_.image_shape_;
    preprocessor_.Init(config_.preprocess_info_, image_shape_);
-   LoadModel(model_dir, 1, run_mode);
+   LoadModel(model_dir, batch_size, run_mode);
  }
  // Load Paddle inference model
......
@@ -72,7 +72,7 @@ void PrintBenchmarkLog(std::vector<double> det_time, int img_num){
  LOG(INFO) << "enable_mkldnn: " << (FLAGS_use_mkldnn ? "True" : "False");
  LOG(INFO) << "cpu_math_library_num_threads: " << FLAGS_cpu_threads;
  LOG(INFO) << "----------------------- Data info -----------------------";
- LOG(INFO) << "batch_size: " << 1;
+ LOG(INFO) << "batch_size: " << FLAGS_batch_size;
  LOG(INFO) << "input_shape: " << "dynamic shape";
  LOG(INFO) << "----------------------- Model info -----------------------";
  FLAGS_model_dir.erase(FLAGS_model_dir.find_last_not_of("/") + 1);
@@ -332,7 +332,7 @@ void PredictImage(const std::vector<std::string> all_img_paths,
      if (output_dir.rfind(OS_PATH_SEP) != output_dir.size() - 1) {
        output_path += OS_PATH_SEP;
      }
-     std::string image_file_path = all_img_paths.at(idx * batch_size+bs);
+     std::string image_file_path = all_img_paths.at(idx * batch_size + bs);
      output_path += image_file_path.substr(image_file_path.find_last_of('/') + 1);
      cv::imwrite(output_path, vis_img, compression_params);
      printf("Visualized output saved as %s\n", output_path.c_str());
@@ -361,7 +361,7 @@ int main(int argc, char** argv) {
  }
  // Load model and create an object detector
  PaddleDetection::ObjectDetector det(FLAGS_model_dir, FLAGS_use_gpu, FLAGS_use_mkldnn,
-     FLAGS_cpu_threads, FLAGS_run_mode, FLAGS_gpu_id, FLAGS_use_dynamic_shape,
+     FLAGS_cpu_threads, FLAGS_run_mode, FLAGS_batch_size, FLAGS_gpu_id, FLAGS_use_dynamic_shape,
      FLAGS_trt_min_shape, FLAGS_trt_max_shape, FLAGS_trt_opt_shape, FLAGS_trt_calib_mode);
  // Do inference on input video or image
  if (!FLAGS_video_file.empty() || FLAGS_camera_id != -1) {
......
@@ -35,6 +35,7 @@ python deploy/python/infer.py --model_dir=./inference/yolov3_mobilenet_v1_roadsi
| --camera_id | Option | Camera ID used for prediction; defaults to -1 (camera disabled), can be set to 0 through (number of cameras - 1); during prediction, press `q` in the visualization window to quit and write results to output/output.mp4 |
| --use_gpu | No | Whether to use GPU; defaults to False |
| --run_mode | No | Run mode when using GPU; defaults to fluid, options: fluid/trt_fp32/trt_fp16/trt_int8 |
+ | --batch_size | No | Batch size for inference; effective when `image_dir` is specified |
| --threshold | No | Score threshold for predictions; defaults to 0.5 |
| --output_dir | No | Root directory for saving visualized results; defaults to output/ |
| --run_benchmark | No | Whether to run the benchmark; requires `--image_file` or `--image_dir` to be specified |
......
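For example (the model path is the one from the doc above; the image directory is a placeholder), `--batch_size` takes effect here because `--image_dir` is given:

```bash
python deploy/python/infer.py \
    --model_dir=./inference/yolov3_mobilenet_v1_roadsign \
    --image_dir=./demo_images \
    --use_gpu=True \
    --run_mode=trt_fp16 \
    --batch_size=4
```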
@@ -50,6 +50,7 @@ class Detector(object):
        model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
        use_gpu (bool): whether to use gpu
        run_mode (str): mode of running (fluid/trt_fp32/trt_fp16)
+       batch_size (int): batch size used in inference
        use_dynamic_shape (bool): use dynamic shape or not
        trt_min_shape (int): min shape for dynamic shape in trt
        trt_max_shape (int): max shape for dynamic shape in trt
@@ -63,6 +64,7 @@ class Detector(object):
                 model_dir,
                 use_gpu=False,
                 run_mode='fluid',
+                batch_size=1,
                 use_dynamic_shape=False,
                 trt_min_shape=1,
                 trt_max_shape=1280,
@@ -74,6 +76,7 @@ class Detector(object):
        self.predictor, self.config = load_predictor(
            model_dir,
            run_mode=run_mode,
+           batch_size=batch_size,
            min_subgraph_size=self.pred_config.min_subgraph_size,
            use_gpu=use_gpu,
            use_dynamic_shape=use_dynamic_shape,
@@ -186,6 +189,7 @@ class DetectorSOLOv2(Detector):
        model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
        use_gpu (bool): whether to use gpu
        run_mode (str): mode of running (fluid/trt_fp32/trt_fp16)
+       batch_size (int): batch size used in inference
        use_dynamic_shape (bool): use dynamic shape or not
        trt_min_shape (int): min shape for dynamic shape in trt
        trt_max_shape (int): max shape for dynamic shape in trt
@@ -198,6 +202,7 @@ class DetectorSOLOv2(Detector):
                 model_dir,
                 use_gpu=False,
                 run_mode='fluid',
+                batch_size=1,
                 use_dynamic_shape=False,
                 trt_min_shape=1,
                 trt_max_shape=1280,
@@ -209,6 +214,7 @@ class DetectorSOLOv2(Detector):
        self.predictor, self.config = load_predictor(
            model_dir,
            run_mode=run_mode,
+           batch_size=batch_size,
            min_subgraph_size=self.pred_config.min_subgraph_size,
            use_gpu=use_gpu,
            use_dynamic_shape=use_dynamic_shape,
@@ -568,6 +574,7 @@ def main():
        FLAGS.model_dir,
        use_gpu=FLAGS.use_gpu,
        run_mode=FLAGS.run_mode,
+       batch_size=FLAGS.batch_size,
        use_dynamic_shape=FLAGS.use_dynamic_shape,
        trt_min_shape=FLAGS.trt_min_shape,
        trt_max_shape=FLAGS.trt_max_shape,
@@ -581,6 +588,7 @@ def main():
        FLAGS.model_dir,
        use_gpu=FLAGS.use_gpu,
        run_mode=FLAGS.run_mode,
+       batch_size=FLAGS.batch_size,
        use_dynamic_shape=FLAGS.use_dynamic_shape,
        trt_min_shape=FLAGS.trt_min_shape,
        trt_max_shape=FLAGS.trt_max_shape,
@@ -615,7 +623,7 @@ def main():
        'precision': mode.split('_')[-1]
    }
    data_info = {
-       'batch_size': 1,
+       'batch_size': FLAGS.batch_size,
        'shape': "dynamic_shape",
        'data_num': perf_info['img_num']
    }
......
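Taken together, the Python changes thread `batch_size` from the CLI flag through `Detector.__init__` into `load_predictor`, so the TensorRT engine is configured for the requested batch and the benchmark log reports the real value instead of a hard-coded 1. A minimal usage sketch, assuming the keyword arguments visible in this diff (the actual constructor may take further positional arguments not shown in these hunks):

```python
# Hypothetical sketch; the import path and full signature are assumptions
# based only on the fragments visible in this diff.
from deploy.python.infer import Detector

detector = Detector(
    model_dir='./inference/yolov3_mobilenet_v1_roadsign',  # placeholder path
    use_gpu=True,
    run_mode='trt_fp16',
    batch_size=4,  # forwarded to load_predictor; this is the fix in this PR
)
```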