From 01cb2ee8e55300c0a968fc0f07d3c19188a5e17b Mon Sep 17 00:00:00 2001
From: Guanghua Yu <742925032@qq.com>
Date: Fri, 21 May 2021 15:34:37 +0800
Subject: [PATCH] [cherry-pick]fix batch_size when trt infer (#3104)

* fix batch_size when trt infer
---
 deploy/cpp/docs/Jetson_build.md         |  1 +
 deploy/cpp/docs/linux_build.md          |  1 +
 deploy/cpp/docs/windows_vs2019_build.md |  3 ++-
 deploy/cpp/include/object_detector.h    |  3 ++-
 deploy/cpp/src/main.cc                  |  6 +++---
 deploy/python/README.md                 |  1 +
 deploy/python/infer.py                  | 10 +++++++++-
 7 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/deploy/cpp/docs/Jetson_build.md b/deploy/cpp/docs/Jetson_build.md
index 04c1be493..9acd5de37 100644
--- a/deploy/cpp/docs/Jetson_build.md
+++ b/deploy/cpp/docs/Jetson_build.md
@@ -162,6 +162,7 @@ CUDNN_LIB=/usr/lib/aarch64-linux-gnu/
 | --use_gpu | Whether to use GPU for inference; supported values are 0 and 1 (default: 0) |
 | --gpu_id | GPU device id to run inference on (default: 0) |
 | --run_mode | Run mode when using GPU; defaults to fluid, options: fluid/trt_fp32/trt_fp16/trt_int8 |
+| --batch_size | Batch size for inference; takes effect when `image_dir` is specified |
 | --run_benchmark | Whether to run inference repeatedly to benchmark speed |
 | --output_dir | Directory for output images (default: output) |
 | --use_mkldnn | Whether to enable MKLDNN acceleration for CPU inference |
diff --git a/deploy/cpp/docs/linux_build.md b/deploy/cpp/docs/linux_build.md
index 8cbea7301..4f7f691b1 100755
--- a/deploy/cpp/docs/linux_build.md
+++ b/deploy/cpp/docs/linux_build.md
@@ -104,6 +104,7 @@ make
 | --use_gpu | Whether to use GPU for inference; supported values are 0 and 1 (default: 0) |
 | --gpu_id | GPU device id to run inference on (default: 0) |
 | --run_mode | Run mode when using GPU; defaults to fluid, options: fluid/trt_fp32/trt_fp16/trt_int8 |
+| --batch_size | Batch size for inference; takes effect when `image_dir` is specified |
 | --run_benchmark | Whether to run inference repeatedly to benchmark speed |
 | --output_dir | Directory for output images (default: output) |
 | --use_mkldnn | Whether to enable MKLDNN acceleration for CPU inference |
diff --git a/deploy/cpp/docs/windows_vs2019_build.md b/deploy/cpp/docs/windows_vs2019_build.md
index fd746ba51..551531db5 100755
--- a/deploy/cpp/docs/windows_vs2019_build.md
+++ b/deploy/cpp/docs/windows_vs2019_build.md
@@ -54,7 +54,7 @@ cd D:\projects\PaddleDetection\deploy\cpp
 2. Generate the project files with CMake

-The build parameters are described below (entries marked with * need to be set only when using the **GPU version** of the inference library; keep the CUDA library version aligned: **use CUDA 9.0 or 10.0, not 9.2, 10.1, or other versions**):
+The build parameters are described below (entries marked with `*` need to be set only when using the **GPU version** of the inference library; keep the CUDA library version aligned: **use CUDA 9.0 or 10.0, not 9.2, 10.1, or other versions**):

 | Parameter | Description |
 | ---- | ---- |
@@ -99,6 +99,7 @@ cd D:\projects\PaddleDetection\deploy\cpp\out\build\x64-Release
 | --use_gpu | Whether to use GPU for inference; supported values are 0 and 1 (default: 0) |
 | --gpu_id | GPU device id to run inference on (default: 0) |
 | --run_mode | Run mode when using GPU; defaults to fluid, options: fluid/trt_fp32/trt_fp16/trt_int8 |
+| --batch_size | Batch size for inference; takes effect when `image_dir` is specified |
 | --run_benchmark | Whether to run inference repeatedly to benchmark speed |
 | --output_dir | Directory for output images (default: output) |
 | --use_mkldnn | Whether to enable MKLDNN acceleration for CPU inference |
diff --git a/deploy/cpp/include/object_detector.h b/deploy/cpp/include/object_detector.h
index 4c4eb1cb1..aeffca0b3 100644
--- a/deploy/cpp/include/object_detector.h
+++ b/deploy/cpp/include/object_detector.h
@@ -62,6 +62,7 @@ class ObjectDetector {
       bool use_mkldnn=false,
       int cpu_threads=1,
       const std::string& run_mode="fluid",
+      const int batch_size=1,
       const int gpu_id=0,
       bool use_dynamic_shape=false,
       const int trt_min_shape=1,
@@ -83,7 +84,7 @@ class ObjectDetector {
     threshold_ = config_.draw_threshold_;
     image_shape_ = config_.image_shape_;
     preprocessor_.Init(config_.preprocess_info_, image_shape_);
-    LoadModel(model_dir, 1, run_mode);
+    LoadModel(model_dir, batch_size, run_mode);
   }

   // Load Paddle inference model
diff --git a/deploy/cpp/src/main.cc b/deploy/cpp/src/main.cc
index 9d54c06f2..67cecc45a 100644
--- a/deploy/cpp/src/main.cc
+++ b/deploy/cpp/src/main.cc
@@ -72,7 +72,7 @@ void PrintBenchmarkLog(std::vector<double> det_time, int img_num){
   LOG(INFO) << "enable_mkldnn: " << (FLAGS_use_mkldnn ? "True" : "False");
   LOG(INFO) << "cpu_math_library_num_threads: " << FLAGS_cpu_threads;
   LOG(INFO) << "----------------------- Data info -----------------------";
-  LOG(INFO) << "batch_size: " << 1;
+  LOG(INFO) << "batch_size: " << FLAGS_batch_size;
   LOG(INFO) << "input_shape: " << "dynamic shape";
   LOG(INFO) << "----------------------- Model info -----------------------";
   FLAGS_model_dir.erase(FLAGS_model_dir.find_last_not_of("/") + 1);
@@ -332,7 +332,7 @@ void PredictImage(const std::vector<std::string> all_img_paths,
       if (output_dir.rfind(OS_PATH_SEP) != output_dir.size() - 1) {
         output_path += OS_PATH_SEP;
       }
-      std::string image_file_path = all_img_paths.at(idx * batch_size+bs);
+      std::string image_file_path = all_img_paths.at(idx * batch_size + bs);
       output_path += image_file_path.substr(image_file_path.find_last_of('/') + 1);
       cv::imwrite(output_path, vis_img, compression_params);
       printf("Visualized output saved as %s\n", output_path.c_str());
@@ -361,7 +361,7 @@ int main(int argc, char** argv) {
   }
   // Load model and create an object detector
   PaddleDetection::ObjectDetector det(FLAGS_model_dir, FLAGS_use_gpu, FLAGS_use_mkldnn,
-                                      FLAGS_cpu_threads, FLAGS_run_mode, FLAGS_gpu_id, FLAGS_use_dynamic_shape,
+                                      FLAGS_cpu_threads, FLAGS_run_mode, FLAGS_batch_size, FLAGS_gpu_id, FLAGS_use_dynamic_shape,
                                       FLAGS_trt_min_shape, FLAGS_trt_max_shape, FLAGS_trt_opt_shape, FLAGS_trt_calib_mode);
   // Do inference on input video or image
   if (!FLAGS_video_file.empty() || FLAGS_camera_id != -1) {
diff --git a/deploy/python/README.md b/deploy/python/README.md
index 386c05ce5..43622a363 100644
--- a/deploy/python/README.md
+++ b/deploy/python/README.md
@@ -35,6 +35,7 @@ python deploy/python/infer.py --model_dir=./inference/yolov3_mobilenet_v1_roadsi
 | --camera_id | Option | Camera ID used for prediction; defaults to -1 (do not use a camera), can be set to 0 - (number of cameras - 1); during prediction, press `q` in the visualization window to quit and write the results to output/output.mp4 |
 | --use_gpu | No | Whether to use GPU; defaults to False |
 | --run_mode | No | Run mode when using GPU; defaults to fluid, options: fluid/trt_fp32/trt_fp16/trt_int8 |
+| --batch_size | No | Batch size for inference; takes effect when `image_dir` is specified |
 | --threshold | No | Score threshold for predictions; defaults to 0.5 |
 | --output_dir | No | Root directory for saving visualized results; defaults to output/ |
 | --run_benchmark | No | Whether to run a benchmark; also requires `--image_file` or `--image_dir` to be specified |
diff --git a/deploy/python/infer.py b/deploy/python/infer.py
index 7dfa4fd8d..c92ab89c6 100644
--- a/deploy/python/infer.py
+++ b/deploy/python/infer.py
@@ -50,6 +50,7 @@ class Detector(object):
         model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
         use_gpu (bool): whether use gpu
         run_mode (str): mode of running(fluid/trt_fp32/trt_fp16)
+        batch_size (int): batch size in inference
         use_dynamic_shape (bool): use dynamic shape or not
         trt_min_shape (int): min shape for dynamic shape in trt
         trt_max_shape (int): max shape for dynamic shape in trt
@@ -63,6 +64,7 @@ class Detector(object):
                  model_dir,
                  use_gpu=False,
                  run_mode='fluid',
+                 batch_size=1,
                  use_dynamic_shape=False,
                  trt_min_shape=1,
                  trt_max_shape=1280,
@@ -74,6 +76,7 @@ class Detector(object):
         self.predictor, self.config = load_predictor(
             model_dir,
             run_mode=run_mode,
+            batch_size=batch_size,
             min_subgraph_size=self.pred_config.min_subgraph_size,
             use_gpu=use_gpu,
             use_dynamic_shape=use_dynamic_shape,
@@ -186,6 +189,7 @@ class DetectorSOLOv2(Detector):
         model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
         use_gpu (bool): whether use gpu
         run_mode (str): mode of running(fluid/trt_fp32/trt_fp16)
+        batch_size (int): batch size in inference
         use_dynamic_shape (bool): use dynamic shape or not
         trt_min_shape (int): min shape for dynamic shape in trt
         trt_max_shape (int): max shape for dynamic shape in trt
@@ -198,6 +202,7 @@ class DetectorSOLOv2(Detector):
                  model_dir,
                  use_gpu=False,
                  run_mode='fluid',
+                 batch_size=1,
                  use_dynamic_shape=False,
                  trt_min_shape=1,
                  trt_max_shape=1280,
@@ -209,6 +214,7 @@ class DetectorSOLOv2(Detector):
         self.predictor, self.config = load_predictor(
             model_dir,
             run_mode=run_mode,
+            batch_size=batch_size,
             min_subgraph_size=self.pred_config.min_subgraph_size,
             use_gpu=use_gpu,
             use_dynamic_shape=use_dynamic_shape,
@@ -568,6 +574,7 @@ def main():
         FLAGS.model_dir,
         use_gpu=FLAGS.use_gpu,
         run_mode=FLAGS.run_mode,
+        batch_size=FLAGS.batch_size,
         use_dynamic_shape=FLAGS.use_dynamic_shape,
         trt_min_shape=FLAGS.trt_min_shape,
         trt_max_shape=FLAGS.trt_max_shape,
@@ -581,6 +588,7 @@ def main():
         FLAGS.model_dir,
         use_gpu=FLAGS.use_gpu,
         run_mode=FLAGS.run_mode,
+        batch_size=FLAGS.batch_size,
         use_dynamic_shape=FLAGS.use_dynamic_shape,
         trt_min_shape=FLAGS.trt_min_shape,
         trt_max_shape=FLAGS.trt_max_shape,
@@ -615,7 +623,7 @@ def main():
             'precision': mode.split('_')[-1]
         }
         data_info = {
-            'batch_size': 1,
+            'batch_size': FLAGS.batch_size,
             'shape': "dynamic_shape",
             'data_num': perf_info['img_num']
         }
--
GitLab
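
With this change, the batch size flows from the `--batch_size` flag through `Detector`/`DetectorSOLOv2` into `load_predictor`, so the predictor (including TensorRT engines) is configured for the requested batch instead of the previously hard-coded 1. Below is a minimal sketch of the patched Python interface, not an official example: it assumes `infer.py` is importable (e.g. the script is run from `deploy/python/`), and the model directory is a hypothetical placeholder.

```python
# A minimal sketch of the patched Detector interface.
# Assumptions: this runs from deploy/python/ so `infer` resolves, and
# model_dir points at a hypothetical exported inference model
# (model.pdmodel, model.pdiparams, infer_cfg.yml).
from infer import Detector

detector = Detector(
    model_dir='./inference/yolov3_mobilenet_v1',  # hypothetical exported-model dir
    use_gpu=True,
    run_mode='trt_fp16',  # TensorRT FP16; requires a TensorRT-enabled Paddle build
    batch_size=4,         # new parameter; forwarded to load_predictor by this patch
)
```

On the command line, the same effect comes from passing `--batch_size` alongside `--image_dir`; as the updated docs note, the flag only takes effect when an image directory is supplied.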