未验证 提交 a0663646 编写于 作者: G Guanghua Yu 提交者: GitHub

fix batch_size when trt infer (#3102)

* fix batch_size when trt infer
上级 6f88fcab
......@@ -162,6 +162,7 @@ CUDNN_LIB=/usr/lib/aarch64-linux-gnu/
| --use_gpu | 是否使用 GPU 预测, 支持值为0或1(默认值为0)|
| --gpu_id | 指定进行推理的GPU device id(默认值为0)|
| --run_mode | 使用GPU时,默认为fluid, 可选(fluid/trt_fp32/trt_fp16/trt_int8)|
| --batch_size |预测时的batch size,在指定`image_dir`时有效 |
| --run_benchmark | 是否重复预测来进行benchmark测速 |
| --output_dir | 输出图片所在的文件夹, 默认为output |
| --use_mkldnn | CPU预测中是否开启MKLDNN加速 |
......
......@@ -104,6 +104,7 @@ make
| --use_gpu | 是否使用 GPU 预测, 支持值为0或1(默认值为0)|
| --gpu_id | 指定进行推理的GPU device id(默认值为0)|
| --run_mode | 使用GPU时,默认为fluid, 可选(fluid/trt_fp32/trt_fp16/trt_int8)|
| --batch_size | 预测时的batch size,在指定`image_dir`时有效 |
| --run_benchmark | 是否重复预测来进行benchmark测速 |
| --output_dir | 输出图片所在的文件夹, 默认为output |
| --use_mkldnn | CPU预测中是否开启MKLDNN加速 |
......
......@@ -54,7 +54,7 @@ cd D:\projects\PaddleDetection\deploy\cpp
2. 使用CMake生成项目文件
编译参数的含义说明如下(带*表示仅在使用**GPU版本**预测库时指定, 其中CUDA库版本尽量对齐,**使用9.0、10.0版本,不使用9.2、10.1等版本CUDA库**):
编译参数的含义说明如下(带`*`表示仅在使用**GPU版本**预测库时指定, 其中CUDA库版本尽量对齐,**使用9.0、10.0版本,不使用9.2、10.1等版本CUDA库**):
| 参数名 | 含义 |
| ---- | ---- |
......@@ -99,6 +99,7 @@ cd D:\projects\PaddleDetection\deploy\cpp\out\build\x64-Release
| --use_gpu | 是否使用 GPU 预测, 支持值为0或1(默认值为0)|
| --gpu_id | 指定进行推理的GPU device id(默认值为0)|
| --run_mode | 使用GPU时,默认为fluid, 可选(fluid/trt_fp32/trt_fp16/trt_int8)|
| --batch_size | 预测时的batch size,在指定`image_dir`时有效 |
| --run_benchmark | 是否重复预测来进行benchmark测速 |
| --output_dir | 输出图片所在的文件夹, 默认为output |
| --use_mkldnn | CPU预测中是否开启MKLDNN加速 |
......
......@@ -62,6 +62,7 @@ class ObjectDetector {
bool use_mkldnn=false,
int cpu_threads=1,
const std::string& run_mode="fluid",
const int batch_size=1,
const int gpu_id=0,
bool use_dynamic_shape=false,
const int trt_min_shape=1,
......@@ -83,7 +84,7 @@ class ObjectDetector {
threshold_ = config_.draw_threshold_;
image_shape_ = config_.image_shape_;
preprocessor_.Init(config_.preprocess_info_, image_shape_);
LoadModel(model_dir, 1, run_mode);
LoadModel(model_dir, batch_size, run_mode);
}
// Load Paddle inference model
......
......@@ -72,7 +72,7 @@ void PrintBenchmarkLog(std::vector<double> det_time, int img_num){
LOG(INFO) << "enable_mkldnn: " << (FLAGS_use_mkldnn ? "True" : "False");
LOG(INFO) << "cpu_math_library_num_threads: " << FLAGS_cpu_threads;
LOG(INFO) << "----------------------- Data info -----------------------";
LOG(INFO) << "batch_size: " << 1;
LOG(INFO) << "batch_size: " << FLAGS_batch_size;
LOG(INFO) << "input_shape: " << "dynamic shape";
LOG(INFO) << "----------------------- Model info -----------------------";
FLAGS_model_dir.erase(FLAGS_model_dir.find_last_not_of("/") + 1);
......@@ -332,7 +332,7 @@ void PredictImage(const std::vector<std::string> all_img_paths,
if (output_dir.rfind(OS_PATH_SEP) != output_dir.size() - 1) {
output_path += OS_PATH_SEP;
}
std::string image_file_path = all_img_paths.at(idx * batch_size+bs);
std::string image_file_path = all_img_paths.at(idx * batch_size + bs);
output_path += image_file_path.substr(image_file_path.find_last_of('/') + 1);
cv::imwrite(output_path, vis_img, compression_params);
printf("Visualized output saved as %s\n", output_path.c_str());
......@@ -361,7 +361,7 @@ int main(int argc, char** argv) {
}
// Load model and create a object detector
PaddleDetection::ObjectDetector det(FLAGS_model_dir, FLAGS_use_gpu, FLAGS_use_mkldnn,
FLAGS_cpu_threads, FLAGS_run_mode, FLAGS_gpu_id, FLAGS_use_dynamic_shape,
FLAGS_cpu_threads, FLAGS_run_mode, FLAGS_batch_size,FLAGS_gpu_id, FLAGS_use_dynamic_shape,
FLAGS_trt_min_shape, FLAGS_trt_max_shape, FLAGS_trt_opt_shape, FLAGS_trt_calib_mode);
// Do inference on input video or image
if (!FLAGS_video_file.empty() || FLAGS_camera_id != -1) {
......
......@@ -35,6 +35,7 @@ python deploy/python/infer.py --model_dir=./inference/yolov3_mobilenet_v1_roadsi
| --camera_id | Option | 用来预测的摄像头ID,默认为-1(表示不使用摄像头预测,可设置为:0 - (摄像头数目-1) ),预测过程中在可视化界面按`q`退出输出预测结果到:output/output.mp4|
| --use_gpu | No |是否使用GPU,默认为False|
| --run_mode | No |使用GPU时,默认为fluid, 可选(fluid/trt_fp32/trt_fp16/trt_int8)|
| --batch_size | No |预测时的batch size,在指定`image_dir`时有效 |
| --threshold | No|预测得分的阈值,默认为0.5|
| --output_dir | No|可视化结果保存的根目录,默认为output/|
| --run_benchmark | No| 是否运行benchmark,同时需指定`--image_file`或`--image_dir` |
......
......@@ -50,6 +50,7 @@ class Detector(object):
model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
use_gpu (bool): whether use gpu
run_mode (str): mode of running(fluid/trt_fp32/trt_fp16)
batch_size (int): size of per batch in inference
use_dynamic_shape (bool): use dynamic shape or not
trt_min_shape (int): min shape for dynamic shape in trt
trt_max_shape (int): max shape for dynamic shape in trt
......@@ -63,6 +64,7 @@ class Detector(object):
model_dir,
use_gpu=False,
run_mode='fluid',
batch_size=1,
use_dynamic_shape=False,
trt_min_shape=1,
trt_max_shape=1280,
......@@ -74,6 +76,7 @@ class Detector(object):
self.predictor, self.config = load_predictor(
model_dir,
run_mode=run_mode,
batch_size=batch_size,
min_subgraph_size=self.pred_config.min_subgraph_size,
use_gpu=use_gpu,
use_dynamic_shape=use_dynamic_shape,
......@@ -186,6 +189,7 @@ class DetectorSOLOv2(Detector):
model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
use_gpu (bool): whether use gpu
run_mode (str): mode of running(fluid/trt_fp32/trt_fp16)
batch_size (int): size of per batch in inference
use_dynamic_shape (bool): use dynamic shape or not
trt_min_shape (int): min shape for dynamic shape in trt
trt_max_shape (int): max shape for dynamic shape in trt
......@@ -198,6 +202,7 @@ class DetectorSOLOv2(Detector):
model_dir,
use_gpu=False,
run_mode='fluid',
batch_size=1,
use_dynamic_shape=False,
trt_min_shape=1,
trt_max_shape=1280,
......@@ -209,6 +214,7 @@ class DetectorSOLOv2(Detector):
self.predictor, self.config = load_predictor(
model_dir,
run_mode=run_mode,
batch_size=batch_size,
min_subgraph_size=self.pred_config.min_subgraph_size,
use_gpu=use_gpu,
use_dynamic_shape=use_dynamic_shape,
......@@ -568,6 +574,7 @@ def main():
FLAGS.model_dir,
use_gpu=FLAGS.use_gpu,
run_mode=FLAGS.run_mode,
batch_size=FLAGS.batch_size,
use_dynamic_shape=FLAGS.use_dynamic_shape,
trt_min_shape=FLAGS.trt_min_shape,
trt_max_shape=FLAGS.trt_max_shape,
......@@ -581,6 +588,7 @@ def main():
FLAGS.model_dir,
use_gpu=FLAGS.use_gpu,
run_mode=FLAGS.run_mode,
batch_size=FLAGS.batch_size,
use_dynamic_shape=FLAGS.use_dynamic_shape,
trt_min_shape=FLAGS.trt_min_shape,
trt_max_shape=FLAGS.trt_max_shape,
......@@ -615,7 +623,7 @@ def main():
'precision': mode.split('_')[-1]
}
data_info = {
'batch_size': 1,
'batch_size': FLAGS.batch_size,
'shape': "dynamic_shape",
'data_num': perf_info['img_num']
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册