From bd99ac42165df2afff07aac637a33f064ea0dd75 Mon Sep 17 00:00:00 2001
From: Guanghua Yu <742925032@qq.com>
Date: Mon, 24 May 2021 12:58:56 +0800
Subject: [PATCH] [cherry-pick] Automatically set use_dynamic_shape (#3138)

* Automatically set use_dynamic_shape
---
 deploy/TENSOR_RT.md                       | 24 ++++++++++++-----------
 deploy/cpp/include/config_parser.h        |  9 +++++++++
 deploy/cpp/include/object_detector.h      |  3 +--
 deploy/cpp/src/main.cc                    | 13 ++++++------
 deploy/python/infer.py                    | 11 +++--------
 deploy/python/keypoint_det_unite_infer.py |  2 --
 deploy/python/keypoint_infer.py           |  6 ++----
 deploy/python/topdown_unite_utils.py      |  5 -----
 deploy/python/utils.py                    |  5 -----
 ppdet/engine/export_utils.py              |  2 ++
 10 files changed, 37 insertions(+), 43 deletions(-)

diff --git a/deploy/TENSOR_RT.md b/deploy/TENSOR_RT.md
index 225f252ba..019126b6d 100644
--- a/deploy/TENSOR_RT.md
+++ b/deploy/TENSOR_RT.md
@@ -34,31 +34,33 @@ config->EnableTensorRtEngine(1 << 20 /*workspace_size*/,
 
 ### 3.2 TensorRT fixed-shape prediction
 
-Specify the model input shape when exporting the model by setting `TestReader.inputs_def.image_shape=[3,640,640]`; see the [PaddleDetection model export tutorial](../EXPORT_MODEL.md) for details.
-
-`TestReader.inputs_def.image_shape` sets the shape of the data fed to the TensorRT engine (for models like FasterRCNN, `TestReader.inputs_def.image_shape` specifies the image shape before the `Pad` operation).
+For example, set the following in the model's Reader config file:
+```yaml
+TestReader:
+  inputs_def:
+    image_shape: [3,608,608]
+  ...
+```
+Alternatively, set `-o TestReader.inputs_def.image_shape=[3,608,608]` when exporting the model; the model will then run fixed-shape prediction. See the [PaddleDetection model export tutorial](../EXPORT_MODEL.md) for details.
 
 You can open the `model.pdmodel` file with [visualdl](https://www.paddlepaddle.org.cn/paddle/visualdl/demo/graph) to check whether the shape of the first input Tensor is fixed; if no shape is specified, the dimensions are shown as `?`, as in the figure below:
 
 ![img](../docs/images/input_shape.png)
 
-The shape of the preprocessed image must also match the configured model input shape, so set the `target_size` and `keep_ratio` parameters of the `Resize` OP in the `infer_cfg.yml` config file accordingly.
-Note: since TensorRT does not support slice operations along the batch dimension, Faster RCNN and Mask RCNN raise an error with fixed-shape input; use dynamic-shape input for these two models.
+Note: since TensorRT does not support slice operations along the batch dimension, Faster RCNN and Mask RCNN cannot run fixed-shape prediction, so the `TestReader.inputs_def.image_shape` field must not be set for them.
 
-Taking `YOLOv3` as an example, predict with dynamic-shape input:
+Taking `YOLOv3` as an example, predict with fixed-shape input:
 ```
-python python/infer.py --model_dir=../inference_model/yolov3_darknet53_270e_coco/ --image_file=../demo/000000014439_640x640.jpg --use_gpu=True --run_mode=trt_fp32 --run_benchmark=True
+python python/infer.py --model_dir=../inference_model/yolov3_darknet53_270e_coco/ --image_file=../demo/000000014439.jpg --use_gpu=True --run_mode=trt_fp32 --run_benchmark=True
 ```
 
 ### 3.3 TensorRT dynamic-shape prediction
 
-With TensorRT version >= 6, prediction with TensorRT supports dynamic-shape input.
+With TensorRT version >= 6, prediction with TensorRT supports dynamic-shape input. If the model's Reader config file does not set a field such as `TestReader.inputs_def.image_shape=[3,608,608]`, or sets `image_shape=[3,-1,-1]`, the exported model will predict with dynamic shapes. RCNN-series models generally use dynamic-shape prediction.
 For dynamic-shape input in the Paddle inference library, see the description of the `SetTRTDynamicShapeInfo` function in [Paddle CPP inference](https://www.paddlepaddle.org.cn/documentation/docs/zh/guides/05_inference_deployment/inference/native_infer.html).
 
 Dynamic-shape input parameters of `python/infer.py`:
-- use_dynamic_shape: whether the TensorRT input shape is dynamic. Default: False
-
 - trt_min_shape: minimum of the input image height/width for TensorRT. Default: 1
 - trt_max_shape: maximum of the input image height/width for TensorRT. Default: 1280
@@ -69,7 +71,7 @@ For dynamic-shape input in the Paddle inference library, see the description of
 
 Taking `Faster RCNN` as an example, predict with dynamic-shape input:
 ```
-python python/infer.py --model_dir=../inference_model/faster_rcnn_r50_fpn_1x_coco/ --image_file=../demo/000000014439.jpg --use_gpu=True --run_mode=trt_fp16 --run_benchmark=True --use_dynamic_shape=True --trt_max_shape=1280 --trt_min_shape=800 --trt_opt_shape=960
+python python/infer.py --model_dir=../inference_model/faster_rcnn_r50_fpn_1x_coco/ --image_file=../demo/000000014439.jpg --use_gpu=True --run_mode=trt_fp16 --run_benchmark=True --trt_max_shape=1280 --trt_min_shape=800 --trt_opt_shape=960
 ```
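+
+For reference, here is a minimal sketch of roughly what these flags map to in the Paddle Inference Python API (illustrative only, not the exact `python/infer.py` code; the input tensor name `image` is an assumption and depends on the exported model):
+```python
+from paddle.inference import Config, create_predictor
+
+config = Config('model.pdmodel', 'model.pdiparams')
+config.enable_use_gpu(200, 0)  # 200 MB initial GPU memory pool, GPU id 0
+config.enable_tensorrt_engine(
+    workspace_size=1 << 30,
+    max_batch_size=1,
+    min_subgraph_size=3,
+    precision_mode=Config.Precision.Half,  # trt_fp16
+    use_static=False,
+    use_calib_mode=False)
+# Register a shape range for each input tensor: height/width may vary
+# between trt_min_shape and trt_max_shape; trt_opt_shape is the shape
+# TensorRT optimizes for.
+min_input_shape = {'image': [1, 3, 800, 800]}
+max_input_shape = {'image': [1, 3, 1280, 1280]}
+opt_input_shape = {'image': [1, 3, 960, 960]}
+config.set_trt_dynamic_shape_info(min_input_shape, max_input_shape,
+                                  opt_input_shape)
+predictor = create_predictor(config)
+```
+In `python/infer.py` these maps are built from the `--trt_min_shape`/`--trt_max_shape`/`--trt_opt_shape` flags described above.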
 
 ## 4. FAQ
 
diff --git a/deploy/cpp/include/config_parser.h b/deploy/cpp/include/config_parser.h
index 3392b3ed7..661b2d2dc 100644
--- a/deploy/cpp/include/config_parser.h
+++ b/deploy/cpp/include/config_parser.h
@@ -91,6 +91,14 @@ class ConfigPaser {
       return false;
     }
 
+    // Get use_dynamic_shape for TensorRT
+    if (config["use_dynamic_shape"].IsDefined()) {
+      use_dynamic_shape_ = config["use_dynamic_shape"].as<bool>();
+    } else {
+      std::cerr << "Please set use_dynamic_shape." << std::endl;
+      return false;
+    }
+
     return true;
   }
   std::string mode_;
@@ -99,6 +107,7 @@ class ConfigPaser {
   int min_subgraph_size_;
   YAML::Node preprocess_info_;
   std::vector<std::string> label_list_;
+  bool use_dynamic_shape_;
 };
 
 }  // namespace PaddleDetection
diff --git a/deploy/cpp/include/object_detector.h b/deploy/cpp/include/object_detector.h
index 572224a3b..0a5c4d9a1 100644
--- a/deploy/cpp/include/object_detector.h
+++ b/deploy/cpp/include/object_detector.h
@@ -64,7 +64,6 @@ class ObjectDetector {
                  const std::string& run_mode="fluid",
                  const int batch_size=1,
                  const int gpu_id=0,
-                 bool use_dynamic_shape=false,
                  const int trt_min_shape=1,
                  const int trt_max_shape=1280,
                  const int trt_opt_shape=640,
@@ -74,12 +73,12 @@ class ObjectDetector {
     this->cpu_math_library_num_threads_ = cpu_threads;
     this->use_mkldnn_ = use_mkldnn;
-    this->use_dynamic_shape_ = use_dynamic_shape;
     this->trt_min_shape_ = trt_min_shape;
     this->trt_max_shape_ = trt_max_shape;
     this->trt_opt_shape_ = trt_opt_shape;
     this->trt_calib_mode_ = trt_calib_mode;
     config_.load_config(model_dir);
+    this->use_dynamic_shape_ = config_.use_dynamic_shape_;
     this->min_subgraph_size_ = config_.min_subgraph_size_;
     threshold_ = config_.draw_threshold_;
     preprocessor_.Init(config_.preprocess_info_);
diff --git a/deploy/cpp/src/main.cc b/deploy/cpp/src/main.cc
index 67cecc45a..b429a8727 100644
--- a/deploy/cpp/src/main.cc
+++ b/deploy/cpp/src/main.cc
@@ -49,7 +49,6 @@ DEFINE_int32(gpu_id, 0, "Device id of GPU to execute");
 DEFINE_bool(run_benchmark, false, "Whether to predict a image_file repeatedly for benchmark");
 DEFINE_bool(use_mkldnn, false, "Whether use mkldnn with CPU");
 DEFINE_int32(cpu_threads, 1, "Num of threads with CPU");
-DEFINE_bool(use_dynamic_shape, false, "Trt use dynamic shape or not");
 DEFINE_int32(trt_min_shape, 1, "Min shape of TRT DynamicShapeI");
 DEFINE_int32(trt_max_shape, 1280, "Max shape of TRT DynamicShapeI");
 DEFINE_int32(trt_opt_shape, 640, "Opt shape of TRT DynamicShapeI");
@@ -361,8 +360,9 @@ int main(int argc, char** argv) {
   }
   // Load model and create a object detector
   PaddleDetection::ObjectDetector det(FLAGS_model_dir, FLAGS_use_gpu, FLAGS_use_mkldnn,
-                        FLAGS_cpu_threads, FLAGS_run_mode, FLAGS_batch_size,FLAGS_gpu_id, FLAGS_use_dynamic_shape,
-                        FLAGS_trt_min_shape, FLAGS_trt_max_shape, FLAGS_trt_opt_shape, FLAGS_trt_calib_mode);
+                        FLAGS_cpu_threads, FLAGS_run_mode, FLAGS_batch_size, FLAGS_gpu_id,
+                        FLAGS_trt_min_shape, FLAGS_trt_max_shape, FLAGS_trt_opt_shape,
+                        FLAGS_trt_calib_mode);
   // Do inference on input video or image
   if (!FLAGS_video_file.empty() || FLAGS_camera_id != -1) {
     PredictVideo(FLAGS_video_file, &det);
@@ -374,13 +374,14 @@ int main(int argc, char** argv) {
     if (!FLAGS_image_file.empty()) {
       all_imgs.push_back(FLAGS_image_file);
       if (FLAGS_batch_size > 1) {
-        std::cout << "batch_size should be 1, when image_file is not None" << std::endl;
-        FLAGS_batch_size = 1;
+        std::cout << "batch_size should be 1 when `image_file` is set." << std::endl;
+        return -1;
       }
     } else {
       GetAllFiles((char *)FLAGS_image_dir.c_str(), all_imgs);
     }
-    PredictImage(all_imgs, FLAGS_batch_size, FLAGS_threshold, FLAGS_run_benchmark, &det, FLAGS_output_dir);
+    PredictImage(all_imgs, FLAGS_batch_size, FLAGS_threshold,
+                 FLAGS_run_benchmark, &det, FLAGS_output_dir);
   }
   return 0;
 }
diff --git a/deploy/python/infer.py b/deploy/python/infer.py
index 6ea92ed3d..fd1d16d59 100644
--- a/deploy/python/infer.py
+++ b/deploy/python/infer.py
@@ -51,7 +51,6 @@ class Detector(object):
         use_gpu (bool): whether use gpu
         run_mode (str): mode of running(fluid/trt_fp32/trt_fp16)
         batch_size (int): size of per batch in inference
-        use_dynamic_shape (bool): use dynamic shape or not
         trt_min_shape (int): min shape for dynamic shape in trt
         trt_max_shape (int): max shape for dynamic shape in trt
         trt_opt_shape (int): opt shape for dynamic shape in trt
@@ -65,7 +64,6 @@ class Detector(object):
                  use_gpu=False,
                  run_mode='fluid',
                  batch_size=1,
-                 use_dynamic_shape=False,
                  trt_min_shape=1,
                  trt_max_shape=1280,
                  trt_opt_shape=640,
@@ -79,7 +77,7 @@ class Detector(object):
             batch_size=batch_size,
             min_subgraph_size=self.pred_config.min_subgraph_size,
             use_gpu=use_gpu,
-            use_dynamic_shape=use_dynamic_shape,
+            use_dynamic_shape=self.pred_config.use_dynamic_shape,
             trt_min_shape=trt_min_shape,
             trt_max_shape=trt_max_shape,
             trt_opt_shape=trt_opt_shape,
@@ -189,7 +187,6 @@ class DetectorSOLOv2(Detector):
         use_gpu (bool): whether use gpu
         run_mode (str): mode of running(fluid/trt_fp32/trt_fp16)
         batch_size (int): size of per batch in inference
-        use_dynamic_shape (bool): use dynamic shape or not
         trt_min_shape (int): min shape for dynamic shape in trt
         trt_max_shape (int): max shape for dynamic shape in trt
         trt_opt_shape (int): opt shape for dynamic shape in trt
@@ -202,7 +199,6 @@ class DetectorSOLOv2(Detector):
                  use_gpu=False,
                  run_mode='fluid',
                  batch_size=1,
-                 use_dynamic_shape=False,
                  trt_min_shape=1,
                  trt_max_shape=1280,
                  trt_opt_shape=640,
@@ -216,7 +212,7 @@ class DetectorSOLOv2(Detector):
             batch_size=batch_size,
             min_subgraph_size=self.pred_config.min_subgraph_size,
             use_gpu=use_gpu,
-            use_dynamic_shape=use_dynamic_shape,
+            use_dynamic_shape=self.pred_config.use_dynamic_shape,
             trt_min_shape=trt_min_shape,
             trt_max_shape=trt_max_shape,
             trt_opt_shape=trt_opt_shape,
@@ -328,6 +324,7 @@ class PredictConfig():
         self.min_subgraph_size = yml_conf['min_subgraph_size']
         self.labels = yml_conf['label_list']
         self.mask = False
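+        # use_dynamic_shape is now read from the exported infer_cfg.yml
+        # instead of a command-line flag, so inference always matches the
+        # shape mode the model was exported with.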
+        self.use_dynamic_shape = yml_conf['use_dynamic_shape']
         if 'mask' in yml_conf:
             self.mask = yml_conf['mask']
         self.print_config()
@@ -573,7 +570,6 @@ def main():
         use_gpu=FLAGS.use_gpu,
         run_mode=FLAGS.run_mode,
         batch_size=FLAGS.batch_size,
-        use_dynamic_shape=FLAGS.use_dynamic_shape,
         trt_min_shape=FLAGS.trt_min_shape,
         trt_max_shape=FLAGS.trt_max_shape,
         trt_opt_shape=FLAGS.trt_opt_shape,
@@ -587,7 +583,6 @@ def main():
         use_gpu=FLAGS.use_gpu,
         run_mode=FLAGS.run_mode,
         batch_size=FLAGS.batch_size,
-        use_dynamic_shape=FLAGS.use_dynamic_shape,
         trt_min_shape=FLAGS.trt_min_shape,
         trt_max_shape=FLAGS.trt_max_shape,
         trt_opt_shape=FLAGS.trt_opt_shape,
diff --git a/deploy/python/keypoint_det_unite_infer.py b/deploy/python/keypoint_det_unite_infer.py
index dd80b290f..6ff335971 100644
--- a/deploy/python/keypoint_det_unite_infer.py
+++ b/deploy/python/keypoint_det_unite_infer.py
@@ -158,7 +158,6 @@ def main():
         FLAGS.det_model_dir,
         use_gpu=FLAGS.use_gpu,
         run_mode=FLAGS.run_mode,
-        use_dynamic_shape=FLAGS.use_dynamic_shape,
         trt_min_shape=FLAGS.trt_min_shape,
         trt_max_shape=FLAGS.trt_max_shape,
         trt_opt_shape=FLAGS.trt_opt_shape,
@@ -172,7 +171,6 @@ def main():
         FLAGS.keypoint_model_dir,
         use_gpu=FLAGS.use_gpu,
         run_mode=FLAGS.run_mode,
-        use_dynamic_shape=FLAGS.use_dynamic_shape,
         trt_min_shape=FLAGS.trt_min_shape,
         trt_max_shape=FLAGS.trt_max_shape,
         trt_opt_shape=FLAGS.trt_opt_shape,
diff --git a/deploy/python/keypoint_infer.py b/deploy/python/keypoint_infer.py
index 0b7ece269..c1f5e15bf 100644
--- a/deploy/python/keypoint_infer.py
+++ b/deploy/python/keypoint_infer.py
@@ -46,7 +46,6 @@ class KeyPoint_Detector(object):
         model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
         use_gpu (bool): whether use gpu
         run_mode (str): mode of running(fluid/trt_fp32/trt_fp16)
-        use_dynamic_shape (bool): use dynamic shape or not
         trt_min_shape (int): min shape for dynamic shape in trt
         trt_max_shape (int): max shape for dynamic shape in trt
         trt_opt_shape (int): opt shape for dynamic shape in trt
@@ -59,7 +58,6 @@ class KeyPoint_Detector(object):
                  model_dir,
                  use_gpu=False,
                  run_mode='fluid',
-                 use_dynamic_shape=False,
                  trt_min_shape=1,
                  trt_max_shape=1280,
                  trt_opt_shape=640,
@@ -72,7 +70,7 @@ class KeyPoint_Detector(object):
             run_mode=run_mode,
             min_subgraph_size=self.pred_config.min_subgraph_size,
             use_gpu=use_gpu,
-            use_dynamic_shape=use_dynamic_shape,
+            use_dynamic_shape=self.pred_config.use_dynamic_shape,
             trt_min_shape=trt_min_shape,
             trt_max_shape=trt_max_shape,
             trt_opt_shape=trt_opt_shape,
@@ -210,6 +208,7 @@ class PredictConfig_KeyPoint():
         self.min_subgraph_size = yml_conf['min_subgraph_size']
         self.labels = yml_conf['label_list']
         self.tagmap = False
+        self.use_dynamic_shape = yml_conf['use_dynamic_shape']
         if 'keypoint_bottomup' == self.archcls:
             self.tagmap = True
         self.print_config()
@@ -384,7 +383,6 @@ def main():
         FLAGS.model_dir,
         use_gpu=FLAGS.use_gpu,
         run_mode=FLAGS.run_mode,
-        use_dynamic_shape=FLAGS.use_dynamic_shape,
         trt_min_shape=FLAGS.trt_min_shape,
         trt_max_shape=FLAGS.trt_max_shape,
         trt_opt_shape=FLAGS.trt_opt_shape,
diff --git a/deploy/python/topdown_unite_utils.py b/deploy/python/topdown_unite_utils.py
index ab483109f..02d3c6049 100644
--- a/deploy/python/topdown_unite_utils.py
+++ b/deploy/python/topdown_unite_utils.py
@@ -84,11 +84,6 @@ def argsparser():
         help="Whether use mkldnn with CPU.")
     parser.add_argument(
         "--cpu_threads", type=int, default=1, help="Num of threads with CPU.")
-    parser.add_argument(
-        "--use_dynamic_shape",
-        type=ast.literal_eval,
-        default=False,
-        help="Dynamic_shape for TensorRT.")
     parser.add_argument(
         "--trt_min_shape", type=int, default=1, help="min_shape for TensorRT.")
     parser.add_argument(
diff --git a/deploy/python/utils.py b/deploy/python/utils.py
index b7afcd985..aedcab194 100644
--- a/deploy/python/utils.py
+++ b/deploy/python/utils.py
@@ -76,11 +76,6 @@ def argsparser():
         help="Whether use mkldnn with CPU.")
     parser.add_argument(
         "--cpu_threads", type=int, default=1, help="Num of threads with CPU.")
-    parser.add_argument(
-        "--use_dynamic_shape",
-        type=ast.literal_eval,
-        default=False,
-        help="Dynamic_shape for TensorRT.")
     parser.add_argument(
         "--trt_min_shape", type=int, default=1, help="min_shape for TensorRT.")
     parser.add_argument(
diff --git a/ppdet/engine/export_utils.py b/ppdet/engine/export_utils.py
index 04506dc9d..40a4fdc98 100644
--- a/ppdet/engine/export_utils.py
+++ b/ppdet/engine/export_utils.py
@@ -81,10 +81,12 @@ def _dump_infer_config(config, path, image_shape, model):
         arch_state = False
     from ppdet.core.config.yaml_helpers import setup_orderdict
     setup_orderdict()
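+    # image_shape is [C, H, W]; an H (and W) of -1 means the model was
+    # exported without a fixed input size, so inference must run TensorRT
+    # with dynamic shapes.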
+    use_dynamic_shape = True if image_shape[1] == -1 else False
     infer_cfg = OrderedDict({
         'mode': 'fluid',
         'draw_threshold': 0.5,
         'metric': config['metric'],
+        'use_dynamic_shape': use_dynamic_shape
     })
     infer_arch = config['architecture']
-- 
GitLab