diff --git a/deploy/TENSOR_RT.md b/deploy/TENSOR_RT.md
index 019126b6de336e97d6766cbf555e8d28a6121f78..225f252ba4693bc5141b94eabf11e3f2989e4749 100644
--- a/deploy/TENSOR_RT.md
+++ b/deploy/TENSOR_RT.md
@@ -34,33 +34,31 @@ config->EnableTensorRtEngine(1 << 20 /*workspace_size*/,

 ### 3.2 TensorRT fixed-size prediction

-For example, set the following in the model Reader config file:
-```yaml
-TestReader:
-  inputs_def:
-    image_shape: [3,608,608]
-  ...
-```
-Or set `-o TestReader.inputs_def.image_shape=[3,608,608]` when exporting the model; the model will then run fixed-size prediction. For details, see the [PaddleDetection model export tutorial](../EXPORT_MODEL.md).
+Specify the model input size when exporting the model by setting `TestReader.inputs_def.image_shape=[3,640,640]`. For details, see the [PaddleDetection model export tutorial](../EXPORT_MODEL.md).
+
+`TestReader.inputs_def.image_shape` sets the size of the data fed into the TensorRT engine (for models such as Faster RCNN it specifies the image size before the `Pad` operation).

 You can open the `model.pdmodel` file with [visualdl](https://www.paddlepaddle.org.cn/paddle/visualdl/demo/graph) to check whether the shape of the first input Tensor is fixed. If it is not specified, the size is shown as `?`, as in the figure below:
 ![img](../docs/images/input_shape.png)

+The size of the preprocessed image must also match the model input size you set, so configure the `target_size` and `keep_ratio` parameters of the `Resize` OP in the `infer_cfg.yml` config file accordingly.

-Note: since TensorRT does not support slice operations along the batch dimension, Faster RCNN and Mask RCNN cannot run fixed-size prediction, so the `TestReader.inputs_def.image_shape` field must not be set for them.
+Note: since TensorRT does not support slice operations along the batch dimension, fixed-size prediction fails with an error for Faster RCNN and Mask RCNN; use dynamic-size input for these two models.

 Taking `YOLOv3` as an example, run prediction with fixed-size input:
 ```
-python python/infer.py --model_dir=../inference_model/yolov3_darknet53_270e_coco/ --image_file=../demo/000000014439.jpg --use_gpu=True --run_mode=trt_fp32 --run_benchmark=True
+python python/infer.py --model_dir=../inference_model/yolov3_darknet53_270e_coco/ --image_file=../demo/000000014439_640x640.jpg --use_gpu=True --run_mode=trt_fp32 --run_benchmark=True
 ```

 ### 3.3 TensorRT dynamic-size prediction

-With TensorRT version >= 6, prediction with TensorRT supports dynamic-size input. If the model Reader config file does not set a field such as `TestReader.inputs_def.image_shape=[3,608,608]`, or sets `image_shape=[3,-1,-1]`, the exported model will run dynamic-size prediction. RCNN-series models generally use dynamic-size prediction.
+With TensorRT version >= 6, prediction with TensorRT supports dynamic-size input.
 For dynamic-size input with the Paddle inference library, see the description of the `SetTRTDynamicShapeInfo` function in [Paddle CPP inference](https://www.paddlepaddle.org.cn/documentation/docs/zh/guides/05_inference_deployment/inference/native_infer.html).

 Dynamic-size input parameters of `python/infer.py`:

+- use_dynamic_shape: whether the TensorRT input uses dynamic sizes. Default: False
+
 - trt_min_shape: the minimum height/width of the TensorRT input image. Default: 1

 - trt_max_shape: the maximum height/width of the TensorRT input image. Default: 1280
@@ -71,7 +69,7 @@
 Taking `Faster RCNN` as an example, run prediction with dynamic-size input:
 ```
-python python/infer.py --model_dir=../inference_model/faster_rcnn_r50_fpn_1x_coco/ --image_file=../demo/000000014439.jpg --use_gpu=True --run_mode=trt_fp16 --run_benchmark=True --trt_max_shape=1280 --trt_min_shape=800 --trt_opt_shape=960
+python python/infer.py --model_dir=../inference_model/faster_rcnn_r50_fpn_1x_coco/ --image_file=../demo/000000014439.jpg --use_gpu=True --run_mode=trt_fp16 --run_benchmark=True --use_dynamic_shape=True --trt_max_shape=1280 --trt_min_shape=800 --trt_opt_shape=960
 ```

 ## 4. FAQ
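
For reference, a minimal sketch (not part of this patch) of roughly what the `use_dynamic_shape`, `trt_min_shape`, `trt_max_shape` and `trt_opt_shape` options described above correspond to in the Paddle Inference Python API. The model paths and the `image` input-tensor name are illustrative assumptions, not values taken from this PR:

```python
# Minimal sketch: enable TensorRT and register the dynamic-shape range that
# --trt_min_shape/--trt_max_shape/--trt_opt_shape express. Paths and the
# 'image' tensor name are placeholders.
from paddle.inference import Config, PrecisionType, create_predictor

config = Config('inference_model/model.pdmodel', 'inference_model/model.pdiparams')
config.enable_use_gpu(200, 0)  # 200 MB initial GPU memory pool, GPU id 0
config.enable_tensorrt_engine(
    workspace_size=1 << 30,
    max_batch_size=1,
    min_subgraph_size=3,
    precision_mode=PrecisionType.Float32,
    use_static=False,
    use_calib_mode=False)

# Equivalent of --use_dynamic_shape=True with min/opt/max heights and widths of
# 800/960/1280, matching the Faster RCNN example above.
config.set_trt_dynamic_shape_info(
    {'image': [1, 3, 800, 800]},     # minimum shape per input tensor
    {'image': [1, 3, 1280, 1280]},   # maximum shape
    {'image': [1, 3, 960, 960]})     # optimal shape used to build the engine

predictor = create_predictor(config)
```
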
diff --git a/deploy/cpp/include/config_parser.h b/deploy/cpp/include/config_parser.h
index 661b2d2dc2932990accc8a97b2a3f315716e5f1e..3392b3ed795615563b4c062e99ba616002e0e8e0 100644
--- a/deploy/cpp/include/config_parser.h
+++ b/deploy/cpp/include/config_parser.h
@@ -91,14 +91,6 @@ class ConfigPaser {
       return false;
     }

-    // Get use_dynamic_shape for TensorRT
-    if (config["use_dynamic_shape"].IsDefined()) {
-      use_dynamic_shape_ = config["use_dynamic_shape"].as<bool>();
-    } else {
-      std::cerr << "Please set use_dynamic_shape." << std::endl;
-      return false;
-    }
-
     return true;
   }
   std::string mode_;
@@ -107,7 +99,6 @@ class ConfigPaser {
   int min_subgraph_size_;
   YAML::Node preprocess_info_;
   std::vector<std::string> label_list_;
-  bool use_dynamic_shape_;
 };

 }  // namespace PaddleDetection
diff --git a/deploy/cpp/include/object_detector.h b/deploy/cpp/include/object_detector.h
index 0a5c4d9a187878f22b5805d03e35ade9145d2a15..572224a3ba58fc37be822b305415cbc862736254 100644
--- a/deploy/cpp/include/object_detector.h
+++ b/deploy/cpp/include/object_detector.h
@@ -64,6 +64,7 @@ class ObjectDetector {
                  const std::string& run_mode="fluid",
                  const int batch_size=1,
                  const int gpu_id=0,
+                 bool use_dynamic_shape=false,
                  const int trt_min_shape=1,
                  const int trt_max_shape=1280,
                  const int trt_opt_shape=640,
@@ -73,12 +74,12 @@
     this->cpu_math_library_num_threads_ = cpu_threads;
     this->use_mkldnn_ = use_mkldnn;
+    this->use_dynamic_shape_ = use_dynamic_shape;
     this->trt_min_shape_ = trt_min_shape;
     this->trt_max_shape_ = trt_max_shape;
     this->trt_opt_shape_ = trt_opt_shape;
     this->trt_calib_mode_ = trt_calib_mode;
     config_.load_config(model_dir);
-    this->use_dynamic_shape_ = config_.use_dynamic_shape_;
     this->min_subgraph_size_ = config_.min_subgraph_size_;
     threshold_ = config_.draw_threshold_;
     preprocessor_.Init(config_.preprocess_info_);
diff --git a/deploy/cpp/src/main.cc b/deploy/cpp/src/main.cc
index b429a8727dd340195c12484ca7c2dea449dd247d..67cecc45ad0b177f5211abe58ffe7f11882d9f3d 100644
--- a/deploy/cpp/src/main.cc
+++ b/deploy/cpp/src/main.cc
@@ -49,6 +49,7 @@ DEFINE_int32(gpu_id, 0, "Device id of GPU to execute");
 DEFINE_bool(run_benchmark, false, "Whether to predict a image_file repeatedly for benchmark");
 DEFINE_bool(use_mkldnn, false, "Whether use mkldnn with CPU");
 DEFINE_int32(cpu_threads, 1, "Num of threads with CPU");
+DEFINE_bool(use_dynamic_shape, false, "Trt use dynamic shape or not");
 DEFINE_int32(trt_min_shape, 1, "Min shape of TRT DynamicShapeI");
 DEFINE_int32(trt_max_shape, 1280, "Max shape of TRT DynamicShapeI");
 DEFINE_int32(trt_opt_shape, 640, "Opt shape of TRT DynamicShapeI");
@@ -360,9 +361,8 @@ int main(int argc, char** argv) {
   }
   // Load model and create a object detector
   PaddleDetection::ObjectDetector det(FLAGS_model_dir, FLAGS_use_gpu, FLAGS_use_mkldnn,
-                        FLAGS_cpu_threads, FLAGS_run_mode, FLAGS_batch_size,FLAGS_gpu_id,
-                        FLAGS_trt_min_shape, FLAGS_trt_max_shape, FLAGS_trt_opt_shape,
-                        FLAGS_trt_calib_mode);
+                        FLAGS_cpu_threads, FLAGS_run_mode, FLAGS_batch_size,FLAGS_gpu_id, FLAGS_use_dynamic_shape,
+                        FLAGS_trt_min_shape, FLAGS_trt_max_shape, FLAGS_trt_opt_shape, FLAGS_trt_calib_mode);
   // Do inference on input video or image
   if (!FLAGS_video_file.empty() || FLAGS_camera_id != -1) {
     PredictVideo(FLAGS_video_file, &det);
@@ -374,14 +374,13 @@
     if (!FLAGS_image_file.empty()) {
       all_imgs.push_back(FLAGS_image_file);
       if (FLAGS_batch_size > 1) {
-        std::cout << "batch_size should be 1, when set `image_file`." << std::endl;
-        return -1;
+        std::cout << "batch_size should be 1, when image_file is not None" << std::endl;
+        FLAGS_batch_size = 1;
       }
     } else {
       GetAllFiles((char *)FLAGS_image_dir.c_str(), all_imgs);
     }
-    PredictImage(all_imgs, FLAGS_batch_size, FLAGS_threshold,
-                 FLAGS_run_benchmark, &det, FLAGS_output_dir);
+    PredictImage(all_imgs, FLAGS_batch_size, FLAGS_threshold, FLAGS_run_benchmark, &det, FLAGS_output_dir);
   }
   return 0;
 }
diff --git a/deploy/python/infer.py b/deploy/python/infer.py
index fd1d16d59af4c324c9c07597b2137fd4d17c8726..6ea92ed3d8e63f7403b97e1da739741d38166760 100644
--- a/deploy/python/infer.py
+++ b/deploy/python/infer.py
@@ -51,6 +51,7 @@ class Detector(object):
         use_gpu (bool): whether use gpu
         run_mode (str): mode of running(fluid/trt_fp32/trt_fp16)
         batch_size (int): size of per batch in inference
+        use_dynamic_shape (bool): use dynamic shape or not
         trt_min_shape (int): min shape for dynamic shape in trt
         trt_max_shape (int): max shape for dynamic shape in trt
         trt_opt_shape (int): opt shape for dynamic shape in trt
@@ -64,6 +65,7 @@
                  use_gpu=False,
                  run_mode='fluid',
                  batch_size=1,
+                 use_dynamic_shape=False,
                  trt_min_shape=1,
                  trt_max_shape=1280,
                  trt_opt_shape=640,
@@ -77,7 +79,7 @@
             batch_size=batch_size,
             min_subgraph_size=self.pred_config.min_subgraph_size,
             use_gpu=use_gpu,
-            use_dynamic_shape=self.pred_config.use_dynamic_shape,
+            use_dynamic_shape=use_dynamic_shape,
             trt_min_shape=trt_min_shape,
             trt_max_shape=trt_max_shape,
             trt_opt_shape=trt_opt_shape,
@@ -187,6 +189,7 @@ class DetectorSOLOv2(Detector):
         use_gpu (bool): whether use gpu
         run_mode (str): mode of running(fluid/trt_fp32/trt_fp16)
         batch_size (int): size of per batch in inference
+        use_dynamic_shape (bool): use dynamic shape or not
         trt_min_shape (int): min shape for dynamic shape in trt
         trt_max_shape (int): max shape for dynamic shape in trt
         trt_opt_shape (int): opt shape for dynamic shape in trt
@@ -199,6 +202,7 @@
                  use_gpu=False,
                  run_mode='fluid',
                  batch_size=1,
+                 use_dynamic_shape=False,
                  trt_min_shape=1,
                  trt_max_shape=1280,
                  trt_opt_shape=640,
@@ -212,7 +216,7 @@
             batch_size=batch_size,
             min_subgraph_size=self.pred_config.min_subgraph_size,
             use_gpu=use_gpu,
-            use_dynamic_shape=self.pred_config.use_dynamic_shape,
+            use_dynamic_shape=use_dynamic_shape,
             trt_min_shape=trt_min_shape,
             trt_max_shape=trt_max_shape,
             trt_opt_shape=trt_opt_shape,
@@ -324,7 +328,6 @@ class PredictConfig():
         self.min_subgraph_size = yml_conf['min_subgraph_size']
         self.labels = yml_conf['label_list']
         self.mask = False
-        self.use_dynamic_shape = yml_conf['use_dynamic_shape']
         if 'mask' in yml_conf:
             self.mask = yml_conf['mask']
         self.print_config()
@@ -570,6 +573,7 @@ def main():
         use_gpu=FLAGS.use_gpu,
         run_mode=FLAGS.run_mode,
         batch_size=FLAGS.batch_size,
+        use_dynamic_shape=FLAGS.use_dynamic_shape,
         trt_min_shape=FLAGS.trt_min_shape,
         trt_max_shape=FLAGS.trt_max_shape,
         trt_opt_shape=FLAGS.trt_opt_shape,
@@ -583,6 +587,7 @@
         use_gpu=FLAGS.use_gpu,
         run_mode=FLAGS.run_mode,
         batch_size=FLAGS.batch_size,
+        use_dynamic_shape=FLAGS.use_dynamic_shape,
         trt_min_shape=FLAGS.trt_min_shape,
         trt_max_shape=FLAGS.trt_max_shape,
         trt_opt_shape=FLAGS.trt_opt_shape,
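
The Python-side change above reroutes the dynamic-shape switch: it now travels from the command-line flag into the `Detector` constructor and on to the predictor loader, instead of being read back out of the exported `infer_cfg.yml`. Below is a minimal, self-contained sketch of that flow using simplified stand-in definitions, not the literal PR code:

```python
# Simplified stand-ins mirroring the new call chain: CLI flag -> Detector -> loader.
def load_predictor(model_dir, use_dynamic_shape=False, trt_min_shape=1,
                   trt_max_shape=1280, trt_opt_shape=640):
    # The real loader in deploy/python/infer.py builds a paddle.inference.Config here;
    # this stand-in only reports the values it receives.
    print("load_predictor(use_dynamic_shape=%s, min=%d, max=%d, opt=%d)" %
          (use_dynamic_shape, trt_min_shape, trt_max_shape, trt_opt_shape))


class Detector(object):
    def __init__(self, model_dir, use_dynamic_shape=False, trt_min_shape=1,
                 trt_max_shape=1280, trt_opt_shape=640):
        # use_dynamic_shape is forwarded verbatim; it is no longer looked up
        # in the exported config file.
        load_predictor(model_dir,
                       use_dynamic_shape=use_dynamic_shape,
                       trt_min_shape=trt_min_shape,
                       trt_max_shape=trt_max_shape,
                       trt_opt_shape=trt_opt_shape)


Detector('inference_model/faster_rcnn_r50_fpn_1x_coco', use_dynamic_shape=True,
         trt_min_shape=800, trt_max_shape=1280, trt_opt_shape=960)
```
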
diff --git a/deploy/python/keypoint_det_unite_infer.py b/deploy/python/keypoint_det_unite_infer.py
index 6ff335971ff0521a52dadef9155dd5a4e9deff0a..dd80b290ff89377562dfb7149ed05a9257fd079b 100644
--- a/deploy/python/keypoint_det_unite_infer.py
+++ b/deploy/python/keypoint_det_unite_infer.py
@@ -158,6 +158,7 @@ def main():
         FLAGS.det_model_dir,
         use_gpu=FLAGS.use_gpu,
         run_mode=FLAGS.run_mode,
+        use_dynamic_shape=FLAGS.use_dynamic_shape,
         trt_min_shape=FLAGS.trt_min_shape,
         trt_max_shape=FLAGS.trt_max_shape,
         trt_opt_shape=FLAGS.trt_opt_shape,
@@ -171,6 +172,7 @@
         FLAGS.keypoint_model_dir,
         use_gpu=FLAGS.use_gpu,
         run_mode=FLAGS.run_mode,
+        use_dynamic_shape=FLAGS.use_dynamic_shape,
         trt_min_shape=FLAGS.trt_min_shape,
         trt_max_shape=FLAGS.trt_max_shape,
         trt_opt_shape=FLAGS.trt_opt_shape,
diff --git a/deploy/python/keypoint_infer.py b/deploy/python/keypoint_infer.py
index c1f5e15bf0237a6a2a60e6345759afb03f4110ae..0b7ece269c114cdf919c43f8c64cb4b86e1677f8 100644
--- a/deploy/python/keypoint_infer.py
+++ b/deploy/python/keypoint_infer.py
@@ -46,6 +46,7 @@ class KeyPoint_Detector(object):
         model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
         use_gpu (bool): whether use gpu
         run_mode (str): mode of running(fluid/trt_fp32/trt_fp16)
+        use_dynamic_shape (bool): use dynamic shape or not
         trt_min_shape (int): min shape for dynamic shape in trt
         trt_max_shape (int): max shape for dynamic shape in trt
         trt_opt_shape (int): opt shape for dynamic shape in trt
@@ -58,6 +59,7 @@
                  model_dir,
                  use_gpu=False,
                  run_mode='fluid',
+                 use_dynamic_shape=False,
                  trt_min_shape=1,
                  trt_max_shape=1280,
                  trt_opt_shape=640,
@@ -70,7 +72,7 @@
             run_mode=run_mode,
             min_subgraph_size=self.pred_config.min_subgraph_size,
             use_gpu=use_gpu,
-            use_dynamic_shape=self.pred_config.use_dynamic_shape,
+            use_dynamic_shape=use_dynamic_shape,
             trt_min_shape=trt_min_shape,
             trt_max_shape=trt_max_shape,
             trt_opt_shape=trt_opt_shape,
@@ -208,7 +210,6 @@ class PredictConfig_KeyPoint():
         self.min_subgraph_size = yml_conf['min_subgraph_size']
         self.labels = yml_conf['label_list']
         self.tagmap = False
-        self.use_dynamic_shape = yml_conf['use_dynamic_shape']
         if 'keypoint_bottomup' == self.archcls:
             self.tagmap = True
         self.print_config()
@@ -383,6 +384,7 @@ def main():
         FLAGS.model_dir,
         use_gpu=FLAGS.use_gpu,
         run_mode=FLAGS.run_mode,
+        use_dynamic_shape=FLAGS.use_dynamic_shape,
         trt_min_shape=FLAGS.trt_min_shape,
         trt_max_shape=FLAGS.trt_max_shape,
         trt_opt_shape=FLAGS.trt_opt_shape,
diff --git a/deploy/python/topdown_unite_utils.py b/deploy/python/topdown_unite_utils.py
index 02d3c604975e2c0b2b5333719d1e7b0c236e0723..ab483109fc28a2dcef299c17f6ffb33d7d72ba40 100644
--- a/deploy/python/topdown_unite_utils.py
+++ b/deploy/python/topdown_unite_utils.py
@@ -84,6 +84,11 @@ def argsparser():
         help="Whether use mkldnn with CPU.")
     parser.add_argument(
         "--cpu_threads", type=int, default=1, help="Num of threads with CPU.")
+    parser.add_argument(
+        "--use_dynamic_shape",
+        type=ast.literal_eval,
+        default=False,
+        help="Dynamic_shape for TensorRT.")
     parser.add_argument(
         "--trt_min_shape", type=int, default=1, help="min_shape for TensorRT.")
     parser.add_argument(
diff --git a/deploy/python/utils.py b/deploy/python/utils.py
index aedcab194fcbb468cef1550e3f6a77ca93c5aea1..b7afcd9858fb77be9cff94bf201f8cb14fd0912f 100644
--- a/deploy/python/utils.py
+++ b/deploy/python/utils.py
@@ -76,6 +76,11 @@ def argsparser():
         help="Whether use mkldnn with CPU.")
     parser.add_argument(
         "--cpu_threads", type=int, default=1, help="Num of threads with CPU.")
+    parser.add_argument(
+        "--use_dynamic_shape",
+        type=ast.literal_eval,
+        default=False,
+        help="Dynamic_shape for TensorRT.")
     parser.add_argument(
         "--trt_min_shape", type=int, default=1, help="min_shape for TensorRT.")
     parser.add_argument(
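
The `--use_dynamic_shape` argument added to both arg parsers above uses `ast.literal_eval` so that the string passed on the command line becomes a real Python boolean rather than a string. A minimal sketch of that pattern follows; it assumes `ast` is already imported in the target modules, which the hunks above do not show:

```python
# Boolean CLI flag via ast.literal_eval: "True"/"False" become bool, not str.
import argparse
import ast

parser = argparse.ArgumentParser()
parser.add_argument(
    "--use_dynamic_shape",
    type=ast.literal_eval,
    default=False,
    help="Dynamic_shape for TensorRT.")

args = parser.parse_args(["--use_dynamic_shape=True"])
print(type(args.use_dynamic_shape), args.use_dynamic_shape)  # <class 'bool'> True
# Note: lowercase "true"/"false" are not Python literals and would be rejected.
```
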
diff --git a/ppdet/engine/export_utils.py b/ppdet/engine/export_utils.py
index 40a4fdc98a56840d3bc483b4f7936de01e37e087..04506dc9d0bee9eebf356984ec7692d26ec4b32a 100644
--- a/ppdet/engine/export_utils.py
+++ b/ppdet/engine/export_utils.py
@@ -81,12 +81,10 @@ def _dump_infer_config(config, path, image_shape, model):
         arch_state = False
     from ppdet.core.config.yaml_helpers import setup_orderdict
     setup_orderdict()
-    use_dynamic_shape = True if image_shape[1] == -1 else False
     infer_cfg = OrderedDict({
         'mode': 'fluid',
         'draw_threshold': 0.5,
         'metric': config['metric'],
-        'use_dynamic_shape': use_dynamic_shape
     })
     infer_arch = config['architecture']
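
Because `_dump_infer_config` no longer writes a `use_dynamic_shape` key, configs exported after this change will not contain it. The following is a minimal sketch (not part of this PR; the path is a placeholder) of how external deploy code that still reads the key can stay compatible with both old and new exports:

```python
import yaml

# Placeholder path to an exported config; newer exports omit 'use_dynamic_shape'.
with open('output_inference/yolov3_darknet53_270e_coco/infer_cfg.yml') as f:
    yml_conf = yaml.safe_load(f)

# The pattern removed from infer.py/keypoint_infer.py above would now raise KeyError:
#   use_dynamic_shape = yml_conf['use_dynamic_shape']
use_dynamic_shape = yml_conf.get('use_dynamic_shape', False)
print('use_dynamic_shape:', use_dynamic_shape)
```
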