Unverified Commit bd99ac42 authored by Guanghua Yu and committed by GitHub

[cherry-pick] Automatically set use_dynamic_shape (#3138)

* Automatically set use_dynamic_shape
Parent be0ca534
......@@ -34,31 +34,33 @@ config->EnableTensorRtEngine(1 << 20 /*workspace_size*/,
### 3.2 TensorRT fixed-size inference
Specify the model input size when exporting the model by setting `TestReader.inputs_def.image_shape=[3,640,640]`; see the [PaddleDetection model export tutorial](../EXPORT_MODEL.md) for details.
`TestReader.inputs_def.image_shape` sets the size of the data fed into the TensorRT engine (for models such as FasterRCNN, `TestReader.inputs_def.image_shape` specifies the image size before the `Pad` operation).
For example, set the following in the model Reader config file:
```yaml
TestReader:
  inputs_def:
    image_shape: [3,608,608]
  ...
```
Alternatively, set `-o TestReader.inputs_def.image_shape=[3,608,608]` when exporting the model; the model will then run fixed-size inference. See the [PaddleDetection model export tutorial](../EXPORT_MODEL.md) for details.
You can open the `model.pdmodel` file with [visualdl](https://www.paddlepaddle.org.cn/paddle/visualdl/demo/graph) to check whether the first input Tensor has a fixed size; if no size is specified, it is displayed as `?`, as shown below:
![img](../docs/images/input_shape.png)
The preprocessed image size must also be consistent with the model input size, which requires setting the `target_size` and `keep_ratio` parameters of the `Resize` op in the `infer_cfg.yml` config file.
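For reference, the relevant preprocessing fields in an exported `infer_cfg.yml` look roughly like this (a minimal sketch; the concrete values are model-specific and the `interp` value shown is only an assumption):
```yaml
Preprocess:
- type: Resize
  keep_ratio: false        # false for a truly fixed input size
  target_size: [608, 608]  # keep consistent with TestReader.inputs_def.image_shape
  interp: 2                # interpolation method; this value is an assumption
```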
Note: since TensorRT does not support slice operations along the batch dimension, running Faster RCNN and Mask RCNN with fixed-size input will fail; use dynamic-size input for these two models.
Note: since TensorRT does not support slice operations along the batch dimension, Faster RCNN and Mask RCNN cannot run fixed-size inference, so do not set the `TestReader.inputs_def.image_shape` field for them.
Taking `YOLOv3` as an example, run dynamic-size inference:
Taking `YOLOv3` as an example, run fixed-size inference:
```
python python/infer.py --model_dir=../inference_model/yolov3_darknet53_270e_coco/ --image_file=../demo/000000014439_640x640.jpg --use_gpu=True --run_mode=trt_fp32 --run_benchmark=True
python python/infer.py --model_dir=../inference_model/yolov3_darknet53_270e_coco/ --image_file=../demo/000000014439.jpg --use_gpu=True --run_mode=trt_fp32 --run_benchmark=True
```
### 3.3 TensorRT dynamic-size inference
With TensorRT version >= 6, TensorRT inference supports dynamic-size input.
With TensorRT version >= 6, TensorRT inference supports dynamic-size input. If the model Reader config file does not set a field such as `TestReader.inputs_def.image_shape=[3,608,608]`, or sets `image_shape=[3,-1,-1]`, the exported model will run inference with dynamic sizes. RCNN-series models generally use dynamic-size inference.
For dynamic-size input with the Paddle inference library, see the description of the `SetTRTDynamicShapeInfo` function in [Paddle CPP inference](https://www.paddlepaddle.org.cn/documentation/docs/zh/guides/05_inference_deployment/inference/native_infer.html).
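The same can be done from Python with `paddle.inference`. Below is a minimal sketch (the input tensor name `image` and the concrete min/max/opt shapes are assumptions; they vary by model and should match the `trt_*_shape` flags):
```python
from paddle.inference import Config, create_predictor

config = Config('model.pdmodel', 'model.pdiparams')
config.enable_use_gpu(200, 0)  # 200 MB initial GPU memory pool on device 0
config.enable_tensorrt_engine(workspace_size=1 << 30,
                              max_batch_size=1,
                              min_subgraph_size=3)
# Register per-tensor min/max/optimal shapes so TensorRT can build a
# dynamic-shape engine; 'image' and the H/W values here are assumptions.
config.set_trt_dynamic_shape_info(
    {'image': [1, 3, 800, 800]},    # min shape
    {'image': [1, 3, 1280, 1280]},  # max shape
    {'image': [1, 3, 960, 960]})    # opt shape
predictor = create_predictor(config)
```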
`python/infer.py`设置动态尺寸输入参数说明:
- use_dynamic_shape 用于设定TensorRT的输入尺寸是否是动态尺寸,默认值:False
- trt_min_shape 用于设定TensorRT的输入图像height、width中的最小尺寸,默认值:1
- trt_max_shape 用于设定TensorRT的输入图像height、width中的最大尺寸,默认值:1280
......@@ -69,7 +71,7 @@ For dynamic-size input with the Paddle inference library, see [Paddle CPP infer
`Faster RCNN`为例,使用动态尺寸输入预测:
```
python python/infer.py --model_dir=../inference_model/faster_rcnn_r50_fpn_1x_coco/ --image_file=../demo/000000014439.jpg --use_gpu=True --run_mode=trt_fp16 --run_benchmark=True --use_dynamic_shape=True --trt_max_shape=1280 --trt_min_shape=800 --trt_opt_shape=960
python python/infer.py --model_dir=../inference_model/faster_rcnn_r50_fpn_1x_coco/ --image_file=../demo/000000014439.jpg --use_gpu=True --run_mode=trt_fp16 --run_benchmark=True --trt_max_shape=1280 --trt_min_shape=800 --trt_opt_shape=960
```
## 4. FAQ
......
......@@ -91,6 +91,14 @@ class ConfigPaser {
      return false;
    }
    // Get use_dynamic_shape for TensorRT
    if (config["use_dynamic_shape"].IsDefined()) {
      use_dynamic_shape_ = config["use_dynamic_shape"].as<bool>();
    } else {
      std::cerr << "Please set use_dynamic_shape." << std::endl;
      return false;
    }
    return true;
  }

  std::string mode_;
......@@ -99,6 +107,7 @@ class ConfigPaser {
  int min_subgraph_size_;
  YAML::Node preprocess_info_;
  std::vector<std::string> label_list_;
  bool use_dynamic_shape_;
};
} // namespace PaddleDetection
......
......@@ -64,7 +64,6 @@ class ObjectDetector {
                 const std::string& run_mode="fluid",
                 const int batch_size=1,
                 const int gpu_id=0,
                 bool use_dynamic_shape=false,
                 const int trt_min_shape=1,
                 const int trt_max_shape=1280,
                 const int trt_opt_shape=640,
......@@ -74,12 +73,12 @@ class ObjectDetector {
    this->cpu_math_library_num_threads_ = cpu_threads;
    this->use_mkldnn_ = use_mkldnn;
    this->use_dynamic_shape_ = use_dynamic_shape;
    this->trt_min_shape_ = trt_min_shape;
    this->trt_max_shape_ = trt_max_shape;
    this->trt_opt_shape_ = trt_opt_shape;
    this->trt_calib_mode_ = trt_calib_mode;
    config_.load_config(model_dir);
    this->use_dynamic_shape_ = config_.use_dynamic_shape_;
    this->min_subgraph_size_ = config_.min_subgraph_size_;
    threshold_ = config_.draw_threshold_;
    preprocessor_.Init(config_.preprocess_info_);
......
......@@ -49,7 +49,6 @@ DEFINE_int32(gpu_id, 0, "Device id of GPU to execute");
DEFINE_bool(run_benchmark, false, "Whether to predict an image_file repeatedly for benchmark");
DEFINE_bool(use_mkldnn, false, "Whether to use mkldnn with CPU");
DEFINE_int32(cpu_threads, 1, "Num of threads with CPU");
DEFINE_bool(use_dynamic_shape, false, "Trt use dynamic shape or not");
DEFINE_int32(trt_min_shape, 1, "Min shape of TRT DynamicShapeInfo");
DEFINE_int32(trt_max_shape, 1280, "Max shape of TRT DynamicShapeInfo");
DEFINE_int32(trt_opt_shape, 640, "Opt shape of TRT DynamicShapeInfo");
......@@ -361,8 +360,9 @@ int main(int argc, char** argv) {
}
  // Load model and create an object detector
  PaddleDetection::ObjectDetector det(FLAGS_model_dir, FLAGS_use_gpu, FLAGS_use_mkldnn,
      FLAGS_cpu_threads, FLAGS_run_mode, FLAGS_batch_size, FLAGS_gpu_id, FLAGS_use_dynamic_shape,
      FLAGS_trt_min_shape, FLAGS_trt_max_shape, FLAGS_trt_opt_shape, FLAGS_trt_calib_mode);
      FLAGS_cpu_threads, FLAGS_run_mode, FLAGS_batch_size, FLAGS_gpu_id,
      FLAGS_trt_min_shape, FLAGS_trt_max_shape, FLAGS_trt_opt_shape,
      FLAGS_trt_calib_mode);
  // Do inference on input video or image
  if (!FLAGS_video_file.empty() || FLAGS_camera_id != -1) {
    PredictVideo(FLAGS_video_file, &det);
......@@ -374,13 +374,14 @@ int main(int argc, char** argv) {
    if (!FLAGS_image_file.empty()) {
      all_imgs.push_back(FLAGS_image_file);
      if (FLAGS_batch_size > 1) {
        std::cout << "batch_size should be 1, when image_file is not None" << std::endl;
        FLAGS_batch_size = 1;
        std::cout << "batch_size should be 1 when `image_file` is set." << std::endl;
        return -1;
      }
    } else {
      GetAllFiles((char *)FLAGS_image_dir.c_str(), all_imgs);
    }
    PredictImage(all_imgs, FLAGS_batch_size, FLAGS_threshold, FLAGS_run_benchmark, &det, FLAGS_output_dir);
    PredictImage(all_imgs, FLAGS_batch_size, FLAGS_threshold,
                 FLAGS_run_benchmark, &det, FLAGS_output_dir);
  }
  return 0;
}
......@@ -51,7 +51,6 @@ class Detector(object):
        use_gpu (bool): whether to use gpu
        run_mode (str): running mode (fluid/trt_fp32/trt_fp16)
        batch_size (int): batch size for inference
        use_dynamic_shape (bool): use dynamic shape or not
        trt_min_shape (int): min shape for dynamic shape in trt
        trt_max_shape (int): max shape for dynamic shape in trt
        trt_opt_shape (int): opt shape for dynamic shape in trt
......@@ -65,7 +64,6 @@ class Detector(object):
                 use_gpu=False,
                 run_mode='fluid',
                 batch_size=1,
                 use_dynamic_shape=False,
                 trt_min_shape=1,
                 trt_max_shape=1280,
                 trt_opt_shape=640,
......@@ -79,7 +77,7 @@ class Detector(object):
            batch_size=batch_size,
            min_subgraph_size=self.pred_config.min_subgraph_size,
            use_gpu=use_gpu,
            use_dynamic_shape=use_dynamic_shape,
            use_dynamic_shape=self.pred_config.use_dynamic_shape,
            trt_min_shape=trt_min_shape,
            trt_max_shape=trt_max_shape,
            trt_opt_shape=trt_opt_shape,
......@@ -189,7 +187,6 @@ class DetectorSOLOv2(Detector):
        use_gpu (bool): whether to use gpu
        run_mode (str): running mode (fluid/trt_fp32/trt_fp16)
        batch_size (int): batch size for inference
        use_dynamic_shape (bool): use dynamic shape or not
        trt_min_shape (int): min shape for dynamic shape in trt
        trt_max_shape (int): max shape for dynamic shape in trt
        trt_opt_shape (int): opt shape for dynamic shape in trt
......@@ -202,7 +199,6 @@ class DetectorSOLOv2(Detector):
                 use_gpu=False,
                 run_mode='fluid',
                 batch_size=1,
                 use_dynamic_shape=False,
                 trt_min_shape=1,
                 trt_max_shape=1280,
                 trt_opt_shape=640,
......@@ -216,7 +212,7 @@ class DetectorSOLOv2(Detector):
            batch_size=batch_size,
            min_subgraph_size=self.pred_config.min_subgraph_size,
            use_gpu=use_gpu,
            use_dynamic_shape=use_dynamic_shape,
            use_dynamic_shape=self.pred_config.use_dynamic_shape,
            trt_min_shape=trt_min_shape,
            trt_max_shape=trt_max_shape,
            trt_opt_shape=trt_opt_shape,
......@@ -328,6 +324,7 @@ class PredictConfig():
        self.min_subgraph_size = yml_conf['min_subgraph_size']
        self.labels = yml_conf['label_list']
        self.mask = False
        self.use_dynamic_shape = yml_conf['use_dynamic_shape']
        if 'mask' in yml_conf:
            self.mask = yml_conf['mask']
        self.print_config()
......@@ -573,7 +570,6 @@ def main():
        use_gpu=FLAGS.use_gpu,
        run_mode=FLAGS.run_mode,
        batch_size=FLAGS.batch_size,
        use_dynamic_shape=FLAGS.use_dynamic_shape,
        trt_min_shape=FLAGS.trt_min_shape,
        trt_max_shape=FLAGS.trt_max_shape,
        trt_opt_shape=FLAGS.trt_opt_shape,
......@@ -587,7 +583,6 @@ def main():
        use_gpu=FLAGS.use_gpu,
        run_mode=FLAGS.run_mode,
        batch_size=FLAGS.batch_size,
        use_dynamic_shape=FLAGS.use_dynamic_shape,
        trt_min_shape=FLAGS.trt_min_shape,
        trt_max_shape=FLAGS.trt_max_shape,
        trt_opt_shape=FLAGS.trt_opt_shape,
......
......@@ -158,7 +158,6 @@ def main():
        FLAGS.det_model_dir,
        use_gpu=FLAGS.use_gpu,
        run_mode=FLAGS.run_mode,
        use_dynamic_shape=FLAGS.use_dynamic_shape,
        trt_min_shape=FLAGS.trt_min_shape,
        trt_max_shape=FLAGS.trt_max_shape,
        trt_opt_shape=FLAGS.trt_opt_shape,
......@@ -172,7 +171,6 @@ def main():
        FLAGS.keypoint_model_dir,
        use_gpu=FLAGS.use_gpu,
        run_mode=FLAGS.run_mode,
        use_dynamic_shape=FLAGS.use_dynamic_shape,
        trt_min_shape=FLAGS.trt_min_shape,
        trt_max_shape=FLAGS.trt_max_shape,
        trt_opt_shape=FLAGS.trt_opt_shape,
......
......@@ -46,7 +46,6 @@ class KeyPoint_Detector(object):
        model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
        use_gpu (bool): whether to use gpu
        run_mode (str): running mode (fluid/trt_fp32/trt_fp16)
        use_dynamic_shape (bool): use dynamic shape or not
        trt_min_shape (int): min shape for dynamic shape in trt
        trt_max_shape (int): max shape for dynamic shape in trt
        trt_opt_shape (int): opt shape for dynamic shape in trt
......@@ -59,7 +58,6 @@ class KeyPoint_Detector(object):
                 model_dir,
                 use_gpu=False,
                 run_mode='fluid',
                 use_dynamic_shape=False,
                 trt_min_shape=1,
                 trt_max_shape=1280,
                 trt_opt_shape=640,
......@@ -72,7 +70,7 @@ class KeyPoint_Detector(object):
            run_mode=run_mode,
            min_subgraph_size=self.pred_config.min_subgraph_size,
            use_gpu=use_gpu,
            use_dynamic_shape=use_dynamic_shape,
            use_dynamic_shape=self.pred_config.use_dynamic_shape,
            trt_min_shape=trt_min_shape,
            trt_max_shape=trt_max_shape,
            trt_opt_shape=trt_opt_shape,
......@@ -210,6 +208,7 @@ class PredictConfig_KeyPoint():
        self.min_subgraph_size = yml_conf['min_subgraph_size']
        self.labels = yml_conf['label_list']
        self.tagmap = False
        self.use_dynamic_shape = yml_conf['use_dynamic_shape']
        if 'keypoint_bottomup' == self.archcls:
            self.tagmap = True
        self.print_config()
......@@ -384,7 +383,6 @@ def main():
        FLAGS.model_dir,
        use_gpu=FLAGS.use_gpu,
        run_mode=FLAGS.run_mode,
        use_dynamic_shape=FLAGS.use_dynamic_shape,
        trt_min_shape=FLAGS.trt_min_shape,
        trt_max_shape=FLAGS.trt_max_shape,
        trt_opt_shape=FLAGS.trt_opt_shape,
......
......@@ -84,11 +84,6 @@ def argsparser():
help="Whether use mkldnn with CPU.")
parser.add_argument(
"--cpu_threads", type=int, default=1, help="Num of threads with CPU.")
parser.add_argument(
"--use_dynamic_shape",
type=ast.literal_eval,
default=False,
help="Dynamic_shape for TensorRT.")
parser.add_argument(
"--trt_min_shape", type=int, default=1, help="min_shape for TensorRT.")
parser.add_argument(
......
......@@ -76,11 +76,6 @@ def argsparser():
help="Whether use mkldnn with CPU.")
parser.add_argument(
"--cpu_threads", type=int, default=1, help="Num of threads with CPU.")
parser.add_argument(
"--use_dynamic_shape",
type=ast.literal_eval,
default=False,
help="Dynamic_shape for TensorRT.")
parser.add_argument(
"--trt_min_shape", type=int, default=1, help="min_shape for TensorRT.")
parser.add_argument(
......
......@@ -81,10 +81,12 @@ def _dump_infer_config(config, path, image_shape, model):
    arch_state = False
    from ppdet.core.config.yaml_helpers import setup_orderdict
    setup_orderdict()
    use_dynamic_shape = True if image_shape[1] == -1 else False
    infer_cfg = OrderedDict({
        'mode': 'fluid',
        'draw_threshold': 0.5,
        'metric': config['metric'],
        'use_dynamic_shape': use_dynamic_shape
    })
    infer_arch = config['architecture']
......
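With this change the exporter records the dynamic-shape decision in `infer_cfg.yml`, and the deploy code reads it back instead of taking a `--use_dynamic_shape` flag. A sketch of the resulting config header for a fixed-shape export (assuming the `metric` is COCO; the other fields follow the code above):
```yaml
mode: fluid
draw_threshold: 0.5
metric: COCO
use_dynamic_shape: false
```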