From 78a530c219d06d09286d9f4d2c65d9516b60ec83 Mon Sep 17 00:00:00 2001 From: Pei Yang Date: Mon, 31 Aug 2020 14:52:12 +0800 Subject: [PATCH] [Paddle-TRT] TRT dynamic shape support PaddleSlim quant models (#26536) * support trt dynamic shape int8 * add unittest * add support for sigmoid; adapt to trt6+ api --- paddle/fluid/inference/tensorrt/engine.cc | 8 ++++++++ paddle/fluid/inference/tensorrt/op_teller.cc | 1 + paddle/fluid/inference/tests/api/CMakeLists.txt | 4 ++-- .../fluid/inference/tests/api/trt_quant_int8_test.cc | 10 +++++++++- 4 files changed, 20 insertions(+), 3 deletions(-) diff --git a/paddle/fluid/inference/tensorrt/engine.cc b/paddle/fluid/inference/tensorrt/engine.cc index 03f5a751511..22be8774932 100644 --- a/paddle/fluid/inference/tensorrt/engine.cc +++ b/paddle/fluid/inference/tensorrt/engine.cc @@ -186,6 +186,14 @@ void TensorRTEngine::FreezeNetwork() { Vec2TRT_Dims(optim_input_shape_[input.first], input.first, true)); } infer_builder_config_->addOptimizationProfile(optim_profile_); + infer_builder_config_->setMaxWorkspaceSize(max_workspace_); + if (enable_int8) { + // Due to a bug of TRT, we must set precision BuilderFlag to kFP16 before + // kINT8 here to perform INT8 inference. 
+ infer_builder_config_->setFlag(nvinfer1::BuilderFlag::kFP16); + infer_builder_config_->setFlag(nvinfer1::BuilderFlag::kINT8); + infer_builder_config_->setFlag(nvinfer1::BuilderFlag::kSTRICT_TYPES); + } if (WithFp16()) { infer_builder_config_->setFlag(nvinfer1::BuilderFlag::kFP16); if (disable_trt_plugin_fp16()) { diff --git a/paddle/fluid/inference/tensorrt/op_teller.cc b/paddle/fluid/inference/tensorrt/op_teller.cc index f5d22b982de..e8cbb9431cb 100644 --- a/paddle/fluid/inference/tensorrt/op_teller.cc +++ b/paddle/fluid/inference/tensorrt/op_teller.cc @@ -51,6 +51,7 @@ struct SimpleOpTypeSetTeller : public Teller { "relu", "depthwise_conv2d", "softmax", + "sigmoid", "batch_norm", "elementwise_add", "leaky_relu", diff --git a/paddle/fluid/inference/tests/api/CMakeLists.txt b/paddle/fluid/inference/tests/api/CMakeLists.txt index 2bd30bc0517..07af5c152b1 100644 --- a/paddle/fluid/inference/tests/api/CMakeLists.txt +++ b/paddle/fluid/inference/tests/api/CMakeLists.txt @@ -431,9 +431,9 @@ if(WITH_GPU AND TENSORRT_FOUND) EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} paddle_fluid_c ARGS --infer_model=${TRT_MODEL_INSTALL_DIR}/trt_inference_test_models) - set(TRT_MODEL_QUANT_RESNET_DIR "${INFERENCE_DEMO_INSTALL_DIR}/quant_small_model") + set(TRT_MODEL_QUANT_RESNET_DIR "${INFERENCE_DEMO_INSTALL_DIR}/small_quant_model") if (NOT EXISTS ${TRT_MODEL_QUANT_RESNET_DIR}) - inference_download_and_uncompress(${INFERENCE_DEMO_INSTALL_DIR} ${INFERENCE_URL}/tensorrt_test "quant_small_model.tar.gz") + inference_download_and_uncompress(${INFERENCE_DEMO_INSTALL_DIR} ${INFERENCE_URL}/tensorrt_test "small_quant_model.tgz") endif() inference_analysis_test(trt_quant_int8_test SRCS trt_quant_int8_test.cc EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} diff --git a/paddle/fluid/inference/tests/api/trt_quant_int8_test.cc b/paddle/fluid/inference/tests/api/trt_quant_int8_test.cc index ca5cdbbcb26..6adf3cf743b 100644 --- a/paddle/fluid/inference/tests/api/trt_quant_int8_test.cc +++ 
b/paddle/fluid/inference/tests/api/trt_quant_int8_test.cc @@ -25,12 +25,20 @@ namespace inference { TEST(quant_int8, resnet50) { std::string model_dir = FLAGS_infer_model; AnalysisConfig config; - config.EnableUseGpu(100, 0); + config.EnableUseGpu(1000, 0); config.SetModel(model_dir); config.SwitchUseFeedFetchOps(false); config.EnableTensorRtEngine(1 << 30, 1, 1, AnalysisConfig::Precision::kInt8, false, false); + std::map<std::string, std::vector<int>> min_input_shape = { + {"image", {1, 1, 3, 3}}}; + std::map<std::string, std::vector<int>> max_input_shape = { + {"image", {1, 1, 10, 10}}}; + std::map<std::string, std::vector<int>> opt_input_shape = { + {"image", {1, 1, 3, 3}}}; + config.SetTRTDynamicShapeInfo(min_input_shape, max_input_shape, + opt_input_shape); auto predictor = CreatePaddlePredictor(config); auto input_names = predictor->GetInputNames(); int channels = 1; -- GitLab