未验证 提交 78a530c2 编写于 作者: P Pei Yang 提交者: GitHub

[Paddle-TRT] Support PaddleSlim quant models in TRT dynamic shape mode (#26536)

* support INT8 in TRT dynamic shape mode

* add unit test

* add support for sigmoid; adapt to the TRT 6+ API
上级 df0a22d9
......@@ -186,6 +186,14 @@ void TensorRTEngine::FreezeNetwork() {
Vec2TRT_Dims(optim_input_shape_[input.first], input.first, true));
}
infer_builder_config_->addOptimizationProfile(optim_profile_);
infer_builder_config_->setMaxWorkspaceSize(max_workspace_);
if (enable_int8) {
// Due to a bug in TRT, the kFP16 precision BuilderFlag must be set before
// kINT8 here for INT8 inference to work.
infer_builder_config_->setFlag(nvinfer1::BuilderFlag::kFP16);
infer_builder_config_->setFlag(nvinfer1::BuilderFlag::kINT8);
infer_builder_config_->setFlag(nvinfer1::BuilderFlag::kSTRICT_TYPES);
}
if (WithFp16()) {
infer_builder_config_->setFlag(nvinfer1::BuilderFlag::kFP16);
if (disable_trt_plugin_fp16()) {
......
......@@ -51,6 +51,7 @@ struct SimpleOpTypeSetTeller : public Teller {
"relu",
"depthwise_conv2d",
"softmax",
"sigmoid",
"batch_norm",
"elementwise_add",
"leaky_relu",
......
......@@ -431,9 +431,9 @@ if(WITH_GPU AND TENSORRT_FOUND)
EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} paddle_fluid_c
ARGS --infer_model=${TRT_MODEL_INSTALL_DIR}/trt_inference_test_models)
set(TRT_MODEL_QUANT_RESNET_DIR "${INFERENCE_DEMO_INSTALL_DIR}/quant_small_model")
set(TRT_MODEL_QUANT_RESNET_DIR "${INFERENCE_DEMO_INSTALL_DIR}/small_quant_model")
if (NOT EXISTS ${TRT_MODEL_QUANT_RESNET_DIR})
inference_download_and_uncompress(${INFERENCE_DEMO_INSTALL_DIR} ${INFERENCE_URL}/tensorrt_test "quant_small_model.tar.gz")
inference_download_and_uncompress(${INFERENCE_DEMO_INSTALL_DIR} ${INFERENCE_URL}/tensorrt_test "small_quant_model.tgz")
endif()
inference_analysis_test(trt_quant_int8_test SRCS trt_quant_int8_test.cc
EXTRA_DEPS ${INFERENCE_EXTRA_DEPS}
......
......@@ -25,12 +25,20 @@ namespace inference {
TEST(quant_int8, resnet50) {
std::string model_dir = FLAGS_infer_model;
AnalysisConfig config;
config.EnableUseGpu(100, 0);
config.EnableUseGpu(1000, 0);
config.SetModel(model_dir);
config.SwitchUseFeedFetchOps(false);
config.EnableTensorRtEngine(1 << 30, 1, 1, AnalysisConfig::Precision::kInt8,
false, false);
std::map<std::string, std::vector<int>> min_input_shape = {
{"image", {1, 1, 3, 3}}};
std::map<std::string, std::vector<int>> max_input_shape = {
{"image", {1, 1, 10, 10}}};
std::map<std::string, std::vector<int>> opt_input_shape = {
{"image", {1, 1, 3, 3}}};
config.SetTRTDynamicShapeInfo(min_input_shape, max_input_shape,
opt_input_shape);
auto predictor = CreatePaddlePredictor(config);
auto input_names = predictor->GetInputNames();
int channels = 1;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册