From a9ed4277496c4a19927d02def3e41e545ae1e6bb Mon Sep 17 00:00:00 2001
From: nhzlx
Date: Thu, 7 Mar 2019 03:31:17 +0000
Subject: [PATCH] can not pass CI: add option to choose whether to use the static engine for TRT

test=develop
---
 paddle/fluid/inference/analysis/argument.h | 6 ++++++
 paddle/fluid/inference/analysis/ir_pass_manager.cc | 2 ++
 .../inference/analysis/ir_passes/tensorrt_subgraph_pass.cc | 3 ++-
 paddle/fluid/inference/api/analysis_config.cc | 4 +++-
 paddle/fluid/inference/api/analysis_predictor.cc | 1 +
 paddle/fluid/inference/api/paddle_analysis_config.h | 4 +++-
 paddle/fluid/inference/tests/api/trt_models_tester.cc | 3 ++-
 paddle/fluid/pybind/inference_api.cc | 3 ++-
 8 files changed, 21 insertions(+), 5 deletions(-)

diff --git a/paddle/fluid/inference/analysis/argument.h b/paddle/fluid/inference/analysis/argument.h
index 2f31b182a..89e934ae2 100644
--- a/paddle/fluid/inference/analysis/argument.h
+++ b/paddle/fluid/inference/analysis/argument.h
@@ -23,8 +23,12 @@ #pragma once +#include #include +#include +#include #include + #include "paddle/fluid/framework/ir/graph.h" #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/framework/scope.h"
@@ -133,6 +137,8 @@ struct Argument { DECL_ARGUMENT_FIELD(tensorrt_min_subgraph_size, TensorRtMinSubgraphSize, int); DECL_ARGUMENT_FIELD(tensorrt_precision_mode, TensorRtPrecisionMode, AnalysisConfig::Precision); + DECL_ARGUMENT_FIELD(tensorrt_use_static_engine, TensorRtUseStaticEngine, + bool); // Memory optimized related. DECL_ARGUMENT_FIELD(enable_memory_optim, EnableMemoryOptim, bool);
diff --git a/paddle/fluid/inference/analysis/ir_pass_manager.cc b/paddle/fluid/inference/analysis/ir_pass_manager.cc
index 16973aeb8..1cdb4881f 100644
--- a/paddle/fluid/inference/analysis/ir_pass_manager.cc
+++ b/paddle/fluid/inference/analysis/ir_pass_manager.cc
@@ -82,6 +82,8 @@ void IRPassManager::CreatePasses(Argument *argument, "model_opt_cache_dir", new std::string(GetOrCreateModelOptCacheDir(model_opt_cache_dir))); pass->Set("gpu_device_id", new int(argument->gpu_device_id())); + pass->Set("use_static_engine", + new bool(argument->tensorrt_use_static_engine())); } pre_pass = pass_name;
diff --git a/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc b/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
index 8b796c207..d4e2da895 100644
--- a/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
+++ b/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
@@ -226,10 +226,11 @@ void TensorRtSubgraphPass::CreateTensorRTOp( calibrator.reset(new tensorrt::TRTInt8Calibrator(calibration_data)); } + bool use_static_engine = Get<bool>("use_static_engine"); // When in int8 mode and calibration_mode, the program just produce the // calibration table data.
bool calibration_mode = (enable_int8 && calibration_data.size() == 0); - if (!calibration_mode) { + if (!calibration_mode && use_static_engine) { std::copy(params.begin(), params.end(), std::back_inserter(*repetitive_params)); std::string trt_engine_serialized_data = GetTrtEngineSerializedData( diff --git a/paddle/fluid/inference/api/analysis_config.cc b/paddle/fluid/inference/api/analysis_config.cc index 522ab4952..774111122 100644 --- a/paddle/fluid/inference/api/analysis_config.cc +++ b/paddle/fluid/inference/api/analysis_config.cc @@ -103,6 +103,7 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) { CP_MEMBER(tensorrt_max_batchsize_); CP_MEMBER(tensorrt_min_subgraph_size_); CP_MEMBER(tensorrt_precision_mode_); + CP_MEMBER(trt_use_static_engine_); // MKLDNN related. CP_MEMBER(use_mkldnn_); CP_MEMBER(mkldnn_enabled_op_types_); @@ -144,7 +145,7 @@ void AnalysisConfig::EnableMKLDNN() { void AnalysisConfig::EnableTensorRtEngine( int workspace_size, int max_batch_size, int min_subgraph_size, - AnalysisConfig::Precision precision_mode) { + AnalysisConfig::Precision precision_mode, bool use_static) { #ifdef PADDLE_WITH_CUDA if (!use_gpu()) { LOG(ERROR) << "To use TensorRT engine, please call EnableGpu() first"; @@ -156,6 +157,7 @@ void AnalysisConfig::EnableTensorRtEngine( tensorrt_max_batchsize_ = max_batch_size; tensorrt_min_subgraph_size_ = min_subgraph_size; tensorrt_precision_mode_ = precision_mode; + trt_use_static_engine_ = use_static; Update(); #else diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index edb15d663..04b3ad943 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -362,6 +362,7 @@ void AnalysisPredictor::OptimizeInferenceProgram() { argument_.SetTensorRtMaxBatchSize(config_.tensorrt_max_batchsize_); argument_.SetTensorRtMinSubgraphSize(config_.tensorrt_min_subgraph_size_); argument_.SetTensorRtPrecisionMode(config_.tensorrt_precision_mode_); + argument_.SetTensorRtUseStaticEngine(config_.trt_use_static_engine_); } if (config_.use_mkldnn_) { diff --git a/paddle/fluid/inference/api/paddle_analysis_config.h b/paddle/fluid/inference/api/paddle_analysis_config.h index c1c6227cd..9b05c3350 100644 --- a/paddle/fluid/inference/api/paddle_analysis_config.h +++ b/paddle/fluid/inference/api/paddle_analysis_config.h @@ -135,7 +135,8 @@ struct AnalysisConfig { */ void EnableTensorRtEngine(int workspace_size = 1 << 20, int max_batch_size = 1, int min_subgraph_size = 3, - Precision precision = Precision::kFloat32); + Precision precision = Precision::kFloat32, + bool use_static = true); /** A boolean state telling whether the TensorRT engine is used. */ bool tensorrt_engine_enabled() const { return use_tensorrt_; } @@ -233,6 +234,7 @@ struct AnalysisConfig { // subgraph, 3 as default value. int tensorrt_min_subgraph_size_{3}; Precision tensorrt_precision_mode_; + bool trt_use_static_engine_; // memory reuse related. 
bool enable_memory_optim_{false}; diff --git a/paddle/fluid/inference/tests/api/trt_models_tester.cc b/paddle/fluid/inference/tests/api/trt_models_tester.cc index 17a433c9d..cb668a417 100644 --- a/paddle/fluid/inference/tests/api/trt_models_tester.cc +++ b/paddle/fluid/inference/tests/api/trt_models_tester.cc @@ -54,7 +54,8 @@ void SetConfig(AnalysisConfig* config, std::string model_dir, if (use_gpu) { config->EnableUseGpu(100, 0); if (use_tensorrt) { - config->EnableTensorRtEngine(1 << 10, batch_size); + config->EnableTensorRtEngine(1 << 10, batch_size, 3, + AnalysisConfig::Precision::kFloat32, false); config->pass_builder()->DeletePass("conv_bn_fuse_pass"); config->pass_builder()->DeletePass("fc_fuse_pass"); config->pass_builder()->TurnOnDebug(); diff --git a/paddle/fluid/pybind/inference_api.cc b/paddle/fluid/pybind/inference_api.cc index 7db2bb451..03c1b0bd0 100644 --- a/paddle/fluid/pybind/inference_api.cc +++ b/paddle/fluid/pybind/inference_api.cc @@ -221,7 +221,8 @@ void BindAnalysisConfig(py::module *m) { .def("enable_tensorrt_engine", &AnalysisConfig::EnableTensorRtEngine, py::arg("workspace_size") = 1 << 20, py::arg("max_batch_size") = 1, py::arg("min_subgraph_size") = 3, - py::arg("precision_mode") = AnalysisConfig::Precision::kFloat32) + py::arg("precision_mode") = AnalysisConfig::Precision::kFloat32, + py::arg("use_static") = true) .def("tensorrt_engine_enabled", &AnalysisConfig::tensorrt_engine_enabled) .def("switch_ir_debug", &AnalysisConfig::SwitchIrDebug, py::arg("x") = true) -- GitLab
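
For reference, a minimal usage sketch of the new use_static switch from the C++ side. This is illustrative only and not part of the patch: the model directory is a placeholder, and the paddle_inference_api.h header and CreatePaddlePredictor entry point are assumed from the public inference API rather than shown in this diff.

#include "paddle/fluid/inference/api/paddle_inference_api.h"

int main() {
  paddle::AnalysisConfig config;
  config.SetModel("./mobilenet");                // placeholder model directory
  config.EnableUseGpu(100 /*MB*/, 0 /*device*/);
  // With use_static = true the serialized TensorRT engine is written to the
  // model-opt cache dir so it can be reused on later runs; with false the
  // engine is rebuilt each time, which is what trt_models_tester.cc above
  // switches to.
  config.EnableTensorRtEngine(1 << 20 /*workspace_size*/,
                              1 /*max_batch_size*/, 3 /*min_subgraph_size*/,
                              paddle::AnalysisConfig::Precision::kFloat32,
                              true /*use_static*/);
  auto predictor = paddle::CreatePaddlePredictor(config);
  return predictor != nullptr ? 0 : 1;
}

Since use_static defaults to true, existing callers keep the cached-engine behaviour; only the test config above opts out by passing false.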