From cac9635a6733ffbbd816b33e21c3054e0cd81ab1 Mon Sep 17 00:00:00 2001 From: Pei Yang Date: Fri, 12 Mar 2021 18:48:31 +0800 Subject: [PATCH] [Paddle-TRT] Fix engine key in trt int8 calibration (#31513) * fix engine key in trt int8 calibration * fix unit test --- .../ir_passes/tensorrt_subgraph_pass.cc | 26 ++++++++++++------- .../fluid/inference/api/analysis_predictor.cc | 4 +-- .../operators/tensorrt/tensorrt_engine_op.h | 12 ++++++--- .../tensorrt/tensorrt_engine_op_test.cc | 4 +++ 4 files changed, 31 insertions(+), 15 deletions(-) diff --git a/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc b/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc index 8a14e168ca..59ed09b96c 100644 --- a/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc +++ b/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc @@ -86,7 +86,7 @@ std::string GenerateEngineKey(const std::set &engine_inputs, const std::string &predictor_id, const std::string &max_batch_size, const std::string &precision, - const std::string &use_calib_mode) { + const bool for_calibration) { std::string engine_hash_key = ""; for (auto name : engine_inputs) { engine_hash_key += name; @@ -97,12 +97,13 @@ std::string GenerateEngineKey(const std::set &engine_inputs, engine_hash_key += "#"; } engine_hash_key += predictor_id; - engine_hash_key += "#"; - engine_hash_key += max_batch_size; + if (!for_calibration) { + engine_hash_key += "#"; + engine_hash_key += max_batch_size; + } engine_hash_key += "#"; engine_hash_key += precision; - engine_hash_key += "#"; - engine_hash_key += use_calib_mode; + auto engine_key = std::to_string(std::hash()(engine_hash_key)); VLOG(2) << "TRT engine hash key: " << engine_hash_key; VLOG(2) << "TRT engine key: " << engine_key; @@ -258,24 +259,31 @@ void TensorRtSubgraphPass::CreateTensorRTOp( // TODO(NHZlX) // There are models with the same structure but the different parameters, // when running in the 'use_serialize' mode, there is a bug. + // serialization is affected by max_batch_size, but calibration is not. + // So we use seperate engine keys in serialization and calibration. auto engine_key = GenerateEngineKey( input_names_with_id, output_names_with_id, std::to_string(0), std::to_string(Get("max_batch_size")), - std::to_string(static_cast(precision_mode)), - std::to_string(static_cast(use_calib_mode))); + std::to_string(static_cast(precision_mode)), false); + auto calibration_engine_key = GenerateEngineKey( + input_names_with_id, output_names_with_id, std::to_string(0), + std::to_string(Get("max_batch_size")), + std::to_string(static_cast(precision_mode)), true); auto predictor_id = Get("predictor_id"); // Get "" when there is no cached calibration table data. std::string calibration_data = ""; if (enable_int8 && use_calib_mode) { - calibration_data = GetTrtCalibTableData( - Get("model_opt_cache_dir"), engine_key, enable_int8); + calibration_data = + GetTrtCalibTableData(Get("model_opt_cache_dir"), + calibration_engine_key, enable_int8); } op_desc->SetAttr("calibration_data", calibration_data); op_desc->SetAttr("enable_int8", enable_int8); op_desc->SetAttr("enable_fp16", enable_fp16); op_desc->SetAttr("use_calib_mode", use_calib_mode); op_desc->SetAttr("engine_key", engine_key); + op_desc->SetAttr("calibration_engine_key", calibration_engine_key); op_desc->SetAttr("predictor_id", predictor_id); std::string trt_engine_serialized_data = ""; diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index 2a1dacedca..d6080bd692 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -1017,8 +1017,8 @@ bool AnalysisPredictor::SaveTrtCalibToDisk() { auto &block = inference_program_->Block(0); for (auto &op_desc : block.AllOps()) { if (op_desc->Type() == "tensorrt_engine") { - std::string engine_name = - BOOST_GET_CONST(std::string, op_desc->GetAttr("engine_key")); + std::string engine_name = BOOST_GET_CONST( + std::string, op_desc->GetAttr("calibration_engine_key")); if (!Singleton::Global().Has(engine_name)) { LOG(ERROR) << "You should run the predictor(with trt) on the real data " "to generate calibration info"; diff --git a/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h b/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h index b8805c025a..1f0ae40798 100644 --- a/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h +++ b/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h @@ -89,6 +89,7 @@ class TensorRTEngineOp : public framework::OperatorBase { bool use_calib_mode_; std::string calibration_data_; std::string engine_key_; + std::string calibration_engine_key_; bool calibration_mode_; int predictor_id_; int device_id_; @@ -109,6 +110,7 @@ class TensorRTEngineOp : public framework::OperatorBase { use_calib_mode_ = Attr("use_calib_mode"); calibration_data_ = Attr("calibration_data"); engine_key_ = Attr("engine_key"); + calibration_engine_key_ = Attr("calibration_engine_key"); predictor_id_ = Attr("predictor_id"); auto params = Attr>("parameters"); @@ -172,9 +174,11 @@ class TensorRTEngineOp : public framework::OperatorBase { "Paddle TRT int8..."; int runtime_batch = 1; - if (!Singleton::Global().Has(engine_key_)) { + if (!Singleton::Global().Has( + calibration_engine_key_)) { TRTCalibratorEngine *calib_res = - Singleton::Global().Create(engine_key_); + Singleton::Global().Create( + calibration_engine_key_); std::unordered_map calib_buffers; for (auto &x : input_names_) { if (param_names_.count(x)) continue; @@ -185,7 +189,7 @@ class TensorRTEngineOp : public framework::OperatorBase { runtime_batch = t_shape[0]; } calib_res->calib_.reset(new TRTInt8Calibrator( - calib_buffers, runtime_batch, engine_key_, dev_place)); + calib_buffers, runtime_batch, calibration_engine_key_, dev_place)); calib_res->thr_.reset(new std::thread([&]() { calib_res->engine_.reset(new TensorRTEngine( max_batch_size_, workspace_size_, precision_mode_, @@ -198,7 +202,7 @@ class TensorRTEngineOp : public framework::OperatorBase { TRTInt8Calibrator *temp_calibrator = Singleton::Global() - .Get(engine_key_) + .Get(calibration_engine_key_) ->calib_.get(); std::unordered_map calib_data; diff --git a/paddle/fluid/operators/tensorrt/tensorrt_engine_op_test.cc b/paddle/fluid/operators/tensorrt/tensorrt_engine_op_test.cc index 1dcaccd6e9..4e88d79dfe 100644 --- a/paddle/fluid/operators/tensorrt/tensorrt_engine_op_test.cc +++ b/paddle/fluid/operators/tensorrt/tensorrt_engine_op_test.cc @@ -102,6 +102,8 @@ TEST(TensorRTEngineOp, manual) { engine_op_desc.SetAttr("workspace_size", static_cast(1 << 20)); engine_op_desc.SetAttr("parameters", std::vector({})); engine_op_desc.SetAttr("engine_key", std::string("a_engine")); + engine_op_desc.SetAttr("calibration_engine_key", + std::string("a_calib_engine")); engine_op_desc.SetAttr("predictor_id", 1); engine_op_desc.SetAttr("calibration_data", std::string("")); engine_op_desc.SetAttr("enable_int8", static_cast(false)); @@ -204,6 +206,8 @@ void Execute(int batch_size, int input_dim, int output_dim, int nlayers = 1) { engine_op_desc.SetAttr("parameters", std::vector({"y0", "y1", "y2", "y3"})); engine_op_desc.SetAttr("engine_key", std::string("b_engine")); + engine_op_desc.SetAttr("calibration_engine_key", + std::string("b_calib_engine")); engine_op_desc.SetAttr("predictor_id", 1); engine_op_desc.SetAttr("calibration_data", std::string("")); engine_op_desc.SetAttr("enable_int8", static_cast(false)); -- GitLab