[Paddle-TRT] Fix engine key in trt int8 calibration (#31513)

* fix engine key in trt int8 calibration * fix unit test

[Paddle-TRT] Fix engine key in trt int8 calibration (#31513)
* fix engine key in trt int8 calibration * fix unit test
cac9635a · Pei Yang · GitHub · 50ac7dbf · cac9635a · cac9635a
4 changed file
--- a/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
+++ b/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
@@ -86,7 +86,7 @@ std::string GenerateEngineKey(const std::set<std::string> &engine_inputs,
                              const std::string &predictor_id,
                              const std::string &max_batch_size,
                              const std::string &precision,
-                              const std::string &use_calib_mode) {
+                              const bool for_calibration) {
  std::string engine_hash_key = "";
  for (auto name : engine_inputs) {
    engine_hash_key += name;
@@ -97,12 +97,13 @@ std::string GenerateEngineKey(const std::set<std::string> &engine_inputs,
    engine_hash_key += "#";
  }
  engine_hash_key += predictor_id;
-  engine_hash_key += "#";
-  engine_hash_key += max_batch_size;
+  if (!for_calibration) {
+    engine_hash_key += "#";
+    engine_hash_key += max_batch_size;
+  }
  engine_hash_key += "#";
  engine_hash_key += precision;
-  engine_hash_key += "#";
-  engine_hash_key += use_calib_mode;
+
  auto engine_key = std::to_string(std::hash<std::string>()(engine_hash_key));
  VLOG(2) << "TRT engine hash key: " << engine_hash_key;
  VLOG(2) << "TRT engine key: " << engine_key;
@@ -258,24 +259,31 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
  // TODO(NHZlX)
  // There are models with the same structure but the different parameters,
  // when running in the 'use_serialize' mode, there is a bug.
+  // serialization is affected by max_batch_size, but calibration is not.
+  // So we use seperate engine keys in serialization and calibration.
  auto engine_key = GenerateEngineKey(
      input_names_with_id, output_names_with_id, std::to_string(0),
      std::to_string(Get<int>("max_batch_size")),
-      std::to_string(static_cast<int>(precision_mode)),
-      std::to_string(static_cast<int>(use_calib_mode)));
+      std::to_string(static_cast<int>(precision_mode)), false);
+  auto calibration_engine_key = GenerateEngineKey(
+      input_names_with_id, output_names_with_id, std::to_string(0),
+      std::to_string(Get<int>("max_batch_size")),
+      std::to_string(static_cast<int>(precision_mode)), true);
  auto predictor_id = Get<int>("predictor_id");

  // Get "" when there is no cached calibration table data.
  std::string calibration_data = "";
  if (enable_int8 && use_calib_mode) {
-    calibration_data = GetTrtCalibTableData(
-        Get<std::string>("model_opt_cache_dir"), engine_key, enable_int8);
+    calibration_data =
+        GetTrtCalibTableData(Get<std::string>("model_opt_cache_dir"),
+                             calibration_engine_key, enable_int8);
  }
  op_desc->SetAttr("calibration_data", calibration_data);
  op_desc->SetAttr("enable_int8", enable_int8);
  op_desc->SetAttr("enable_fp16", enable_fp16);
  op_desc->SetAttr("use_calib_mode", use_calib_mode);
  op_desc->SetAttr("engine_key", engine_key);
+  op_desc->SetAttr("calibration_engine_key", calibration_engine_key);
  op_desc->SetAttr("predictor_id", predictor_id);

  std::string trt_engine_serialized_data = "";

--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -1017,8 +1017,8 @@ bool AnalysisPredictor::SaveTrtCalibToDisk() {
  auto &block = inference_program_->Block(0);
  for (auto &op_desc : block.AllOps()) {
    if (op_desc->Type() == "tensorrt_engine") {
-      std::string engine_name =
-          BOOST_GET_CONST(std::string, op_desc->GetAttr("engine_key"));
+      std::string engine_name = BOOST_GET_CONST(
+          std::string, op_desc->GetAttr("calibration_engine_key"));
      if (!Singleton<TRTCalibratorEngineManager>::Global().Has(engine_name)) {
        LOG(ERROR) << "You should run the predictor(with trt) on the real data "
                      "to generate calibration info";

--- a/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h
+++ b/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h
@@ -89,6 +89,7 @@ class TensorRTEngineOp : public framework::OperatorBase {
  bool use_calib_mode_;
  std::string calibration_data_;
  std::string engine_key_;
+  std::string calibration_engine_key_;
  bool calibration_mode_;
  int predictor_id_;
  int device_id_;
@@ -109,6 +110,7 @@ class TensorRTEngineOp : public framework::OperatorBase {
    use_calib_mode_ = Attr<bool>("use_calib_mode");
    calibration_data_ = Attr<std::string>("calibration_data");
    engine_key_ = Attr<std::string>("engine_key");
+    calibration_engine_key_ = Attr<std::string>("calibration_engine_key");
    predictor_id_ = Attr<int>("predictor_id");

    auto params = Attr<std::vector<std::string>>("parameters");
@@ -172,9 +174,11 @@ class TensorRTEngineOp : public framework::OperatorBase {
                            "Paddle TRT int8...";

    int runtime_batch = 1;
-    if (!Singleton<TRTCalibratorEngineManager>::Global().Has(engine_key_)) {
+    if (!Singleton<TRTCalibratorEngineManager>::Global().Has(
+            calibration_engine_key_)) {
      TRTCalibratorEngine *calib_res =
-          Singleton<TRTCalibratorEngineManager>::Global().Create(engine_key_);
+          Singleton<TRTCalibratorEngineManager>::Global().Create(
+              calibration_engine_key_);
      std::unordered_map<std::string, size_t> calib_buffers;
      for (auto &x : input_names_) {
        if (param_names_.count(x)) continue;
@@ -185,7 +189,7 @@ class TensorRTEngineOp : public framework::OperatorBase {
        runtime_batch = t_shape[0];
      }
      calib_res->calib_.reset(new TRTInt8Calibrator(
-          calib_buffers, runtime_batch, engine_key_, dev_place));
+          calib_buffers, runtime_batch, calibration_engine_key_, dev_place));
      calib_res->thr_.reset(new std::thread([&]() {
        calib_res->engine_.reset(new TensorRTEngine(
            max_batch_size_, workspace_size_, precision_mode_,
@@ -198,7 +202,7 @@ class TensorRTEngineOp : public framework::OperatorBase {

    TRTInt8Calibrator *temp_calibrator =
        Singleton<TRTCalibratorEngineManager>::Global()
-            .Get(engine_key_)
+            .Get(calibration_engine_key_)
            ->calib_.get();
    std::unordered_map<std::string, void *> calib_data;


--- a/paddle/fluid/operators/tensorrt/tensorrt_engine_op_test.cc
+++ b/paddle/fluid/operators/tensorrt/tensorrt_engine_op_test.cc
@@ -102,6 +102,8 @@ TEST(TensorRTEngineOp, manual) {
  engine_op_desc.SetAttr("workspace_size", static_cast<int>(1 << 20));
  engine_op_desc.SetAttr("parameters", std::vector<std::string>({}));
  engine_op_desc.SetAttr("engine_key", std::string("a_engine"));
+  engine_op_desc.SetAttr("calibration_engine_key",
+                         std::string("a_calib_engine"));
  engine_op_desc.SetAttr("predictor_id", 1);
  engine_op_desc.SetAttr("calibration_data", std::string(""));
  engine_op_desc.SetAttr("enable_int8", static_cast<bool>(false));
@@ -204,6 +206,8 @@ void Execute(int batch_size, int input_dim, int output_dim, int nlayers = 1) {
  engine_op_desc.SetAttr("parameters",
                         std::vector<std::string>({"y0", "y1", "y2", "y3"}));
  engine_op_desc.SetAttr("engine_key", std::string("b_engine"));
+  engine_op_desc.SetAttr("calibration_engine_key",
+                         std::string("b_calib_engine"));
  engine_op_desc.SetAttr("predictor_id", 1);
  engine_op_desc.SetAttr("calibration_data", std::string(""));
  engine_op_desc.SetAttr("enable_int8", static_cast<bool>(false));