From 312fe0ece16dc316904318b61f3dacaa8777eade Mon Sep 17 00:00:00 2001 From: nhzlx Date: Wed, 16 Jan 2019 08:27:59 +0000 Subject: [PATCH] add trt int8 calibration support fix comments test=develop --- paddle/fluid/inference/analysis/argument.h | 3 +- paddle/fluid/inference/analysis/helper.cc | 8 +++ paddle/fluid/inference/analysis/helper.h | 8 ++- .../inference/analysis/ir_pass_manager.cc | 10 ++-- .../ir_passes/tensorrt_subgraph_pass.cc | 37 +++++++++----- paddle/fluid/inference/api/analysis_config.cc | 12 ++--- .../fluid/inference/api/analysis_predictor.cc | 49 +++++++++++-------- .../fluid/inference/api/analysis_predictor.h | 13 +++++ .../inference/api/paddle_analysis_config.h | 8 ++- paddle/fluid/inference/tensorrt/engine.cc | 2 +- paddle/fluid/inference/tensorrt/engine.h | 6 +-- .../inference/tensorrt/trt_int8_calibrator.cc | 31 ++++++------ .../inference/tensorrt/trt_int8_calibrator.h | 28 +++++------ .../operators/tensorrt/tensorrt_engine_op.cc | 3 +- .../operators/tensorrt/tensorrt_engine_op.h | 38 +++++++------- 15 files changed, 158 insertions(+), 98 deletions(-) diff --git a/paddle/fluid/inference/analysis/argument.h b/paddle/fluid/inference/analysis/argument.h index b06ff63a741..c317172fa29 100644 --- a/paddle/fluid/inference/analysis/argument.h +++ b/paddle/fluid/inference/analysis/argument.h @@ -28,6 +28,7 @@ #include "paddle/fluid/framework/ir/graph.h" #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/framework/scope.h" +#include "paddle/fluid/inference/api/paddle_analysis_config.h" #include "paddle/fluid/platform/variant.h" namespace paddle { @@ -128,7 +129,7 @@ struct Argument { DECL_ARGUMENT_FIELD(tensorrt_workspace_size, TensorRtWorkspaceSize, int); DECL_ARGUMENT_FIELD(tensorrt_min_subgraph_size, TensorRtMinSubgraphSize, int); DECL_ARGUMENT_FIELD(tensorrt_precision_mode, TensorRtPrecisionMode, - std::string); + contrib::AnalysisConfig::Precision); // The program transformed by IR analysis phase. 
DECL_ARGUMENT_UNIQUE_FIELD(ir_analyzed_program, IrAnalyzedProgram, diff --git a/paddle/fluid/inference/analysis/helper.cc b/paddle/fluid/inference/analysis/helper.cc index ca40c01fc57..4f5c50d0d6b 100644 --- a/paddle/fluid/inference/analysis/helper.cc +++ b/paddle/fluid/inference/analysis/helper.cc @@ -36,6 +36,14 @@ void SetAttr(framework::proto::OpDesc *op, const std::string &name, attr->set_i(data); } template <> +void SetAttr(framework::proto::OpDesc *op, const std::string &name, + const bool &data) { + auto *attr = op->add_attrs(); + attr->set_name(name); + attr->set_type(paddle::framework::proto::AttrType::BOOLEAN); + attr->set_b(data); +} +template <> void SetAttr(framework::proto::OpDesc *op, const std::string &name, const int64_t &data) { auto *attr = op->add_attrs(); diff --git a/paddle/fluid/inference/analysis/helper.h b/paddle/fluid/inference/analysis/helper.h index 5df3aacc3f2..40c94d9904e 100644 --- a/paddle/fluid/inference/analysis/helper.h +++ b/paddle/fluid/inference/analysis/helper.h @@ -156,7 +156,7 @@ static bool PathExists(const std::string &path) { return false; } -static std::string SplitPath(const std::string path) { +static std::string GetDirRoot(const std::string path) { char sep = '/'; #ifdef _WIN32 @@ -167,10 +167,14 @@ static std::string SplitPath(const std::string path) { if (i != std::string::npos) { return (path.substr(0, i)); } - return path; } +static std::string GetTrtCalibPath(const std::string &model_root, + const std::string &engine_key) { + return model_root + "/trt_calib_" + engine_key; +} + } // namespace analysis } // namespace inference } // namespace paddle diff --git a/paddle/fluid/inference/analysis/ir_pass_manager.cc b/paddle/fluid/inference/analysis/ir_pass_manager.cc index a9960557743..f9ef0a68e9f 100644 --- a/paddle/fluid/inference/analysis/ir_pass_manager.cc +++ b/paddle/fluid/inference/analysis/ir_pass_manager.cc @@ -71,13 +71,17 @@ void IRPassManager::CreatePasses(Argument *argument, "program", new framework::ProgramDesc *( const_cast<framework::ProgramDesc *>(&argument->main_program()))); - pass->Set("precision_mode", - new std::string(argument->tensorrt_precision_mode())); + + bool enable_int8 = false; + if (argument->tensorrt_precision_mode() == + contrib::AnalysisConfig::Precision::kInt8) + enable_int8 = true; + + pass->Set("enable_int8", new bool(enable_int8)); pass->Set("model_dir", new std::string(argument->model_path())); } // graph_ = pass->Apply(std::move(graph_)); - pre_pass = pass_name; passes_.emplace_back(std::move(pass)); diff --git a/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc b/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc index 634c5ead0a3..34991b6fbca 100644 --- a/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc +++ b/paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc @@ -13,6 +13,7 @@ // limitations under the License.
#include +#include <set> #include #include @@ -93,8 +94,8 @@ void TensorRtSubgraphPass::CreateTensorRTOp(framework::ir::Node *node, } // collect inputs - std::unordered_set<std::string> input_names; - std::unordered_set<std::string> input_names_with_id; + std::set<std::string> input_names; + std::set<std::string> input_names_with_id; for (auto *x : node->inputs) { input_names.insert(x->Name()); input_names_with_id.insert(x->Name() + std::to_string(x->id())); } op_desc->SetInput( "Xs", std::vector<std::string>(input_names.begin(), input_names.end())); - std::unordered_set<std::string> output_names; - std::unordered_set<std::string> output_names_with_id; + std::set<std::string> output_names; + std::set<std::string> output_names_with_id; for (auto *x : node->outputs) { output_names.insert(x->Name()); output_names_with_id.insert(x->Name() + std::to_string(x->id())); } @@ -203,28 +204,40 @@ void TensorRtSubgraphPass::CreateTensorRTOp(framework::ir::Node *node, "the block has no var-desc"); PADDLE_ENFORCE(!output_mapping.empty()); op_desc->SetBlockAttr("sub_block", new_block); - // Set attrs SetAttr(op_desc->Proto(), "subgraph", block_desc.Proto()->SerializeAsString()); + // Set attrs SetAttr(op_desc->Proto(), "max_batch_size", Get<int>("max_batch_size")); SetAttr(op_desc->Proto(), "workspace_size", Get<int>("workspace_size")); SetAttr(op_desc->Proto(), "parameters", ExtractParameters(graph->Nodes())); SetAttr(op_desc->Proto(), "output_name_mapping", output_mapping); - std::string engine_key = std::to_string( - std::hash<std::string>()(block_desc.Proto()->SerializeAsString())); - std::string precision_mode = Get<std::string>("precision_mode"); + auto enable_int8 = Get<bool>("enable_int8"); SetAttr(op_desc->Proto(), "calibration_data", std::string("")); - std::string trt_calib_file = - Get<std::string>("model_dir") + "/trt_calib_" + engine_key; - if (precision_mode == "INT8" && FileExists(trt_calib_file)) { + + // We use the subgraph's inputs and outputs to generate the engine key.
+ std::string engine_hash_key = ""; + for (auto name : input_names_with_id) { + engine_hash_key += name; + } + for (auto name : output_names_with_id) { + engine_hash_key += name; + } + + auto engine_key = std::to_string(std::hash<std::string>()(engine_hash_key)); + + auto trt_calib_file = + GetTrtCalibPath(Get<std::string>("model_dir"), engine_key); + VLOG(3) << "engine key: " << engine_key; + if (enable_int8 && FileExists(trt_calib_file)) { + VLOG(3) << "Calibration table file " << trt_calib_file << " is found"; std::ifstream infile(trt_calib_file, std::ios::in); std::stringstream buffer; buffer << infile.rdbuf(); std::string calibration_data(buffer.str()); SetAttr(op_desc->Proto(), "calibration_data", calibration_data); } - SetAttr(op_desc->Proto(), "precision_mode", precision_mode); + SetAttr(op_desc->Proto(), "enable_int8", enable_int8); SetAttr(op_desc->Proto(), "engine_key", engine_key); } diff --git a/paddle/fluid/inference/api/analysis_config.cc b/paddle/fluid/inference/api/analysis_config.cc index 399db291fd1..7c7efe7a3d5 100644 --- a/paddle/fluid/inference/api/analysis_config.cc +++ b/paddle/fluid/inference/api/analysis_config.cc @@ -122,13 +122,13 @@ void contrib::AnalysisConfig::EnableMKLDNN() { #endif } -void contrib::AnalysisConfig::EnableTensorRtEngine(int workspace_size, - int max_batch_size, - int min_subgraph_size, - std::string precision_mode) { +void contrib::AnalysisConfig::EnableTensorRtEngine( + int workspace_size, int max_batch_size, int min_subgraph_size, + contrib::AnalysisConfig::Precision precision_mode) { use_tensorrt_ = true; tensorrt_workspace_size_ = workspace_size; tensorrt_max_batchsize_ = max_batch_size; + tensorrt_min_subgraph_size_ = min_subgraph_size; tensorrt_precision_mode_ = precision_mode; Update(); } @@ -149,7 +149,7 @@ void contrib::AnalysisConfig::Update() { << "TensorRT engine is not available when EnableGpu() is not activated."; } else { // Append after the infer_clean pass.
- pass_builder()->InsertPass(1, "tensorrt_subgraph_pass"); + pass_builder()->InsertPass(3, "tensorrt_subgraph_pass"); } } @@ -180,7 +180,7 @@ std::string contrib::AnalysisConfig::SerializeInfoCache() { ss << use_tensorrt_; ss << tensorrt_workspace_size_; ss << tensorrt_max_batchsize_; - ss << tensorrt_precision_mode_; + ss << tensorrt_min_subgraph_size_; ss << use_mkldnn_; ss << enable_ir_optim_; diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index 75c62bb98cb..838016bd766 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -30,9 +30,9 @@ #include "paddle/fluid/inference/api/paddle_inference_pass.h" #if PADDLE_WITH_TENSORRT #include "paddle/fluid/inference/tensorrt/convert/op_converter.h" +#include "paddle/fluid/inference/tensorrt/trt_int8_calibrator.h" #endif #include "paddle/fluid/inference/analysis/helper.h" -#include "paddle/fluid/inference/tensorrt/trt_int8_calibrator.h" #include "paddle/fluid/inference/utils/singleton.h" #include "paddle/fluid/memory/memcpy.h" #include "paddle/fluid/platform/cpu_helper.h" @@ -46,8 +46,8 @@ namespace paddle { using contrib::AnalysisConfig; using inference::Singleton; using inference::tensorrt::TRTInt8Calibrator; -using inference::tensorrt::TRTCalibratorRes; -using inference::tensorrt::TRTCalibratorResManager; +using inference::tensorrt::TRTCalibratorEngine; +using inference::tensorrt::TRTCalibratorEngineManager; namespace { bool IsPersistable(const framework::VarDesc *var) { @@ -334,7 +334,7 @@ void AnalysisPredictor::OptimizeInferenceProgram() { !config_.params_file().empty(), "Either model_dir or (param_file, prog_file) should be set."); PADDLE_ENFORCE(!config_.prog_file().empty()); - std::string dir = inference::analysis::SplitPath(config_.prog_file()); + std::string dir = inference::analysis::GetDirRoot(config_.prog_file()); argument_.SetModelPath(dir); argument_.SetModelProgramPath(config_.prog_file()); @@ -562,6 +562,7 @@ bool AnalysisPredictor::LoadParameters() { return true; } +#if PADDLE_WITH_TENSORRT bool AnalysisPredictor::SaveTrtCalibToDisk() { PADDLE_ENFORCE(config_.tensorrt_engine_enabled(), "This func can be invoked only in trt mode"); @@ -570,44 +571,50 @@ if (op_desc->Type() == "tensorrt_engine") { std::string engine_name = boost::get<std::string>(op_desc->GetAttr("engine_key")); - if (!Singleton<TRTCalibratorResManager>::Global().Has(engine_name)) { + if (!Singleton<TRTCalibratorEngineManager>::Global().Has(engine_name)) { LOG(ERROR) << "You should run the predictor (with TRT) on real data " "to generate calibration info"; return false; } - TRTCalibratorRes *calib_res = - Singleton<TRTCalibratorResManager>::Global().Get(engine_name); + TRTCalibratorEngine *calib_engine = + Singleton<TRTCalibratorEngineManager>::Global().Get(engine_name); LOG(INFO) << "Wait for calib threads done."; - calib_res->calib_->waitAndSetDone(); + calib_engine->calib_->waitAndSetDone(); LOG(INFO) << "Finish wait."; - calib_res->thr_->join(); - std::string calibration_data = - calib_res->calib_->getCalibrationTableAsString(); + calib_engine->thr_->join(); + std::string calibration_table_data = + calib_engine->calib_->getCalibrationTableAsString(); - if (calibration_data.size() == 0) { + if (calibration_table_data.empty()) { LOG(ERROR) << "the calibration table is empty."; return false; } - std::string calibration_data_path = - argument_.model_path() + "/trt_calib_" + engine_name; - std::ofstream ofile(calibration_data_path, std::ios::out); - LOG(INFO) << "Write Paddle-TRT INT8 calibration data to
file " - << calibration_data_path; - ofile << calibration_data; + + std::string calibration_table_data_path = + inference::analysis::GetTrtCalibPath(argument_.model_path(), + engine_name); + + std::ofstream ofile(calibration_table_data_path, std::ios::out); + LOG(INFO) << "Write Paddle-TRT INT8 calibration table data to file " + << calibration_table_data_path; + ofile << calibration_table_data; ofile.close(); } } // Free all calibrator resources. - Singleton::Global().DeleteALL(); + Singleton::Global().DeleteALL(); return true; } +#endif AnalysisPredictor::~AnalysisPredictor() { +#if PADDLE_WITH_TENSORRT if (config_.tensorrt_engine_enabled() && - config_.tensorrt_precision_mode_ == "INT8" && - Singleton::Global().Has()) { + config_.tensorrt_precision_mode_ == AnalysisConfig::Precision::kInt8 && + Singleton::Global().Has()) { SaveTrtCalibToDisk(); } +#endif if (FLAGS_profile) { platform::DisableProfiler(platform::EventSortingKey::kTotal, "./profile.log"); diff --git a/paddle/fluid/inference/api/analysis_predictor.h b/paddle/fluid/inference/api/analysis_predictor.h index cec36a0d3a9..c87987b1671 100644 --- a/paddle/fluid/inference/api/analysis_predictor.h +++ b/paddle/fluid/inference/api/analysis_predictor.h @@ -91,7 +91,20 @@ class AnalysisPredictor : public PaddlePredictor { void GetFetchOne(const framework::LoDTensor &fetchs, PaddleTensor *output_data); +#if PADDLE_WITH_TENSORRT + // When we use Paddle-TRT INT8 engine, we need to generate calibration table + // data first, + // the calibration table contains the range for each op's input and output, + // this whole process can be divided into several steps: + // + // 1. Builds a 32-bit engine, runs it on the calibration set, and records a + // histogram for each + // tensor of the distribution of activation values. + // 2. Builds a calibration table from the histograms. + // + // After step 2, we need to store the calibration table on disk bool SaveTrtCalibToDisk(); +#endif ~AnalysisPredictor(); diff --git a/paddle/fluid/inference/api/paddle_analysis_config.h b/paddle/fluid/inference/api/paddle_analysis_config.h index 14b16d08b34..118af6f4013 100644 --- a/paddle/fluid/inference/api/paddle_analysis_config.h +++ b/paddle/fluid/inference/api/paddle_analysis_config.h @@ -42,6 +42,10 @@ struct AnalysisConfig { explicit AnalysisConfig(const std::string& model_dir); explicit AnalysisConfig(const std::string& prog_file, const std::string& params_file); + enum class Precision { + kFloat32 = 0, + kInt8, + }; /** Set model with a directory. */ @@ -136,7 +140,7 @@ struct AnalysisConfig { */ void EnableTensorRtEngine(int workspace_size = 1 << 20, int max_batch_size = 1, int min_subgraph_size = 3, - std::string precision = "FP32"); + Precision precision = Precision::kFloat32); /** A boolean state telling whether the TensorRT engine is used. */ bool tensorrt_engine_enabled() const { return use_tensorrt_; } @@ -232,7 +236,7 @@ struct AnalysisConfig { // We set this variable to control the minimum number of nodes in the // subgraph, 3 as default value. int tensorrt_min_subgraph_size_{3}; - std::string tensorrt_precision_mode_; + Precision tensorrt_precision_mode_; bool use_mkldnn_{false}; std::unordered_set mkldnn_enabled_op_types_; diff --git a/paddle/fluid/inference/tensorrt/engine.cc b/paddle/fluid/inference/tensorrt/engine.cc index 43f99df4637..808e93d2edc 100644 --- a/paddle/fluid/inference/tensorrt/engine.cc +++ b/paddle/fluid/inference/tensorrt/engine.cc @@ -70,7 +70,7 @@ void TensorRTEngine::FreezeNetwork() { // build engine. 
infer_builder_->setMaxBatchSize(max_batch_); infer_builder_->setMaxWorkspaceSize(max_workspace_); - if (precision_mode_ == "INT8") { + if (enable_int8_) { infer_builder_->setInt8Mode(true); PADDLE_ENFORCE( calibrator_ != nullptr, diff --git a/paddle/fluid/inference/tensorrt/engine.h b/paddle/fluid/inference/tensorrt/engine.h index 9aed374dce4..788a4493c00 100644 --- a/paddle/fluid/inference/tensorrt/engine.h +++ b/paddle/fluid/inference/tensorrt/engine.h @@ -58,14 +58,14 @@ class TensorRTEngine : public EngineBase { TensorRTEngine(int max_batch, int max_workspace, cudaStream_t* stream = nullptr, int device = 0, - std::string precision_mode = "FP32", + bool enable_int8 = false, TRTInt8Calibrator* calibrator = nullptr, nvinfer1::ILogger& logger = NaiveLogger::Global()) : max_batch_(max_batch), max_workspace_(max_workspace), stream_(stream ? stream : &default_stream_), device_(device), - precision_mode_(precision_mode), + enable_int8_(enable_int8), calibrator_(calibrator), logger_(logger) { freshDeviceId(); @@ -168,7 +168,7 @@ class TensorRTEngine : public EngineBase { // The specific GPU id that the TensorRTEngine is bound to. int device_; - std::string precision_mode_; + bool enable_int8_; TRTInt8Calibrator* calibrator_; // batch size of the current data, will be updated on each execution. int batch_size_{-1}; diff --git a/paddle/fluid/inference/tensorrt/trt_int8_calibrator.cc b/paddle/fluid/inference/tensorrt/trt_int8_calibrator.cc index f9356200204..4a85c8b8fe6 100644 --- a/paddle/fluid/inference/tensorrt/trt_int8_calibrator.cc +++ b/paddle/fluid/inference/tensorrt/trt_int8_calibrator.cc @@ -25,11 +25,7 @@ int TRTInt8Calibrator::getBatchSize() const { return batch_size_; } TRTInt8Calibrator::TRTInt8Calibrator( const std::unordered_map<std::string, size_t>& buffers, int batch_size, std::string engine_name, const platform::Place place) - : batch_size_(batch_size), - calib_running_(true), - data_is_set_(false), - done_(false), - engine_name_(engine_name) { + : batch_size_(batch_size), engine_name_(engine_name) { int i = 0; VLOG(4) << "Init a new calibrator: " << engine_name_; for (const auto it : buffers) { @@ -62,28 +58,32 @@ void TRTInt8Calibrator::waitAndSetDone() { } } +// There might be more than one input for the trt subgraph, +// so we use a map to store the input information. bool TRTInt8Calibrator::setBatch( const std::unordered_map<std::string, void*>& data) { VLOG(3) << "set batch: " << engine_name_; std::unique_lock<std::mutex> lk(mut_); + // There is a producer and a consumer. The producer sets the batch data and + // the consumer gets it. The size of the data pool is one, + // so the producer has to wait for the consumer to finish processing + // before it can set new data. while ((calib_running_ || data_is_set_) && (!done_)) cond_.wait(lk); + // done_ is set to true by waitAndSetDone() when all calibration data + // has been processed. if (done_) return false; // Sets the batch.
- for (const auto it : data) { + for (const auto& it : data) { auto dataptr = data_buffers_.find(it.first); if (dataptr == data_buffers_.end()) { LOG(FATAL) << "FATAL " << engine_name_ << " input name '" << it.first << "' does not match with the buffer names"; } const auto& d = dataptr->second; - auto status = - cudaMemcpy(d.first, it.second, d.second, cudaMemcpyDeviceToDevice); - if (status != cudaSuccess) { - LOG(FATAL) << "cudaMemcpy " << engine_name_ << " for '" << it.first - << "' failed with " << status; - } + PADDLE_ENFORCE_EQ( + cudaMemcpy(d.first, it.second, d.second, cudaMemcpyDeviceToDevice), + cudaSuccess, "Fail to cudaMemcpy %s for %s", engine_name_, it.first); } data_is_set_ = true; @@ -95,9 +95,12 @@ bool TRTInt8Calibrator::getBatch(void** bindings, const char** names, int num_bindings) { VLOG(4) << "get batch: " << engine_name_; std::unique_lock<std::mutex> lk(mut_); + // The consumer has just finished processing a batch of data, + // so the producer can set the next batch. calib_running_ = false; cond_.notify_all(); + // As long as there is data in the pool, the consumer can get it. while (!data_is_set_ && !done_) cond_.wait(lk); if (done_) return false; @@ -123,7 +126,7 @@ void TRTInt8Calibrator::setDone() { cond_.notify_all(); } -const void* TRTInt8Calibrator::readCalibrationCache(std::size_t& length) { +const void* TRTInt8Calibrator::readCalibrationCache(size_t& length) { if (calibration_table_.empty()) return nullptr; length = calibration_table_.size(); return calibration_table_.data(); diff --git a/paddle/fluid/inference/tensorrt/trt_int8_calibrator.h b/paddle/fluid/inference/tensorrt/trt_int8_calibrator.h index 13f6e7ad011..919f5d55f88 100644 --- a/paddle/fluid/inference/tensorrt/trt_int8_calibrator.h +++ b/paddle/fluid/inference/tensorrt/trt_int8_calibrator.h @@ -21,8 +21,8 @@ #include #include -#include "NvInfer.h" -#include "cuda_runtime_api.h" +#include <NvInfer.h> +#include <cuda_runtime_api.h> #include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/inference/tensorrt/engine.h" #include "paddle/fluid/platform/place.h" @@ -60,9 +60,9 @@ struct TRTInt8Calibrator : public nvinfer1::IInt8EntropyCalibrator { private: const int batch_size_; - bool calib_running_; - bool data_is_set_; - bool done_; + bool calib_running_{true}; + bool data_is_set_{false}; + bool done_{false}; std::mutex mut_; std::condition_variable cond_; @@ -74,9 +74,9 @@ struct TRTInt8Calibrator : public nvinfer1::IInt8EntropyCalibrator { std::string calibration_table_; }; -class TRTCalibratorRes { +class TRTCalibratorEngine { public: - TRTCalibratorRes() {} + TRTCalibratorEngine() {} std::unique_ptr<TRTInt8Calibrator> calib_; std::unique_ptr<std::thread> thr_; std::unique_ptr<TensorRTEngine> engine_; @@ -84,7 +84,7 @@ /* * Manager to control the TensorRT Int8 calibration creation and deletion. */ -class TRTCalibratorResManager { +class TRTCalibratorEngineManager { public: bool Has() const { return res_.size() > 0; } bool Has(const std::string& name) const { return res_.find(name) != res_.end(); } // Get Int8Calibrator via name - TRTCalibratorRes* Get(const std::string& name) const { + TRTCalibratorEngine* Get(const std::string& name) const { return res_.at(name).get(); } // Look up or create a calibrator.
- TRTCalibratorRes* LookupOrCreate(const std::string& engine_name) { + TRTCalibratorEngine* LookupOrCreate(const std::string& engine_name) { if (res_.count(engine_name) == 0) { - auto* p = new TRTCalibratorRes(); + auto* p = new TRTCalibratorEngine; res_[engine_name].reset(p); } return res_.at(engine_name).get(); } // Create an Int8Calibrator - TRTCalibratorRes* Create(const std::string& engine_name) { - auto* p = new TRTCalibratorRes(); + TRTCalibratorEngine* Create(const std::string& engine_name) { + auto* p = new TRTCalibratorEngine; res_[engine_name].reset(p); return p; } @@ -120,7 +120,7 @@ } private: - std::unordered_map<std::string, std::unique_ptr<TRTCalibratorRes>> res_; + std::unordered_map<std::string, std::unique_ptr<TRTCalibratorEngine>> res_; }; } // namespace tensorrt diff --git a/paddle/fluid/operators/tensorrt/tensorrt_engine_op.cc b/paddle/fluid/operators/tensorrt/tensorrt_engine_op.cc index ed177eb18f8..031335009b6 100644 --- a/paddle/fluid/operators/tensorrt/tensorrt_engine_op.cc +++ b/paddle/fluid/operators/tensorrt/tensorrt_engine_op.cc @@ -36,8 +36,7 @@ class TensorRTEngineOpMaker : public framework::OpProtoAndCheckerMaker { AddAttr<int>("max_batch_size", "the maximum batch size."); AddAttr<int>("workspace_size", "the workspace size."); AddAttr<framework::BlockDesc *>("sub_block", "the trt block"); - AddAttr<std::string>("precision_mode", - "the precision mode: 'FP32', 'INT8' "); + AddAttr<bool>("enable_int8", "whether to switch to int8 mode"); AddComment("TensorRT engine operator."); } }; diff --git a/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h b/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h index 57747faec85..d27e013dc47 100644 --- a/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h +++ b/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h @@ -65,8 +65,8 @@ nvinfer1::Dims Vec2TRT_Dims(const std::vector<int64_t> &shape) { using inference::Singleton; using inference::tensorrt::TensorRTEngine; using inference::tensorrt::TRTInt8Calibrator; -using inference::tensorrt::TRTCalibratorRes; -using inference::tensorrt::TRTCalibratorResManager; +using inference::tensorrt::TRTCalibratorEngine; +using inference::tensorrt::TRTCalibratorEngineManager; class TensorRTEngineOp : public framework::OperatorBase { private: int max_batch_size_; int workspace_size_; std::unique_ptr<TRTInt8Calibrator> calibrator_; - std::string precision_mode_; + bool enable_int8_; std::string calibration_data_; std::string engine_key_; bool calibration_mode_; @@ -90,7 +90,7 @@ input_names_ = Inputs("Xs"); max_batch_size_ = Attr<int>("max_batch_size"); workspace_size_ = Attr<int>("workspace_size"); - precision_mode_ = Attr<std::string>("precision_mode"); + enable_int8_ = Attr<bool>("enable_int8"); calibration_data_ = Attr<std::string>("calibration_data"); engine_key_ = Attr<std::string>("engine_key"); for (const auto &param : params) { param_names_.insert(param); } + // calibration_mode_ == true means we need to + // generate the calibration table data.
+ calibration_mode_ = (enable_int8_ && calibration_data_.size() == 0); - if (precision_mode_ == "INT8" && calibration_data_.size()) { + VLOG(4) << "calibration_mode: " << calibration_mode_; + if (enable_int8_ && calibration_data_.size()) { calibrator_.reset(new TRTInt8Calibrator(calibration_data_)); } } protected: - void RunNative(const framework::Scope &scope, - const platform::Place &dev_place) const { + void RunNativeImpl(const framework::Scope &scope, + const platform::Place &dev_place) const { framework::Executor executor(dev_place); auto *block = Attr<framework::BlockDesc *>("sub_block"); auto *program = block->Program(); @@ -128,12 +130,14 @@ class TensorRTEngineOp : public framework::OperatorBase { void RunCalibration(const framework::Scope &scope, const platform::Place &dev_place) const { - // Create calibrator here. + // This process builds a 32-bit trt engine, runs it on the calibration + // set, and records a histogram of activation values for each tensor. LOG(INFO) << "Running calibration trt int8 ..."; int runtime_batch = 1; - if (!Singleton<TRTCalibratorResManager>::Global().Has(engine_key_)) { - TRTCalibratorRes *calib_res = - Singleton<TRTCalibratorResManager>::Global().Create(engine_key_); + if (!Singleton<TRTCalibratorEngineManager>::Global().Has(engine_key_)) { + TRTCalibratorEngine *calib_res = + Singleton<TRTCalibratorEngineManager>::Global().Create(engine_key_); std::unordered_map<std::string, size_t> calib_buffers; for (auto &x : input_names_) { if (param_names_.count(x)) continue; @@ -148,7 +152,7 @@ class TensorRTEngineOp : public framework::OperatorBase { calib_res->thr_.reset(new std::thread([&]() { calib_res->engine_.reset(new TensorRTEngine( max_batch_size_, workspace_size_, nullptr, - boost::get<platform::CUDAPlace>(dev_place).device, precision_mode_, + boost::get<platform::CUDAPlace>(dev_place).device, enable_int8_, calib_res->calib_.get())); VLOG(3) << "start the calib trt engine thread"; Prepare(scope, dev_place, calib_res->engine_.get()); })); } TRTInt8Calibrator *temp_calibrator = - Singleton<TRTCalibratorResManager>::Global() + Singleton<TRTCalibratorEngineManager>::Global() .Get(engine_key_) ->calib_.get(); std::unordered_map<std::string, void *> calib_data; @@ -168,7 +172,7 @@ class TensorRTEngineOp : public framework::OperatorBase { calib_data.emplace(x, t.data()); } temp_calibrator->setBatch(calib_data); - RunNative(scope, dev_place); + RunNativeImpl(scope, dev_place); } void RunTrt(const framework::Scope &scope, @@ -178,7 +182,7 @@ class TensorRTEngineOp : public framework::OperatorBase { trt_engine_.reset( new TensorRTEngine(max_batch_size_, workspace_size_, nullptr, boost::get<platform::CUDAPlace>(dev_place).device, - precision_mode_, calibrator_.get())); + enable_int8_, calibrator_.get())); Prepare(scope, dev_place, trt_engine_.get()); } -- GitLab
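Usage note (not part of the patch itself): the workflow above is driven entirely from the AnalysisConfig API. Below is a minimal sketch of how a user would generate the INT8 calibration table with this patch applied. It assumes the inference API of the same era; `model_dir` and `warmup_batches` are hypothetical placeholders, not names from the patch.

#include <memory>
#include <vector>
#include "paddle/fluid/inference/api/paddle_inference_api.h"

void GenerateInt8CalibTable(
    const std::vector<std::vector<paddle::PaddleTensor>> &warmup_batches) {
  // Enable TensorRT with Precision::kInt8. Since no trt_calib_<engine_key>
  // file exists yet, the tensorrt_engine op runs with calibration_mode_ on.
  paddle::contrib::AnalysisConfig config("model_dir");  // hypothetical path
  config.EnableUseGpu(100 /*initial pool, MB*/, 0 /*GPU id*/);
  config.EnableTensorRtEngine(
      1 << 20 /*workspace_size*/, 1 /*max_batch_size*/,
      3 /*min_subgraph_size*/,
      paddle::contrib::AnalysisConfig::Precision::kInt8);

  auto predictor =
      paddle::CreatePaddlePredictor<paddle::contrib::AnalysisConfig>(config);

  // Each Run() hands one batch to TRTInt8Calibrator::setBatch(), while the
  // background thread started in RunCalibration() drives an FP32 engine that
  // collects activation histograms (the producer/consumer handoff above).
  for (const auto &batch : warmup_batches) {
    std::vector<paddle::PaddleTensor> outputs;
    predictor->Run(batch, &outputs);
  }

  // Destroying the predictor triggers SaveTrtCalibToDisk(): it waits for the
  // calibration thread, then writes trt_calib_<engine_key> into the model
  // directory. The next predictor created with kInt8 finds that file via
  // GetTrtCalibPath() and builds a real INT8 engine from it.
  predictor.reset();
}

The calibration set should be small but representative of real inputs; the quality of the recorded activation ranges, and hence of the INT8 engine, depends directly on it.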