From 668ffd5964473a13bfd5c6ffcc285b66777f745d Mon Sep 17 00:00:00 2001
From: zhoutianzi666 <39978853+zhoutianzi666@users.noreply.github.com>
Date: Tue, 20 Sep 2022 15:09:08 +0800
Subject: [PATCH] [Paddle-TRT] Full support for ops with persistable input
 (#45545)

* Move ITensor construction for Weight (persistable variable) from
  OpConvert to TensorRTEngine.
---
 .../generic_and_custom_plugin_creater.cc      | 15 -----
 .../inference/tensorrt/convert/op_converter.h | 55 +------------------
 paddle/fluid/inference/tensorrt/engine.cc     | 46 ++++++++++++++++-
 paddle/fluid/inference/tensorrt/engine.h      |  6 +-
 4 files changed, 47 insertions(+), 75 deletions(-)

diff --git a/paddle/fluid/inference/tensorrt/convert/generic_and_custom_plugin_creater.cc b/paddle/fluid/inference/tensorrt/convert/generic_and_custom_plugin_creater.cc
index e1ce9ceb020..b5d9a50f06d 100644
--- a/paddle/fluid/inference/tensorrt/convert/generic_and_custom_plugin_creater.cc
+++ b/paddle/fluid/inference/tensorrt/convert/generic_and_custom_plugin_creater.cc
@@ -51,13 +51,6 @@ class CustomPluginCreater : public OpConverter {
     auto &op_input_names = framework::OpMetaInfoHelper::GetInputs(op_info);
     for (auto &param_name : op_input_names) {
       for (auto &arg_name : op_desc.Input(param_name)) {
-        framework::Variable *X_v = nullptr;
-        X_v = scope.FindVar(arg_name);
-        // If this weight is not shared between ops, it need to be convtered to
-        // itensor
-        if (X_v && !engine_->GetITensorMap()->count(arg_name)) {
-          ConvertWeight2ITensor(scope, arg_name);
-        }
         inputs.push_back(engine_->GetITensor(arg_name));
       }
     }
@@ -193,14 +186,6 @@ class GenericPluginCreater : public OpConverter {
 
     for (auto &param_name : phi_kernel_signature.input_names) {
       for (auto &arg_name : op_desc.Input(param_name)) {
-        framework::Variable *X_v = nullptr;
-        X_v = scope.FindVar(arg_name);
-        // If this weight is not shared between ops, it need to be convtered to
-        // itensor
-        if (X_v && !engine_->GetITensorMap()->count(arg_name)) {
-          ConvertWeight2ITensor(scope, arg_name);
-        }
-
         inputs.push_back(engine_->GetITensor(arg_name));
         auto *var = block_desc.FindVar(arg_name);
         PADDLE_ENFORCE_NOT_NULL(
diff --git a/paddle/fluid/inference/tensorrt/convert/op_converter.h b/paddle/fluid/inference/tensorrt/convert/op_converter.h
index 095457dbfbb..85a9b9d2fb3 100644
--- a/paddle/fluid/inference/tensorrt/convert/op_converter.h
+++ b/paddle/fluid/inference/tensorrt/convert/op_converter.h
@@ -178,6 +178,7 @@ class OpConverter {
                                         op_desc.Type()));
 
     it->SetEngine(engine);
+    engine->SetScope(scope);
     it->SetBlockDesc(block);
     (*it)(op, scope, test_mode);
 
@@ -255,31 +256,6 @@ class OpConverter {
                     const framework::Scope& scope,
                     TensorRTEngine* engine) {
     std::unique_lock<std::mutex> lk(mut_);
-    for (int i = 0; i < block.ops_size(); i++) {
-      SetEngine(engine);
-      const auto& op = block.ops(i);
-      framework::OpDesc op_desc(op, nullptr);
-      framework::Variable* X_v = nullptr;
-      std::string X_name;
-      // inputs : string -> std::vector
-      auto inputs = op_desc.Inputs();
-      if (inputs.count("X")) {
-        X_name = op_desc.Input("X")[0];
-      } else if (inputs.count("Input")) {
-        X_name = op_desc.Input("Input")[0];
-      } else if (inputs.count("Y")) {
-        X_name = op_desc.Input("Y")[0];
-      }
-      X_v = scope.FindVar(X_name);
-      // If this weight is shared between ops, it needn't to be convtered to
-      // itensor once again
-      if (engine->GetITensorMap()->count(X_name)) {
-        continue;
-      }
-      if (X_v) {
-        ConvertWeight2ITensor(scope, X_name);
-      }
-    }
     for (int i = 0; i < block.ops_size(); i++) {
       const auto& op = block.ops(i);
       ConvertOp(op, parameters, scope, engine, false, &block);
@@ -596,35 +572,6 @@ class OpConverter {
     return Add1DConstantLayer(input_data, weight_name, scalar);
   }
 
-  // For cases when input is not middle-tensor , but persistable tensor
-  // you should call this.
-  nvinfer1::ITensor* ConvertWeight2ITensor(const framework::Scope& scope,
-                                           const std::string& name) {
-    auto* var_v = scope.FindVar(name);
-    auto* var_t = var_v->GetMutable<framework::LoDTensor>();
-    auto weight = engine_->GetTrtWeight(name, *var_t);
-
-    // Now we have create weights, then we need create a itensor
-    auto var_dims = var_t->dims();
-    nvinfer1::Dims trt_in_shape;
-    trt_in_shape.nbDims = var_t->dims().size();
-    for (int64_t i = 0; i < trt_in_shape.nbDims; i++) {
-      trt_in_shape.d[i] = var_dims[i];
-    }
-    // In fact , this is not always right, because we can't determine if the 0th
-    // dimension is batch. Just for run chenqu's model
-    if (!engine_->with_dynamic_shape()) {
-      trt_in_shape.nbDims--;
-      for (int i = 0; i < trt_in_shape.nbDims; i++) {
-        trt_in_shape.d[i] = trt_in_shape.d[i + 1];
-      }
-    }
-    nvinfer1::ILayer* layer =
-        TRT_ENGINE_ADD_LAYER(engine_, Constant, trt_in_shape, weight.get());
-    engine_->SetITensor(name, layer->getOutput(0));
-    return layer->getOutput(0);
-  }
-
   void RreplenishLayerAndOutput(
       nvinfer1::ILayer* layer,
       const std::string& layer_type,
diff --git a/paddle/fluid/inference/tensorrt/engine.cc b/paddle/fluid/inference/tensorrt/engine.cc
index e70a49c685e..0cf1d6352c3 100644
--- a/paddle/fluid/inference/tensorrt/engine.cc
+++ b/paddle/fluid/inference/tensorrt/engine.cc
@@ -369,11 +369,47 @@ void TensorRTEngine::SetITensor(const std::string &name,
 }
 
 nvinfer1::ITensor *TensorRTEngine::GetITensor(const std::string &name) {
-  PADDLE_ENFORCE_EQ(itensor_map_.count(name),
-                    true,
-                    platform::errors::NotFound(
-                        "Tensor named %s is not found in TRT engine", name));
-  return itensor_map_[name];
+  if (itensor_map_.count(name)) {
+    return itensor_map_[name];
+  } else {
+    ConvertWeight2ITensor(name);
+    return itensor_map_[name];
+  }
+}
+
+// Call this when the input is not an intermediate (middle) tensor but a
+// persistable (weight) tensor.
+nvinfer1::ITensor *TensorRTEngine::ConvertWeight2ITensor(
+    const std::string &name) {
+  auto *var_v = scope_->FindVar(name);
+  PADDLE_ENFORCE_NOT_NULL(
+      var_v,
+      platform::errors::NotFound("You are converting a persistable weight to a "
+                                 "tensor, but there is no "
+                                 "persistable variable called %s in scope.",
+                                 name));
+  auto *var_t = var_v->GetMutable<framework::LoDTensor>();
+  auto weight = this->GetTrtWeight(name, *var_t);
+
+  // Now that the weight has been created, create an ITensor for it.
+  auto var_dims = var_t->dims();
+  nvinfer1::Dims trt_in_shape;
+  trt_in_shape.nbDims = var_t->dims().size();
+  for (int64_t i = 0; i < trt_in_shape.nbDims; i++) {
+    trt_in_shape.d[i] = var_dims[i];
+  }
+  // This is not always correct, because we cannot tell whether the 0th
+  // dimension is the batch dimension. It is just enough to run chenqu's model.
+  if (!this->with_dynamic_shape()) {
+    trt_in_shape.nbDims--;
+    for (int i = 0; i < trt_in_shape.nbDims; i++) {
+      trt_in_shape.d[i] = trt_in_shape.d[i + 1];
+    }
+  }
+  nvinfer1::ILayer *layer =
+      TRT_ENGINE_ADD_LAYER(this, Constant, trt_in_shape, weight.get());
+  this->SetITensor(name, layer->getOutput(0));
+  return layer->getOutput(0);
 }
 
 std::unordered_map<std::string, nvinfer1::ITensor *>
diff --git a/paddle/fluid/inference/tensorrt/engine.h b/paddle/fluid/inference/tensorrt/engine.h
index 861a2aa8dfb..209f297a066 100644
--- a/paddle/fluid/inference/tensorrt/engine.h
+++ b/paddle/fluid/inference/tensorrt/engine.h
@@ -24,9 +24,9 @@ limitations under the License. */
 #include
 #include
 #include
-
 #include "NvInferRuntimeCommon.h"
 #include "paddle/fluid/framework/lod_tensor.h"
+#include "paddle/fluid/framework/scope.h"
 #include "paddle/fluid/framework/tensor.h"
 #include "paddle/fluid/framework/tensor_util.h"
 #include "paddle/fluid/inference/api/paddle_analysis_config.h"
@@ -283,6 +283,7 @@
   void SetITensor(const std::string& name, nvinfer1::ITensor* tensor);
   // Get an ITensor called name.
   nvinfer1::ITensor* GetITensor(const std::string& name);
+  nvinfer1::ITensor* ConvertWeight2ITensor(const std::string& name);
   std::unordered_map<std::string, nvinfer1::ITensor*>* GetITensorMap();
 
   nvinfer1::ICudaEngine* engine() { return infer_engine_.get(); }
@@ -691,12 +692,15 @@
   void GetEngineInfo();
 
   void SetUseInspector(bool use_inspector) { use_inspector_ = use_inspector; }
+  void SetScope(const framework::Scope& scope) { scope_ = &scope; }
 
  private:
   // Each ICudaEngine object is bound to a specific GPU when it is instantiated,
   // ensure that the thread is associated with the correct device by calling
   // freshDeviceId().
   void freshDeviceId();
+  // Used when converting a weight (persistable variable) into an ITensor.
+  const framework::Scope* scope_;
 
   // the max batch size
   int max_batch_;
--
GitLab
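
Note on the resulting usage (not part of the commit itself): because GetITensor() now falls back to ConvertWeight2ITensor(), an op converter can ask the engine for any input by name, whether it is an activation tensor or a persistable weight, which is why the explicit pre-conversion loops above could be deleted. A minimal sketch of that call pattern follows; the free function GetOpInputITensor and its placement are illustrative assumptions, and only TensorRTEngine::GetITensor(), ConvertWeight2ITensor(), and the engine->SetScope(scope) call in OpConverter::ConvertOp() come from the patch.

#include <string>

#include "paddle/fluid/inference/tensorrt/engine.h"

namespace paddle {
namespace inference {
namespace tensorrt {

// Illustrative helper, not part of the patch: fetch the ITensor for any op
// input. If `arg_name` names a persistable weight that has no ITensor yet,
// GetITensor() falls back to ConvertWeight2ITensor(), which looks the
// variable up in the scope previously passed via engine->SetScope(scope)
// and adds a Constant layer to the network.
nvinfer1::ITensor *GetOpInputITensor(TensorRTEngine *engine,
                                     const std::string &arg_name) {
  return engine->GetITensor(arg_name);
}

}  // namespace tensorrt
}  // namespace inference
}  // namespace paddle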