[Paddle-TRT] Full support for ops with persistable input (#45545)

* Move ITensor construction for Weight (persistable variable) from OpConvert to TensorRTEngine.

[Paddle-TRT] Full support for ops with persistable input (#45545)
* Move ITensor construction for Weight (persistable variable) from OpConvert to TensorRTEngine.
668ffd59 · zhoutianzi666 · GitHub · 048c4e38 · 668ffd59 · 668ffd59
4 changed files
--- a/paddle/fluid/inference/tensorrt/convert/generic_and_custom_plugin_creater.cc
+++ b/paddle/fluid/inference/tensorrt/convert/generic_and_custom_plugin_creater.cc
@@ -51,13 +51,6 @@ class CustomPluginCreater : public OpConverter {
    auto &op_input_names = framework::OpMetaInfoHelper::GetInputs(op_info);
    for (auto &param_name : op_input_names) {
      for (auto &arg_name : op_desc.Input(param_name)) {
-        framework::Variable *X_v = nullptr;
-        X_v = scope.FindVar(arg_name);
-        // If this weight is not shared between ops, it need to be convtered to
-        // itensor
-        if (X_v && !engine_->GetITensorMap()->count(arg_name)) {
-          ConvertWeight2ITensor(scope, arg_name);
-        }
        inputs.push_back(engine_->GetITensor(arg_name));
      }
    }
@@ -193,14 +186,6 @@ class GenericPluginCreater : public OpConverter {

    for (auto &param_name : phi_kernel_signature.input_names) {
      for (auto &arg_name : op_desc.Input(param_name)) {
-        framework::Variable *X_v = nullptr;
-        X_v = scope.FindVar(arg_name);
-        // If this weight is not shared between ops, it need to be convtered to
-        // itensor
-        if (X_v && !engine_->GetITensorMap()->count(arg_name)) {
-          ConvertWeight2ITensor(scope, arg_name);
-        }
-
        inputs.push_back(engine_->GetITensor(arg_name));
        auto *var = block_desc.FindVar(arg_name);
        PADDLE_ENFORCE_NOT_NULL(

--- a/paddle/fluid/inference/tensorrt/convert/op_converter.h
+++ b/paddle/fluid/inference/tensorrt/convert/op_converter.h
@@ -178,6 +178,7 @@ class OpConverter {
                                        op_desc.Type()));

    it->SetEngine(engine);
+    engine->SetScope(scope);
    it->SetBlockDesc(block);
    (*it)(op, scope, test_mode);

@@ -255,31 +256,6 @@ class OpConverter {
                    const framework::Scope& scope,
                    TensorRTEngine* engine) {
    std::unique_lock<std::mutex> lk(mut_);
-    for (int i = 0; i < block.ops_size(); i++) {
-      SetEngine(engine);
-      const auto& op = block.ops(i);
-      framework::OpDesc op_desc(op, nullptr);
-      framework::Variable* X_v = nullptr;
-      std::string X_name;
-      // inputs : string -> std::vector<string>
-      auto inputs = op_desc.Inputs();
-      if (inputs.count("X")) {
-        X_name = op_desc.Input("X")[0];
-      } else if (inputs.count("Input")) {
-        X_name = op_desc.Input("Input")[0];
-      } else if (inputs.count("Y")) {
-        X_name = op_desc.Input("Y")[0];
-      }
-      X_v = scope.FindVar(X_name);
-      // If this weight is shared between ops, it needn't to be convtered to
-      // itensor once again
-      if (engine->GetITensorMap()->count(X_name)) {
-        continue;
-      }
-      if (X_v) {
-        ConvertWeight2ITensor(scope, X_name);
-      }
-    }
    for (int i = 0; i < block.ops_size(); i++) {
      const auto& op = block.ops(i);
      ConvertOp(op, parameters, scope, engine, false, &block);
@@ -596,35 +572,6 @@ class OpConverter {
    return Add1DConstantLayer(input_data, weight_name, scalar);
  }

-  // For cases when input is not middle-tensor , but persistable tensor
-  // you should call this.
-  nvinfer1::ITensor* ConvertWeight2ITensor(const framework::Scope& scope,
-                                           const std::string& name) {
-    auto* var_v = scope.FindVar(name);
-    auto* var_t = var_v->GetMutable<framework::LoDTensor>();
-    auto weight = engine_->GetTrtWeight(name, *var_t);
-
-    // Now we have create weights, then we need create a itensor
-    auto var_dims = var_t->dims();
-    nvinfer1::Dims trt_in_shape;
-    trt_in_shape.nbDims = var_t->dims().size();
-    for (int64_t i = 0; i < trt_in_shape.nbDims; i++) {
-      trt_in_shape.d[i] = var_dims[i];
-    }
-    // In fact , this is not always right, because we can't determine if the 0th
-    // dimension is batch. Just for run chenqu's model
-    if (!engine_->with_dynamic_shape()) {
-      trt_in_shape.nbDims--;
-      for (int i = 0; i < trt_in_shape.nbDims; i++) {
-        trt_in_shape.d[i] = trt_in_shape.d[i + 1];
-      }
-    }
-    nvinfer1::ILayer* layer =
-        TRT_ENGINE_ADD_LAYER(engine_, Constant, trt_in_shape, weight.get());
-    engine_->SetITensor(name, layer->getOutput(0));
-    return layer->getOutput(0);
-  }
-
  void RreplenishLayerAndOutput(
      nvinfer1::ILayer* layer,
      const std::string& layer_type,

--- a/paddle/fluid/inference/tensorrt/engine.cc
+++ b/paddle/fluid/inference/tensorrt/engine.cc
@@ -369,11 +369,47 @@ void TensorRTEngine::SetITensor(const std::string &name,
 }

 nvinfer1::ITensor *TensorRTEngine::GetITensor(const std::string &name) {
-  PADDLE_ENFORCE_EQ(itensor_map_.count(name),
-                    true,
-                    platform::errors::NotFound(
-                        "Tensor named %s is not found in TRT engine", name));
-  return itensor_map_[name];
+  if (itensor_map_.count(name)) {  // already registered under this name
+    return itensor_map_[name];
+  } else {
+    ConvertWeight2ITensor(name);  // otherwise build it from the scope weight
+    return itensor_map_[name];
+  }
+}
+
+// Builds a TRT Constant layer from the persistable variable `name` found in
+// scope_, registers its output via SetITensor and returns that ITensor.
+nvinfer1::ITensor *TensorRTEngine::ConvertWeight2ITensor(
+    const std::string &name) {
+  auto *var_v = scope_->FindVar(name);  // scope_ is set through SetScope()
+  PADDLE_ENFORCE_NOT_NULL(
+      var_v,
+      platform::errors::NotFound("You are converting a persistable weight to a "
+                                 "tensor, but there is no "
+                                 "persistable variable called %s in scope.",
+                                 name));
+  auto *var_t = var_v->GetMutable<framework::LoDTensor>();
+  auto weight = this->GetTrtWeight(name, *var_t);
+
+  // The weights are created; now build the TRT shape for the ITensor.
+  auto var_dims = var_t->dims();
+  nvinfer1::Dims trt_in_shape;
+  trt_in_shape.nbDims = var_t->dims().size();
+  for (int64_t i = 0; i < trt_in_shape.nbDims; i++) {
+    trt_in_shape.d[i] = var_dims[i];
+  }
+  // NOTE: this is not always right, because we cannot determine whether the
+  // 0th dimension is the batch; it is dropped here just to run chenqu's model.
+  if (!this->with_dynamic_shape()) {
+    trt_in_shape.nbDims--;  // NOTE(review): yields -1 if the tensor has 0 dims
+    for (int i = 0; i < trt_in_shape.nbDims; i++) {
+      trt_in_shape.d[i] = trt_in_shape.d[i + 1];
+    }
+  }
+  nvinfer1::ILayer *layer =
+      TRT_ENGINE_ADD_LAYER(this, Constant, trt_in_shape, weight.get());
+  this->SetITensor(name, layer->getOutput(0));
+  return layer->getOutput(0);
 }

 std::unordered_map<std::string, nvinfer1::ITensor *>

--- a/paddle/fluid/inference/tensorrt/engine.h
+++ b/paddle/fluid/inference/tensorrt/engine.h
@@ -24,9 +24,9 @@ limitations under the License. */
 #include <unordered_set>
 #include <utility>
 #include <vector>
-
 #include "NvInferRuntimeCommon.h"
 #include "paddle/fluid/framework/lod_tensor.h"
+#include "paddle/fluid/framework/scope.h"
 #include "paddle/fluid/framework/tensor.h"
 #include "paddle/fluid/framework/tensor_util.h"
 #include "paddle/fluid/inference/api/paddle_analysis_config.h"
@@ -283,6 +283,7 @@ class TensorRTEngine {
  void SetITensor(const std::string& name, nvinfer1::ITensor* tensor);
  // Get an ITensor called name.
  nvinfer1::ITensor* GetITensor(const std::string& name);
+  nvinfer1::ITensor* ConvertWeight2ITensor(const std::string& name);
  std::unordered_map<std::string, nvinfer1::ITensor*>* GetITensorMap();

  nvinfer1::ICudaEngine* engine() { return infer_engine_.get(); }
@@ -691,12 +692,15 @@ class TensorRTEngine {
  void GetEngineInfo();

  void SetUseInspector(bool use_inspector) { use_inspector_ = use_inspector; }
+  void SetScope(const framework::Scope& scope) { scope_ = &scope; }

 private:
  // Each ICudaEngine object is bound to a specific GPU when it is instantiated,
  // ensure that the thread is associated with the correct device by calling
  // freshDeviceId().
  void freshDeviceId();
+  // Scope used to look up persistable weights when converting them to ITensors.
+  const framework::Scope* scope_;

  // the max batch size
  int max_batch_;