From 668ffd5964473a13bfd5c6ffcc285b66777f745d Mon Sep 17 00:00:00 2001 From: zhoutianzi666 <39978853+zhoutianzi666@users.noreply.github.com> Date: Tue, 20 Sep 2022 15:09:08 +0800 Subject: [PATCH] [Paddle-TRT] Full support for ops with persistable input (#45545) * Move ITensor construction for Weight (persistable variable) from OpConvert to TensorRTEngine. --- .../generic_and_custom_plugin_creater.cc | 15 ----- .../inference/tensorrt/convert/op_converter.h | 55 +------------------ paddle/fluid/inference/tensorrt/engine.cc | 46 ++++++++++++++-- paddle/fluid/inference/tensorrt/engine.h | 6 +- 4 files changed, 47 insertions(+), 75 deletions(-) diff --git a/paddle/fluid/inference/tensorrt/convert/generic_and_custom_plugin_creater.cc b/paddle/fluid/inference/tensorrt/convert/generic_and_custom_plugin_creater.cc index e1ce9ceb02..b5d9a50f06 100644 --- a/paddle/fluid/inference/tensorrt/convert/generic_and_custom_plugin_creater.cc +++ b/paddle/fluid/inference/tensorrt/convert/generic_and_custom_plugin_creater.cc @@ -51,13 +51,6 @@ class CustomPluginCreater : public OpConverter { auto &op_input_names = framework::OpMetaInfoHelper::GetInputs(op_info); for (auto ¶m_name : op_input_names) { for (auto &arg_name : op_desc.Input(param_name)) { - framework::Variable *X_v = nullptr; - X_v = scope.FindVar(arg_name); - // If this weight is not shared between ops, it need to be convtered to - // itensor - if (X_v && !engine_->GetITensorMap()->count(arg_name)) { - ConvertWeight2ITensor(scope, arg_name); - } inputs.push_back(engine_->GetITensor(arg_name)); } } @@ -193,14 +186,6 @@ class GenericPluginCreater : public OpConverter { for (auto ¶m_name : phi_kernel_signature.input_names) { for (auto &arg_name : op_desc.Input(param_name)) { - framework::Variable *X_v = nullptr; - X_v = scope.FindVar(arg_name); - // If this weight is not shared between ops, it need to be convtered to - // itensor - if (X_v && !engine_->GetITensorMap()->count(arg_name)) { - ConvertWeight2ITensor(scope, arg_name); - } - inputs.push_back(engine_->GetITensor(arg_name)); auto *var = block_desc.FindVar(arg_name); PADDLE_ENFORCE_NOT_NULL( diff --git a/paddle/fluid/inference/tensorrt/convert/op_converter.h b/paddle/fluid/inference/tensorrt/convert/op_converter.h index 095457dbfb..85a9b9d2fb 100644 --- a/paddle/fluid/inference/tensorrt/convert/op_converter.h +++ b/paddle/fluid/inference/tensorrt/convert/op_converter.h @@ -178,6 +178,7 @@ class OpConverter { op_desc.Type())); it->SetEngine(engine); + engine->SetScope(scope); it->SetBlockDesc(block); (*it)(op, scope, test_mode); @@ -255,31 +256,6 @@ class OpConverter { const framework::Scope& scope, TensorRTEngine* engine) { std::unique_lock lk(mut_); - for (int i = 0; i < block.ops_size(); i++) { - SetEngine(engine); - const auto& op = block.ops(i); - framework::OpDesc op_desc(op, nullptr); - framework::Variable* X_v = nullptr; - std::string X_name; - // inputs : string -> std::vector - auto inputs = op_desc.Inputs(); - if (inputs.count("X")) { - X_name = op_desc.Input("X")[0]; - } else if (inputs.count("Input")) { - X_name = op_desc.Input("Input")[0]; - } else if (inputs.count("Y")) { - X_name = op_desc.Input("Y")[0]; - } - X_v = scope.FindVar(X_name); - // If this weight is shared between ops, it needn't to be convtered to - // itensor once again - if (engine->GetITensorMap()->count(X_name)) { - continue; - } - if (X_v) { - ConvertWeight2ITensor(scope, X_name); - } - } for (int i = 0; i < block.ops_size(); i++) { const auto& op = block.ops(i); ConvertOp(op, parameters, scope, engine, false, &block); @@ -596,35 +572,6 @@ class OpConverter { return Add1DConstantLayer(input_data, weight_name, scalar); } - // For cases when input is not middle-tensor , but persistable tensor - // you should call this. - nvinfer1::ITensor* ConvertWeight2ITensor(const framework::Scope& scope, - const std::string& name) { - auto* var_v = scope.FindVar(name); - auto* var_t = var_v->GetMutable(); - auto weight = engine_->GetTrtWeight(name, *var_t); - - // Now we have create weights, then we need create a itensor - auto var_dims = var_t->dims(); - nvinfer1::Dims trt_in_shape; - trt_in_shape.nbDims = var_t->dims().size(); - for (int64_t i = 0; i < trt_in_shape.nbDims; i++) { - trt_in_shape.d[i] = var_dims[i]; - } - // In fact , this is not always right, because we can't determine if the 0th - // dimension is batch. Just for run chenqu's model - if (!engine_->with_dynamic_shape()) { - trt_in_shape.nbDims--; - for (int i = 0; i < trt_in_shape.nbDims; i++) { - trt_in_shape.d[i] = trt_in_shape.d[i + 1]; - } - } - nvinfer1::ILayer* layer = - TRT_ENGINE_ADD_LAYER(engine_, Constant, trt_in_shape, weight.get()); - engine_->SetITensor(name, layer->getOutput(0)); - return layer->getOutput(0); - } - void RreplenishLayerAndOutput( nvinfer1::ILayer* layer, const std::string& layer_type, diff --git a/paddle/fluid/inference/tensorrt/engine.cc b/paddle/fluid/inference/tensorrt/engine.cc index e70a49c685..0cf1d6352c 100644 --- a/paddle/fluid/inference/tensorrt/engine.cc +++ b/paddle/fluid/inference/tensorrt/engine.cc @@ -369,11 +369,47 @@ void TensorRTEngine::SetITensor(const std::string &name, } nvinfer1::ITensor *TensorRTEngine::GetITensor(const std::string &name) { - PADDLE_ENFORCE_EQ(itensor_map_.count(name), - true, - platform::errors::NotFound( - "Tensor named %s is not found in TRT engine", name)); - return itensor_map_[name]; + if (itensor_map_.count(name)) { + return itensor_map_[name]; + } else { + ConvertWeight2ITensor(name); + return itensor_map_[name]; + } +} + +// For cases when input is not middle-tensor , but persistable tensor +// you should call this. +nvinfer1::ITensor *TensorRTEngine::ConvertWeight2ITensor( + const std::string &name) { + auto *var_v = scope_->FindVar(name); + PADDLE_ENFORCE_NOT_NULL( + var_v, + platform::errors::NotFound("You are converting a persistable weight to a " + "tensor, but there is no " + "persistable variable called %s in scope.", + name)); + auto *var_t = var_v->GetMutable(); + auto weight = this->GetTrtWeight(name, *var_t); + + // Now we have create weights, then we need create a itensor + auto var_dims = var_t->dims(); + nvinfer1::Dims trt_in_shape; + trt_in_shape.nbDims = var_t->dims().size(); + for (int64_t i = 0; i < trt_in_shape.nbDims; i++) { + trt_in_shape.d[i] = var_dims[i]; + } + // In fact , this is not always right, because we can't determine if the 0th + // dimension is batch. Just for run chenqu's model + if (!this->with_dynamic_shape()) { + trt_in_shape.nbDims--; + for (int i = 0; i < trt_in_shape.nbDims; i++) { + trt_in_shape.d[i] = trt_in_shape.d[i + 1]; + } + } + nvinfer1::ILayer *layer = + TRT_ENGINE_ADD_LAYER(this, Constant, trt_in_shape, weight.get()); + this->SetITensor(name, layer->getOutput(0)); + return layer->getOutput(0); } std::unordered_map diff --git a/paddle/fluid/inference/tensorrt/engine.h b/paddle/fluid/inference/tensorrt/engine.h index 861a2aa8df..209f297a06 100644 --- a/paddle/fluid/inference/tensorrt/engine.h +++ b/paddle/fluid/inference/tensorrt/engine.h @@ -24,9 +24,9 @@ limitations under the License. */ #include #include #include - #include "NvInferRuntimeCommon.h" #include "paddle/fluid/framework/lod_tensor.h" +#include "paddle/fluid/framework/scope.h" #include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/framework/tensor_util.h" #include "paddle/fluid/inference/api/paddle_analysis_config.h" @@ -283,6 +283,7 @@ class TensorRTEngine { void SetITensor(const std::string& name, nvinfer1::ITensor* tensor); // Get an ITensor called name. nvinfer1::ITensor* GetITensor(const std::string& name); + nvinfer1::ITensor* ConvertWeight2ITensor(const std::string& name); std::unordered_map* GetITensorMap(); nvinfer1::ICudaEngine* engine() { return infer_engine_.get(); } @@ -691,12 +692,15 @@ class TensorRTEngine { void GetEngineInfo(); void SetUseInspector(bool use_inspector) { use_inspector_ = use_inspector; } + void SetScope(const framework::Scope& scope) { scope_ = &scope; } private: // Each ICudaEngine object is bound to a specific GPU when it is instantiated, // ensure that the thread is associated with the correct device by calling // freshDeviceId(). void freshDeviceId(); + // Used for convert weight into Itensor + const framework::Scope* scope_; // the max batch size int max_batch_; -- GitLab