Unverified commit adb2f5e6 authored by zhoutianzi666, committed by GitHub

[Paddle-TRT] Full support for ops with persistable input (#45545) (#46280)

* Move ITensor construction for weights (persistable variables) from OpConverter to TensorRTEngine.
Parent 42e56f65
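In effect, op converters previously had to check the engine's ITensor map and convert persistable weights themselves; after this change, TensorRTEngine::GetITensor performs that conversion lazily on a cache miss, as the hunks below show. A minimal sketch of the pattern, using stand-in types rather than the real nvinfer1/Paddle classes (the weight name "conv1.w_0" is illustrative only):

```cpp
#include <iostream>
#include <string>
#include <unordered_map>

struct FakeITensor {
  std::string name;
};

class FakeEngine {
 public:
  // After this commit, GetITensor converts a persistable weight on a cache
  // miss instead of failing with NotFound.
  FakeITensor* GetITensor(const std::string& name) {
    auto it = itensor_map_.find(name);
    if (it == itensor_map_.end()) {
      it = itensor_map_.emplace(name, ConvertWeight2ITensor(name)).first;
    }
    return &it->second;
  }

 private:
  // Stand-in for building a TensorRT Constant layer from the weight in scope.
  FakeITensor ConvertWeight2ITensor(const std::string& name) {
    std::cout << "converting weight " << name << " to an ITensor\n";
    return FakeITensor{name};
  }
  std::unordered_map<std::string, FakeITensor> itensor_map_;
};

int main() {
  FakeEngine engine;
  engine.GetITensor("conv1.w_0");  // converted on first use
  engine.GetITensor("conv1.w_0");  // served from the cache
  return 0;
}
```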
......@@ -51,13 +51,6 @@ class CustomPluginCreater : public OpConverter {
auto &op_input_names = framework::OpMetaInfoHelper::GetInputs(op_info);
for (auto &param_name : op_input_names) {
for (auto &arg_name : op_desc.Input(param_name)) {
framework::Variable *X_v = nullptr;
X_v = scope.FindVar(arg_name);
// If this weight is not shared between ops, it needs to be converted to
// an ITensor
if (X_v && !engine_->GetITensorMap()->count(arg_name)) {
ConvertWeight2ITensor(scope, arg_name);
}
inputs.push_back(engine_->GetITensor(arg_name));
}
}
......@@ -193,14 +186,6 @@ class GenericPluginCreater : public OpConverter {
for (auto &param_name : phi_kernel_signature.input_names) {
for (auto &arg_name : op_desc.Input(param_name)) {
framework::Variable *X_v = nullptr;
X_v = scope.FindVar(arg_name);
// If this weight is not shared between ops, it needs to be converted to
// an ITensor
if (X_v && !engine_->GetITensorMap()->count(arg_name)) {
ConvertWeight2ITensor(scope, arg_name);
}
inputs.push_back(engine_->GetITensor(arg_name));
auto *var = block_desc.FindVar(arg_name);
PADDLE_ENFORCE_NOT_NULL(
......
......@@ -178,6 +178,7 @@ class OpConverter {
op_desc.Type()));
it->SetEngine(engine);
engine->SetScope(scope);
it->SetBlockDesc(block);
(*it)(op, scope, test_mode);
......@@ -255,31 +256,6 @@ class OpConverter {
const framework::Scope& scope,
TensorRTEngine* engine) {
std::unique_lock<std::mutex> lk(mut_);
for (int i = 0; i < block.ops_size(); i++) {
SetEngine(engine);
const auto& op = block.ops(i);
framework::OpDesc op_desc(op, nullptr);
framework::Variable* X_v = nullptr;
std::string X_name;
// inputs : string -> std::vector<string>
auto inputs = op_desc.Inputs();
if (inputs.count("X")) {
X_name = op_desc.Input("X")[0];
} else if (inputs.count("Input")) {
X_name = op_desc.Input("Input")[0];
} else if (inputs.count("Y")) {
X_name = op_desc.Input("Y")[0];
}
X_v = scope.FindVar(X_name);
// If this weight is shared between ops, it doesn't need to be converted to
// an ITensor again
if (engine->GetITensorMap()->count(X_name)) {
continue;
}
if (X_v) {
ConvertWeight2ITensor(scope, X_name);
}
}
for (int i = 0; i < block.ops_size(); i++) {
const auto& op = block.ops(i);
ConvertOp(op, parameters, scope, engine, false, &block);
......@@ -596,35 +572,6 @@ class OpConverter {
return Add1DConstantLayer(input_data, weight_name, scalar);
}
// For cases when the input is not an intermediate tensor but a persistable
// tensor, you should call this.
nvinfer1::ITensor* ConvertWeight2ITensor(const framework::Scope& scope,
const std::string& name) {
auto* var_v = scope.FindVar(name);
auto* var_t = var_v->GetMutable<framework::LoDTensor>();
auto weight = engine_->GetTrtWeight(name, *var_t);
// Now that we have created the weight, we need to create an ITensor
auto var_dims = var_t->dims();
nvinfer1::Dims trt_in_shape;
trt_in_shape.nbDims = var_t->dims().size();
for (int64_t i = 0; i < trt_in_shape.nbDims; i++) {
trt_in_shape.d[i] = var_dims[i];
}
// In fact, this is not always correct, because we can't determine whether
// the 0th dimension is the batch. This is just to run chenqu's model
if (!engine_->with_dynamic_shape()) {
trt_in_shape.nbDims--;
for (int i = 0; i < trt_in_shape.nbDims; i++) {
trt_in_shape.d[i] = trt_in_shape.d[i + 1];
}
}
nvinfer1::ILayer* layer =
TRT_ENGINE_ADD_LAYER(engine_, Constant, trt_in_shape, weight.get());
engine_->SetITensor(name, layer->getOutput(0));
return layer->getOutput(0);
}
void RreplenishLayerAndOutput(
nvinfer1::ILayer* layer,
const std::string& layer_type,
......
......@@ -369,11 +369,47 @@ void TensorRTEngine::SetITensor(const std::string &name,
}
nvinfer1::ITensor *TensorRTEngine::GetITensor(const std::string &name) {
PADDLE_ENFORCE_EQ(itensor_map_.count(name),
true,
platform::errors::NotFound(
"Tensor named %s is not found in TRT engine", name));
return itensor_map_[name];
if (itensor_map_.count(name)) {
return itensor_map_[name];
} else {
ConvertWeight2ITensor(name);
return itensor_map_[name];
}
}
// For cases when the input is not an intermediate tensor but a persistable
// tensor, you should call this.
nvinfer1::ITensor *TensorRTEngine::ConvertWeight2ITensor(
const std::string &name) {
auto *var_v = scope_->FindVar(name);
PADDLE_ENFORCE_NOT_NULL(
var_v,
platform::errors::NotFound("You are converting a persistable weight to a "
"tensor, but there is no "
"persistable variable called %s in scope.",
name));
auto *var_t = var_v->GetMutable<framework::LoDTensor>();
auto weight = this->GetTrtWeight(name, *var_t);
// Now that we have created the weight, we need to create an ITensor
auto var_dims = var_t->dims();
nvinfer1::Dims trt_in_shape;
trt_in_shape.nbDims = var_t->dims().size();
for (int64_t i = 0; i < trt_in_shape.nbDims; i++) {
trt_in_shape.d[i] = var_dims[i];
}
// In fact, this is not always correct, because we can't determine whether
// the 0th dimension is the batch. This is just to run chenqu's model
if (!this->with_dynamic_shape()) {
trt_in_shape.nbDims--;
for (int i = 0; i < trt_in_shape.nbDims; i++) {
trt_in_shape.d[i] = trt_in_shape.d[i + 1];
}
}
nvinfer1::ILayer *layer =
TRT_ENGINE_ADD_LAYER(this, Constant, trt_in_shape, weight.get());
this->SetITensor(name, layer->getOutput(0));
return layer->getOutput(0);
}
std::unordered_map<std::string, nvinfer1::ITensor *>
......
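To make the static-shape branch above concrete: for a persistable weight whose framework dims are [1, 3, 4], the code drops the leading dimension (assumed to be the batch) and shifts the remaining ones left, yielding TensorRT dims [3, 4]. A minimal, self-contained sketch of just that shift, with DimsLite standing in for nvinfer1::Dims and the dims made up:

```cpp
#include <cstdio>

struct DimsLite {
  int nbDims;
  int d[8];
};

int main() {
  DimsLite s{3, {1, 3, 4}};    // weight dims as reported by the framework
  bool with_dynamic_shape = false;
  if (!with_dynamic_shape) {   // static shape: drop the assumed batch dim
    s.nbDims--;
    for (int i = 0; i < s.nbDims; i++) s.d[i] = s.d[i + 1];
  }
  std::printf("nbDims=%d d=[%d,%d]\n", s.nbDims, s.d[0], s.d[1]);
  // prints: nbDims=2 d=[3,4]
  return 0;
}
```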
......@@ -24,9 +24,9 @@ limitations under the License. */
#include <unordered_set>
#include <utility>
#include <vector>
#include "NvInferRuntimeCommon.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/inference/api/paddle_analysis_config.h"
......@@ -283,6 +283,7 @@ class TensorRTEngine {
void SetITensor(const std::string& name, nvinfer1::ITensor* tensor);
// Get an ITensor called name.
nvinfer1::ITensor* GetITensor(const std::string& name);
nvinfer1::ITensor* ConvertWeight2ITensor(const std::string& name);
std::unordered_map<std::string, nvinfer1::ITensor*>* GetITensorMap();
nvinfer1::ICudaEngine* engine() { return infer_engine_.get(); }
......@@ -691,12 +692,15 @@ class TensorRTEngine {
void GetEngineInfo();
void SetUseInspector(bool use_inspector) { use_inspector_ = use_inspector; }
void SetScope(const framework::Scope& scope) { scope_ = &scope; }
private:
// Each ICudaEngine object is bound to a specific GPU when it is instantiated,
// ensure that the thread is associated with the correct device by calling
// freshDeviceId().
void freshDeviceId();
// Used for converting weights into ITensors
const framework::Scope* scope_;
// the max batch size
int max_batch_;
......
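Because the weight-conversion fallback in GetITensor reads from scope_, SetScope must run before any converter requests a persistable input by name; OpConverter does this via engine->SetScope(scope) in the hunk further up. A hedged sketch of the required ordering, again with stand-in types (the weight name "fc_0.w_0" is illustrative):

```cpp
#include <cassert>
#include <string>

struct FakeScope { /* holds persistable variables by name */ };

class FakeEngine {
 public:
  void SetScope(const FakeScope& scope) { scope_ = &scope; }
  void ConvertWeight2ITensor(const std::string& name) {
    assert(scope_ != nullptr && "call SetScope before converting weights");
    // ... look up `name` in *scope_ and build a Constant layer ...
    (void)name;
  }

 private:
  const FakeScope* scope_ = nullptr;  // starts out null until SetScope runs
};

int main() {
  FakeScope scope;
  FakeEngine engine;
  engine.SetScope(scope);                    // bind the scope first
  engine.ConvertWeight2ITensor("fc_0.w_0");  // now safe
  return 0;
}
```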