Unverified commit 3fd6f09f, authored by zhoutianzi666, committed by GitHub

[Paddle-TRT] Support inputs that are weights (#44051)

* Support inputs that are weights
Parent d520029f
@@ -230,10 +230,54 @@ class OpConverter {
                                const framework::Scope& scope,
                                TensorRTEngine* engine) {
     std::unique_lock<std::mutex> lk(mut_);
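+    // First pass: pre-convert persistable (weight) inputs into ITensors so
+    // that op converters can consume them like any other network input.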
+    for (int i = 0; i < block.ops_size(); i++) {
+      SetEngine(engine);
+      const auto& op = block.ops(i);
+      framework::OpDesc op_desc(op, nullptr);
+      framework::Variable* X_v = nullptr;
+      std::string X_name;
+      // inputs : string -> std::vector<string>
+      auto inputs = op_desc.Inputs();
+      if (inputs.count("X")) {
+        X_name = op_desc.Input("X")[0];
+      } else if (inputs.count("Input")) {
+        X_name = op_desc.Input("Input")[0];
+      } else if (inputs.count("Y")) {
+        X_name = op_desc.Input("Y")[0];
+      }
+      X_v = scope.FindVar(X_name);
+      // If this weight is shared between ops, it need not be converted to an
+      // ITensor again.
+      if (engine->GetITensorMap()->count(X_name)) {
+        continue;
+      }
+      if (X_v) {
+        ConvertWeight2ITensor(scope, X_name);
+      }
+    }
     for (int i = 0; i < block.ops_size(); i++) {
       const auto& op = block.ops(i);
       ConvertOp(op, parameters, scope, engine);
     }
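+    // Final pass: a Shuffle layer only reshapes or permutes data, so when its
+    // input already carries an INT8 dynamic range and its output does not,
+    // the input's scale can be propagated to the output unchanged.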
+    for (int i = 0; i < engine->network()->getNbLayers(); i++) {
+      auto layer = engine->network()->getLayer(i);
+      if (layer->getType() == nvinfer1::LayerType::kSHUFFLE) {
+        auto* input_tensor = layer->getInput(0);
+        auto* output_tensor = layer->getOutput(0);
+        auto output_tensor_name = output_tensor->getName();
+        auto input_tensor_name = input_tensor->getName();
+        if (engine->DynamicRangeIsSet(input_tensor) &&
+            !engine->DynamicRangeIsSet(output_tensor)) {
+          float output_scale = engine->GetTensorDynamicRange(input_tensor);
+          VLOG(1) << "Set output tensor scale = " << output_scale
+                  << " for tensor in TensorRT: " << output_tensor_name << ".";
+          engine->SetTensorDynamicRange(output_tensor, output_scale);
+        } else {
+          VLOG(1) << "Failed to get input tensor scale for tensor in TensorRT: "
+                  << input_tensor_name << ".";
+        }
+      }
+    }
   }
   // The scope here should be initialized with the parameter vars.
@@ -273,8 +317,8 @@ class OpConverter {
         continue;
       }
       std::vector<int64_t> input_shape;
-      input_shape.push_back(-1);
-      for (size_t i = 1; i < ranks; i++) {
+      // input_shape.push_back(-1);
+      for (size_t i = 0; i < ranks; i++) {
         if (min_input_shape[i] != max_input_shape[i]) {
           input_shape.push_back(-1);
         } else {
@@ -402,6 +446,17 @@ class OpConverter {
     return c;
   }
+  nvinfer1::ITensor* FloorDiv(nvinfer1::ITensor* a, nvinfer1::ITensor* b) {
+    nvinfer1::ITensor* c =
+        TRT_ENGINE_ADD_LAYER(engine_,
+                             ElementWise,
+                             *a,
+                             *b,
+                             nvinfer1::ElementWiseOperation::kFLOOR_DIV)
+            ->getOutput(0);
+    return c;
+  }
   nvinfer1::ITensor* Act(nvinfer1::ITensor* a,
                          nvinfer1::ActivationType act_type) {
     nvinfer1::ITensor* c =
@@ -422,22 +477,27 @@ class OpConverter {
             ->getOutput(0);
     return tensor;
   }
-  // Create and add Multi-D constant float layer
-  nvinfer1::ITensor* AddConstantLayer(const float* data,
+  template <typename T>
+  // Create and add Multi-D constant float/int32 layer
+  nvinfer1::ITensor* AddConstantLayer(const T* data,
                                       const std::vector<int32_t>& weight_dims,
                                       const std::string& weight_name) {
-    std::unique_ptr<framework::Tensor> tmp_tensor(new framework::Tensor());
     int data_size = std::accumulate(
         weight_dims.begin(), weight_dims.end(), 1, std::multiplies<int>());
+    std::unique_ptr<framework::Tensor> tmp_tensor(new framework::Tensor());
     tmp_tensor->Resize({data_size});
-    auto* tmp_data = tmp_tensor->mutable_data<float>(platform::CPUPlace());
+    auto* tmp_data = tmp_tensor->mutable_data<T>(platform::CPUPlace());
     for (int i = 0; i < data_size; i++) {
       tmp_data[i] = data[i];
     }
     engine_->SetWeights(weight_name, std::move(tmp_tensor));
-    TensorRTEngine::Weight weight{nvinfer1::DataType::kFLOAT,
+    nvinfer1::DataType trt_dtype = nvinfer1::DataType::kFLOAT;
+    if (std::is_integral<T>::value) {
+      trt_dtype = nvinfer1::DataType::kINT32;
+    }
+    TensorRTEngine::Weight weight{trt_dtype,
                                   static_cast<void*>(tmp_data),
                                   static_cast<size_t>(data_size)};
     nvinfer1::Dims trt_dims;
@@ -449,44 +509,26 @@ class OpConverter {
     return const_layer->getOutput(0);
   }
-  // Create and add 1D constant float layer
-  nvinfer1::ITensor* Add1DConstantLayer(const std::vector<float>& data,
+  // Create and add 1D constant float/int32 layer
+  template <typename T>
+  nvinfer1::ITensor* Add1DConstantLayer(const std::vector<T>& data,
                                         const std::string& weight_name = "",
                                         bool scalar = false) {
     std::unique_ptr<framework::Tensor> tmp_tensor(new framework::Tensor());
     int data_size = data.size();
     tmp_tensor->Resize({data_size});
-    auto* tmp_data = tmp_tensor->mutable_data<float>(platform::CPUPlace());
+    auto* tmp_data = tmp_tensor->mutable_data<T>(platform::CPUPlace());
     for (int i = 0; i < data_size; i++) {
       tmp_data[i] = data[i];
     }
     engine_->SetWeights(weight_name, std::move(tmp_tensor));
-    TensorRTEngine::Weight weight{nvinfer1::DataType::kFLOAT,
-                                  static_cast<void*>(tmp_data),
-                                  static_cast<size_t>(data_size)};
-    nvinfer1::Dims input_shape;
-    input_shape.nbDims = scalar ? 0 : 1;
-    input_shape.d[0] = data_size;
-    auto const_layer =
-        TRT_ENGINE_ADD_LAYER(engine_, Constant, input_shape, weight.get());
-    return const_layer->getOutput(0);
-  }
-
-  // Create and add 1D constant layer
-  nvinfer1::ITensor* Add1DConstantLayer(const std::vector<int>& data,
-                                        const std::string& weight_name = "",
-                                        bool scalar = false) {
-    std::unique_ptr<framework::Tensor> tmp_tensor(new framework::Tensor());
-    int data_size = data.size();
-    tmp_tensor->Resize({data_size});
-    auto* tmp_data = tmp_tensor->mutable_data<int>(platform::CPUPlace());
-    for (int i = 0; i < data_size; i++) {
-      tmp_data[i] = data[i];
-    }
-    engine_->SetWeights(weight_name, std::move(tmp_tensor));
-    TensorRTEngine::Weight weight{nvinfer1::DataType::kINT32,
+    nvinfer1::DataType trt_dtype = nvinfer1::DataType::kFLOAT;
+    if (std::is_integral<T>::value) {
+      trt_dtype = nvinfer1::DataType::kINT32;
+    }
+    TensorRTEngine::Weight weight{trt_dtype,
                                   static_cast<void*>(tmp_data),
                                   static_cast<size_t>(data_size)};
     nvinfer1::Dims input_shape;
@@ -513,6 +555,61 @@ class OpConverter {
     return Add1DConstantLayer(tmp_data, weight_name, scalar);
   }
+  // For cases when the input is not an intermediate tensor but a persistable
+  // (weight) tensor, call this to convert it.
+  nvinfer1::ITensor* ConvertWeight2ITensor(const framework::Scope& scope,
+                                           const std::string& name) {
+    auto* var_v = scope.FindVar(name);
+    auto* var_t = var_v->GetMutable<framework::LoDTensor>();
+    void* trt_ptr = nullptr;
+    size_t trt_num = static_cast<size_t>(var_t->numel());
+    nvinfer1::DataType trt_dtype = nvinfer1::DataType::kFLOAT;
+    if (var_t->dtype() == phi::DataType::FLOAT32) {
+      float* data_ptr = engine_->GetWeightCPUData(name, var_t);
+      trt_ptr = static_cast<void*>(data_ptr);
+    } else if (var_t->dtype() == phi::DataType::INT32) {
+      int32_t* data_ptr = engine_->GetWeightCPUData<int32_t>(name, var_t);
+      trt_ptr = static_cast<void*>(data_ptr);
+      trt_dtype = nvinfer1::DataType::kINT32;
+    } else if (var_t->dtype() == phi::DataType::INT64) {
+      int64_t* data_ptr = engine_->GetWeightCPUData<int64_t>(name, var_t);
+      // TensorRT has no INT64, so we must copy the data into a new INT32
+      // framework::Tensor.
+      std::unique_ptr<framework::Tensor> new_var_t(new framework::Tensor());
+      new_var_t->Resize({var_t->numel()});
+      int32_t* new_data_ptr =
+          new_var_t->mutable_data<int32_t>(platform::CPUPlace());
+      for (size_t i = 0; i < trt_num; i++) {
+        new_data_ptr[i] = data_ptr[i];
+      }
+      engine_->SetWeights(name, std::move(new_var_t));
+      trt_ptr = static_cast<void*>(new_data_ptr);
+      trt_dtype = nvinfer1::DataType::kINT32;
+    } else {
+      PADDLE_THROW(platform::errors::InvalidArgument(
+          "Unsupported datatype in TensorRT"));
+    }
+    // Now that the weights are created, create an ITensor from them.
+    auto var_dims = var_t->dims();
+    nvinfer1::Dims trt_in_shape;
+    trt_in_shape.nbDims = var_t->dims().size();
+    for (int64_t i = 0; i < trt_in_shape.nbDims; i++) {
+      trt_in_shape.d[i] = var_dims[i];
+    }
+    // In static-shape mode, drop the 0th dimension. This is not always right,
+    // because we cannot tell whether the 0th dimension is the batch dimension;
+    // it is kept this way to run chenqu's model.
+    if (!engine_->with_dynamic_shape()) {
+      trt_in_shape.nbDims--;
+      for (int i = 0; i < trt_in_shape.nbDims; i++) {
+        trt_in_shape.d[i] = trt_in_shape.d[i + 1];
+      }
+    }
+    TensorRTEngine::Weight weight{trt_dtype, trt_ptr, trt_num};
+    nvinfer1::ILayer* layer =
+        TRT_ENGINE_ADD_LAYER(engine_, Constant, trt_in_shape, weight.get());
+    engine_->SetITensor(name, layer->getOutput(0));
+    return layer->getOutput(0);
+  }
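+  // Usage sketch (hypothetical converter code): for an op whose "Y" input is
+  // a weight, ConvertWeight2ITensor(scope, op_desc.Input("Y")[0]) registers a
+  // Constant layer and makes its output available through the engine's
+  // ITensor map, so later converters can fetch it with engine_->GetITensor().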
   void RreplenishLayerAndOutput(
       nvinfer1::ILayer* layer,
       const std::string& layer_type,
......
@@ -406,6 +406,15 @@ class TensorRTEngine {
   void SetTensorDynamicRange(nvinfer1::ITensor* tensor, float range) {
     quant_dynamic_range_[tensor] = range;
   }
+  float GetTensorDynamicRange(nvinfer1::ITensor* tensor) {
+    return quant_dynamic_range_[tensor];
+  }
+
+  bool DynamicRangeIsSet(nvinfer1::ITensor* tensor) {
+    return quant_dynamic_range_.count(tensor);
+  }
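+  // These two accessors let op converters read back the INT8 scales recorded
+  // by SetTensorDynamicRange, e.g. when propagating a scale across a Shuffle
+  // layer as the block-to-engine conversion above now does.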
template <typename T = float> template <typename T = float>
T* GetWeightCPUData(const std::string& name, T* GetWeightCPUData(const std::string& name,
framework::Tensor* weight_tensor); framework::Tensor* weight_tensor);
......
@@ -150,7 +150,6 @@ void DynamicShapeTest(bool allow_build_at_runtime) {
   else
     CreateCUDATensor(&scope, "x", std::vector<int64_t>({2, 4, 1, 1}));
   CreateCUDATensor(&scope, "y", std::vector<int64_t>({4, 6}));
-  CreateCUDATensor(&scope, "z", std::vector<int64_t>({2, 6}));
   CreateCUDATensor(&scope, "y0", std::vector<int64_t>({6, 8}));
   CreateCUDATensor(&scope, "z0", std::vector<int64_t>({2, 8}));
......