From 3fd6f09fb413e56d067cd0ec5097546ddf0e0ec9 Mon Sep 17 00:00:00 2001
From: zhoutianzi666 <39978853+zhoutianzi666@users.noreply.github.com>
Date: Wed, 6 Jul 2022 18:45:44 +0800
Subject: [PATCH] [Paddle-TRT] support inputs that are weights (#44051)

* support inputs that are weights
---
 .../inference/tensorrt/convert/op_converter.h | 165 ++++++++++++++----
 paddle/fluid/inference/tensorrt/engine.h      |   9 +
 .../tensorrt/tensorrt_engine_op_test.cc       |   1 -
 3 files changed, 140 insertions(+), 35 deletions(-)

diff --git a/paddle/fluid/inference/tensorrt/convert/op_converter.h b/paddle/fluid/inference/tensorrt/convert/op_converter.h
index d179e8bb34c..8bcc926b856 100644
--- a/paddle/fluid/inference/tensorrt/convert/op_converter.h
+++ b/paddle/fluid/inference/tensorrt/convert/op_converter.h
@@ -230,10 +230,54 @@ class OpConverter {
                                const framework::Scope& scope,
                                TensorRTEngine* engine) {
     std::unique_lock<std::mutex> lk(mut_);
+    for (int i = 0; i < block.ops_size(); i++) {
+      SetEngine(engine);
+      const auto& op = block.ops(i);
+      framework::OpDesc op_desc(op, nullptr);
+      framework::Variable* X_v = nullptr;
+      std::string X_name;
+      // inputs : string -> std::vector<std::string>
+      auto inputs = op_desc.Inputs();
+      if (inputs.count("X")) {
+        X_name = op_desc.Input("X")[0];
+      } else if (inputs.count("Input")) {
+        X_name = op_desc.Input("Input")[0];
+      } else if (inputs.count("Y")) {
+        X_name = op_desc.Input("Y")[0];
+      }
+      X_v = scope.FindVar(X_name);
+      // If this weight is shared between ops, it doesn't need to be
+      // converted to an ITensor again.
+      if (engine->GetITensorMap()->count(X_name)) {
+        continue;
+      }
+      if (X_v) {
+        ConvertWeight2ITensor(scope, X_name);
+      }
+    }
     for (int i = 0; i < block.ops_size(); i++) {
       const auto& op = block.ops(i);
       ConvertOp(op, parameters, scope, engine);
     }
+    for (int i = 0; i < engine->network()->getNbLayers(); i++) {
+      auto layer = engine->network()->getLayer(i);
+      if (layer->getType() == nvinfer1::LayerType::kSHUFFLE) {
+        auto* input_tensor = layer->getInput(0);
+        auto* output_tensor = layer->getOutput(0);
+        auto output_tensor_name = output_tensor->getName();
+        auto input_tensor_name = input_tensor->getName();
+        if (engine->DynamicRangeIsSet(input_tensor) &&
+            !engine->DynamicRangeIsSet(output_tensor)) {
+          float output_scale = engine->GetTensorDynamicRange(input_tensor);
+          VLOG(1) << "Set output tensor scale = " << output_scale
+                  << " for tensor in TensorRT: " << output_tensor_name << ".";
+          engine->SetTensorDynamicRange(output_tensor, output_scale);
+        } else {
+          VLOG(1) << "Failed to get input tensor scale for tensor in TensorRT: "
+                  << input_tensor_name << ".";
+        }
+      }
+    }
   }
 
   // The scope here should be inited with the parameter vars.
@@ -273,8 +317,8 @@ class OpConverter {
         continue;
       }
       std::vector<int> input_shape;
-      input_shape.push_back(-1);
-      for (size_t i = 1; i < ranks; i++) {
+      // input_shape.push_back(-1);
+      for (size_t i = 0; i < ranks; i++) {
         if (min_input_shape[i] != max_input_shape[i]) {
           input_shape.push_back(-1);
         } else {
@@ -402,6 +446,17 @@ class OpConverter {
     return c;
   }
 
+  nvinfer1::ITensor* FloorDiv(nvinfer1::ITensor* a, nvinfer1::ITensor* b) {
+    nvinfer1::ITensor* c =
+        TRT_ENGINE_ADD_LAYER(engine_,
+                             ElementWise,
+                             *a,
+                             *b,
+                             nvinfer1::ElementWiseOperation::kFLOOR_DIV)
+            ->getOutput(0);
+    return c;
+  }
+
   nvinfer1::ITensor* Act(nvinfer1::ITensor* a,
                          nvinfer1::ActivationType act_type) {
     nvinfer1::ITensor* c =
@@ -422,22 +477,27 @@ class OpConverter {
             ->getOutput(0);
     return tensor;
   }
-
-  // Create and add Multi-D constant float layer
-  nvinfer1::ITensor* AddConstantLayer(const float* data,
+  template <typename T>
+  // Create and add Multi-D constant float/int32 layer
+  nvinfer1::ITensor* AddConstantLayer(const T* data,
                                       const std::vector<int32_t>& weight_dims,
                                       const std::string& weight_name) {
-    std::unique_ptr<framework::Tensor> tmp_tensor(new framework::Tensor());
     int data_size = std::accumulate(
         weight_dims.begin(), weight_dims.end(), 1, std::multiplies<int>());
+    std::unique_ptr<framework::Tensor> tmp_tensor(new framework::Tensor());
     tmp_tensor->Resize({data_size});
-    auto* tmp_data = tmp_tensor->mutable_data<float>(platform::CPUPlace());
+    auto* tmp_data = tmp_tensor->mutable_data<T>(platform::CPUPlace());
     for (int i = 0; i < data_size; i++) {
       tmp_data[i] = data[i];
     }
     engine_->SetWeights(weight_name, std::move(tmp_tensor));
 
-    TensorRTEngine::Weight weight{nvinfer1::DataType::kFLOAT,
+    nvinfer1::DataType trt_dtype = nvinfer1::DataType::kFLOAT;
+    if (std::is_integral<T>::value) {
+      trt_dtype = nvinfer1::DataType::kINT32;
+    }
+
+    TensorRTEngine::Weight weight{trt_dtype,
                                   static_cast<void*>(tmp_data),
                                   static_cast<size_t>(data_size)};
     nvinfer1::Dims trt_dims;
@@ -449,44 +509,26 @@ class OpConverter {
     return const_layer->getOutput(0);
   }
 
-  // Create and add 1D constant float layer
-  nvinfer1::ITensor* Add1DConstantLayer(const std::vector<float>& data,
+  // Create and add 1D constant float/int32 layer
+  template <typename T>
+  nvinfer1::ITensor* Add1DConstantLayer(const std::vector<T>& data,
                                         const std::string& weight_name = "",
                                         bool scalar = false) {
     std::unique_ptr<framework::Tensor> tmp_tensor(new framework::Tensor());
     int data_size = data.size();
     tmp_tensor->Resize({data_size});
-    auto* tmp_data = tmp_tensor->mutable_data<float>(platform::CPUPlace());
+    auto* tmp_data = tmp_tensor->mutable_data<T>(platform::CPUPlace());
     for (int i = 0; i < data_size; i++) {
       tmp_data[i] = data[i];
    }
     engine_->SetWeights(weight_name, std::move(tmp_tensor));
 
-    TensorRTEngine::Weight weight{nvinfer1::DataType::kFLOAT,
-                                  static_cast<void*>(tmp_data),
-                                  static_cast<size_t>(data_size)};
-    nvinfer1::Dims input_shape;
-    input_shape.nbDims = scalar ? 0 : 1;
-    input_shape.d[0] = data_size;
-    auto const_layer =
-        TRT_ENGINE_ADD_LAYER(engine_, Constant, input_shape, weight.get());
-    return const_layer->getOutput(0);
-  }
-
-  // Create and add 1D constant layer
-  nvinfer1::ITensor* Add1DConstantLayer(const std::vector<int>& data,
-                                        const std::string& weight_name = "",
-                                        bool scalar = false) {
-    std::unique_ptr<framework::Tensor> tmp_tensor(new framework::Tensor());
-    int data_size = data.size();
-    tmp_tensor->Resize({data_size});
-    auto* tmp_data = tmp_tensor->mutable_data<int>(platform::CPUPlace());
-    for (int i = 0; i < data_size; i++) {
-      tmp_data[i] = data[i];
+    nvinfer1::DataType trt_dtype = nvinfer1::DataType::kFLOAT;
+    if (std::is_integral<T>::value) {
+      trt_dtype = nvinfer1::DataType::kINT32;
     }
-    engine_->SetWeights(weight_name, std::move(tmp_tensor));
 
-    TensorRTEngine::Weight weight{nvinfer1::DataType::kINT32,
+    TensorRTEngine::Weight weight{trt_dtype,
                                   static_cast<void*>(tmp_data),
                                   static_cast<size_t>(data_size)};
     nvinfer1::Dims input_shape;
@@ -513,6 +555,61 @@ class OpConverter {
     return Add1DConstantLayer(tmp_data, weight_name, scalar);
  }
 
+  // For cases when the input is not an intermediate tensor but a
+  // persistable tensor, you should call this.
+  nvinfer1::ITensor* ConvertWeight2ITensor(const framework::Scope& scope,
+                                           const std::string& name) {
+    auto* var_v = scope.FindVar(name);
+    auto* var_t = var_v->GetMutable<framework::LoDTensor>();
+    void* trt_ptr = nullptr;
+    size_t trt_num = static_cast<size_t>(var_t->numel());
+    nvinfer1::DataType trt_dtype = nvinfer1::DataType::kFLOAT;
+    if (var_t->dtype() == phi::DataType::FLOAT32) {
+      float* data_ptr = engine_->GetWeightCPUData<float>(name, var_t);
+      trt_ptr = static_cast<void*>(data_ptr);
+    } else if (var_t->dtype() == phi::DataType::INT32) {
+      int32_t* data_ptr = engine_->GetWeightCPUData<int32_t>(name, var_t);
+      trt_ptr = static_cast<void*>(data_ptr);
+      trt_dtype = nvinfer1::DataType::kINT32;
+    } else if (var_t->dtype() == phi::DataType::INT64) {
+      int64_t* data_ptr = engine_->GetWeightCPUData<int64_t>(name, var_t);
+      // We must create a new framework::Tensor() to hold the int32 copy.
+      std::unique_ptr<framework::Tensor> new_var_t(new framework::Tensor());
+      new_var_t->Resize({var_t->numel()});
+      int32_t* new_data_ptr =
+          new_var_t->mutable_data<int32_t>(platform::CPUPlace());
+      for (size_t i = 0; i < trt_num; i++) {
+        new_data_ptr[i] = data_ptr[i];
+      }
+      engine_->SetWeights(name, std::move(new_var_t));
+      trt_ptr = static_cast<void*>(new_data_ptr);
+      trt_dtype = nvinfer1::DataType::kINT32;
+    } else {
+      PADDLE_THROW(platform::errors::InvalidArgument(
+          "Unsupported datatype in TensorRT"));
+    }
+    // Now that we have created the weights, we need to create an ITensor.
+    auto var_dims = var_t->dims();
+    nvinfer1::Dims trt_in_shape;
+    trt_in_shape.nbDims = var_t->dims().size();
+    for (int64_t i = 0; i < trt_in_shape.nbDims; i++) {
+      trt_in_shape.d[i] = var_dims[i];
+    }
+    // In fact, this is not always correct, because we can't determine if the
+    // 0th dimension is the batch. This is just to run chenqu's model.
+    if (!engine_->with_dynamic_shape()) {
+      trt_in_shape.nbDims--;
+      for (int i = 0; i < trt_in_shape.nbDims; i++) {
+        trt_in_shape.d[i] = trt_in_shape.d[i + 1];
+      }
+    }
+    TensorRTEngine::Weight weight{trt_dtype, trt_ptr, trt_num};
+    nvinfer1::ILayer* layer =
+        TRT_ENGINE_ADD_LAYER(engine_, Constant, trt_in_shape, weight.get());
+    engine_->SetITensor(name, layer->getOutput(0));
+    return layer->getOutput(0);
+  }
+
   void RreplenishLayerAndOutput(
       nvinfer1::ILayer* layer,
       const std::string& layer_type,
diff --git a/paddle/fluid/inference/tensorrt/engine.h b/paddle/fluid/inference/tensorrt/engine.h
index c75f7dd17cb..5c2bb6e0ca0 100644
--- a/paddle/fluid/inference/tensorrt/engine.h
+++ b/paddle/fluid/inference/tensorrt/engine.h
@@ -406,6 +406,15 @@ class TensorRTEngine {
   void SetTensorDynamicRange(nvinfer1::ITensor* tensor, float range) {
     quant_dynamic_range_[tensor] = range;
   }
+
+  float GetTensorDynamicRange(nvinfer1::ITensor* tensor) {
+    return quant_dynamic_range_[tensor];
+  }
+
+  bool DynamicRangeIsSet(nvinfer1::ITensor* tensor) {
+    return quant_dynamic_range_.count(tensor);
+  }
+
   template <typename T>
   T* GetWeightCPUData(const std::string& name,
                       framework::Tensor* weight_tensor);
diff --git a/paddle/fluid/operators/tensorrt/tensorrt_engine_op_test.cc b/paddle/fluid/operators/tensorrt/tensorrt_engine_op_test.cc
index 08a71ad713a..cbe14195d41 100644
--- a/paddle/fluid/operators/tensorrt/tensorrt_engine_op_test.cc
+++ b/paddle/fluid/operators/tensorrt/tensorrt_engine_op_test.cc
@@ -150,7 +150,6 @@ void DynamicShapeTest(bool allow_build_at_runtime) {
   else
     CreateCUDATensor(&scope, "x", std::vector<int64_t>({2, 4, 1, 1}));
   CreateCUDATensor(&scope, "y", std::vector<int64_t>({4, 6}));
-  CreateCUDATensor(&scope, "z", std::vector<int64_t>({2, 6}));
 
   CreateCUDATensor(&scope, "y0", std::vector<int64_t>({6, 8}));
   CreateCUDATensor(&scope, "z0", std::vector<int64_t>({2, 8}));
-- 
GitLab
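
For readers outside the Paddle codebase, here is a minimal standalone sketch of
the two conversion tricks this patch relies on: the std::is_integral dispatch
used by the templated constant-layer helpers, and the int64 -> int32 narrowing
copy used by ConvertWeight2ITensor (TensorRT has no INT64 weight type). The
DataType enum and helper names below are illustrative stand-ins, not Paddle or
TensorRT APIs.

    #include <cstdint>
    #include <iostream>
    #include <type_traits>
    #include <vector>

    // Stand-in for nvinfer1::DataType; this sketch does not link TensorRT.
    enum class DataType { kFLOAT, kINT32 };

    // Mirrors the patch's dtype dispatch: integral element types map to
    // kINT32, everything else stays kFLOAT.
    template <typename T>
    DataType SelectTrtDtype() {
      return std::is_integral<T>::value ? DataType::kINT32 : DataType::kFLOAT;
    }

    // Mirrors the INT64 branch of ConvertWeight2ITensor: the data is narrowed
    // element by element into an int32 buffer, which the caller must keep
    // alive as long as the engine holds a raw pointer to it.
    std::vector<int32_t> NarrowInt64Weights(const std::vector<int64_t>& src) {
      std::vector<int32_t> dst(src.size());
      for (size_t i = 0; i < src.size(); i++) {
        dst[i] = static_cast<int32_t>(src[i]);  // truncates out-of-range values
      }
      return dst;
    }

    int main() {
      std::vector<int64_t> weights = {1, 2, 3};
      std::vector<int32_t> narrowed = NarrowInt64Weights(weights);
      std::cout << "int32_t maps to kINT32: "
                << (SelectTrtDtype<int32_t>() == DataType::kINT32) << "\n"
                << "narrowed[2] = " << narrowed[2] << "\n";
      return 0;
    }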