From e25e86f4f6d1bbd043b621a75e93d0070719c3d8 Mon Sep 17 00:00:00 2001 From: Zhang Jun Date: Mon, 26 Jun 2023 17:19:15 +0800 Subject: [PATCH] [inference][trt] optimize set_value and top_k op (#54372) * set_value update * support ValueTensor's rank != Input'rank & update topk * update range to avoid coredump * fix addShape error * Dims definition differ between 7.2 and 8.0+ * Update test_trt_convert_top_k_v2.py * update top_k * Update test_trt_convert_top_k_v2.py --- .../inference/tensorrt/convert/op_converter.h | 78 +++++++++++- .../inference/tensorrt/convert/range_op.cc | 9 +- .../tensorrt/convert/set_value_op.cc | 49 ++------ .../inference/tensorrt/convert/top_k_op.cc | 112 +++++++++--------- paddle/fluid/inference/tensorrt/op_teller.cc | 54 ++++----- test/ir/inference/test_trt_convert_top_k.py | 2 +- .../ir/inference/test_trt_convert_top_k_v2.py | 13 +- 7 files changed, 178 insertions(+), 139 deletions(-) diff --git a/paddle/fluid/inference/tensorrt/convert/op_converter.h b/paddle/fluid/inference/tensorrt/convert/op_converter.h index 4d12cb128db..3fac3d94381 100644 --- a/paddle/fluid/inference/tensorrt/convert/op_converter.h +++ b/paddle/fluid/inference/tensorrt/convert/op_converter.h @@ -373,6 +373,13 @@ class OpConverter { engine->ClearWeights(); } + nvinfer1::ITensor* Cast(nvinfer1::ITensor* input, nvinfer1::DataType dtype) { + auto* layer = TRT_ENGINE_ADD_LAYER(engine_, Identity, *input); + layer->setOutputType(0, dtype); + layer->getOutput(0)->setType(dtype); + return layer->getOutput(0); + } + // rank(result) = rank(input) nvinfer1::ITensor* Gather(nvinfer1::ITensor* input, const std::vector indices, @@ -384,6 +391,59 @@ class OpConverter { return result; } + nvinfer1::ITensor* Unsqueeze(nvinfer1::ITensor* input, + const std::vector axis) { + const auto dims = input->getDimensions(); + const std::unordered_set axis_data(axis.begin(), axis.end()); + std::vector subscripts(dims.nbDims); + std::iota(subscripts.begin(), subscripts.end(), 0); + for (const auto& axis_value : axis_data) { + subscripts.insert(subscripts.begin() + axis_value, dims.nbDims); + } + nvinfer1::ITensor* input_shape{nullptr}; + if (engine_->with_dynamic_shape()) { + input_shape = Shape(input); + } else { + input_shape = Add1DConstantLayer(dims); + } + auto* new_dim = + TRT_ENGINE_ADD_LAYER(engine_, + Gather, + *Concat(std::vector{ + input_shape, Add1DConstantLayer(1)}), + *Add1DConstantLayer(subscripts), + 0) + ->getOutput(0); + auto result = Reshape(input, new_dim); + return result; + } + + nvinfer1::ITensor* Squeeze(nvinfer1::ITensor* input, + const std::vector axis) { + const auto dims = input->getDimensions(); + std::vector subscripts(dims.nbDims); + std::iota(subscripts.begin(), subscripts.end(), 0); + auto p = + std::remove_if(subscripts.begin(), subscripts.end(), [axis](int x) { + return std::find(axis.begin(), axis.end(), x) != axis.end(); + }); + subscripts.resize(p - subscripts.begin()); + + nvinfer1::ITensor* input_shape{nullptr}; + if (engine_->with_dynamic_shape()) { + input_shape = Shape(input); + } else { + input_shape = Add1DConstantLayer(dims); + } + + auto* new_dim = + TRT_ENGINE_ADD_LAYER( + engine_, Gather, *input_shape, *Add1DConstantLayer(subscripts), 0) + ->getOutput(0); + auto result = Reshape(input, new_dim); + return result; + } + // paddle allows negative index // for axis length = 5, paddle allows [-5, 4] nvinfer1::ITensor* FixNegIndices(nvinfer1::ITensor* input_shape, @@ -406,7 +466,23 @@ class OpConverter { nvinfer1::ITensor* newShape, const std::string& name = "") { auto* shuffle = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *input); - shuffle->setInput(1, *newShape); + if (engine_->with_dynamic_shape()) { + shuffle->setInput(1, *newShape); + } else { + auto shape = newShape->getDimensions(); + shuffle->setReshapeDimensions(shape); + } + if (name != "") { + shuffle->setName(name.c_str()); + } + return shuffle->getOutput(0); + } + + nvinfer1::ITensor* Reshape(nvinfer1::ITensor* input, + nvinfer1::Dims shape, + const std::string& name = "") { + auto* shuffle = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *input); + shuffle->setReshapeDimensions(shape); if (name != "") { shuffle->setName(name.c_str()); } diff --git a/paddle/fluid/inference/tensorrt/convert/range_op.cc b/paddle/fluid/inference/tensorrt/convert/range_op.cc index 7288f4877b8..6e4db4fb2a4 100644 --- a/paddle/fluid/inference/tensorrt/convert/range_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/range_op.cc @@ -46,8 +46,13 @@ class RangeOpConverter : public OpConverter { quotient_tensor = fquotient_tensor; } auto number_tensor = Max(Sub(zero_tensor, quotient_tensor), zero_tensor); - auto* start1 = engine_->GetITensor(op_desc.Input("Start")[0], true); - + auto* start1 = engine_->GetITensor(op_desc.Input("Start")[0]); +#if IS_TRT_VERSION_LT(8000) + nvinfer1::Dims start_dims{0, {1}, { nvinfer1::DimensionType::kSPATIAL }}; +#else + nvinfer1::Dims start_dims{0, {1}}; +#endif + start1 = Reshape(start1, start_dims); layer = TRT_ENGINE_ADD_LAYER( engine_, Fill, nvinfer1::Dims{}, nvinfer1::FillOperation::kLINSPACE); layer->setInput(0, *number_tensor); diff --git a/paddle/fluid/inference/tensorrt/convert/set_value_op.cc b/paddle/fluid/inference/tensorrt/convert/set_value_op.cc index 016baf012e4..9ba5a7a4d53 100644 --- a/paddle/fluid/inference/tensorrt/convert/set_value_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/set_value_op.cc @@ -24,16 +24,6 @@ limitations under the License. */ } \ } while (0) -namespace paddle { -namespace framework { -class Scope; - -namespace proto { -class OpDesc; -} // namespace proto -} // namespace framework -} // namespace paddle - namespace paddle { namespace inference { namespace tensorrt { @@ -55,6 +45,14 @@ class SetValueConverter : public OpConverter { auto* inputs = engine_->GetITensor(op_desc.Input("Input")[0]); auto* updates = engine_->GetITensor(op_desc.Input("ValueTensor")[0]); + const auto decrease_axes = PADDLE_GET_CONST( + std::vector, op_desc.GetAttr("decrease_axes")); + std::vector decr_axes{decrease_axes.begin(), decrease_axes.end()}; + auto value_rank = updates->getDimensions().nbDims; + auto input_rank = inputs->getDimensions().nbDims; + if (decrease_axes.size() > 0 && value_rank != input_rank) { + updates = Unsqueeze(updates, decr_axes); + } int64_t axes = 0; int64_t starts = 0; @@ -115,39 +113,14 @@ class SetValueConverter : public OpConverter { indices.insert(indices.end(), axes_index.begin(), axes_index.end()); } - nvinfer1::Dims indice_dims = update_dims; - - // create a tensor to store data - std::vector indice_dim_vec; - for (int i = 0; i < update_dims.nbDims; i++) { - indice_dim_vec.emplace_back(update_dims.d[i]); - } - auto indice_tensor_dims = phi::make_ddim(indice_dim_vec); - std::unique_ptr indice_tensor( - std::make_unique()); - indice_tensor->Resize(indice_tensor_dims); - - auto* dev_ctx = static_cast( - platform::DeviceContextPool::Instance().Get(platform::CPUPlace())); - auto* weight_data = dev_ctx->template HostAlloc(indice_tensor.get()); - - memcpy(weight_data, indices.data(), sizeof(int) * indice_tensor->numel()); - - TensorRTEngine::Weight weight{ - nvinfer1::DataType::kINT32, - static_cast(weight_data), - static_cast(indice_tensor->numel())}; auto output_name = op_desc.Output("Out")[0]; - engine_->SetWeights("set_value_index_" + output_name, - std::move(indice_tensor)); - - auto const_layer = - TRT_ENGINE_ADD_LAYER(engine_, Constant, indice_dims, weight.get()); + const auto const_layer = AddConstantLayer( + indices.data(), update_dims, "set_value_index_" + output_name); auto* layer = TRT_ENGINE_ADD_LAYER(engine_, Scatter, *inputs, - *const_layer->getOutput(0), + *const_layer, *updates, nvinfer1::ScatterMode::kELEMENT); diff --git a/paddle/fluid/inference/tensorrt/convert/top_k_op.cc b/paddle/fluid/inference/tensorrt/convert/top_k_op.cc index 938be060a50..4fcf6c43746 100644 --- a/paddle/fluid/inference/tensorrt/convert/top_k_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/top_k_op.cc @@ -33,77 +33,71 @@ class TopKOpConverter : public OpConverter { void operator()(const framework::proto::OpDesc& op, const framework::Scope& scope, bool test_mode) override { - VLOG(3) << "convert a top_k op to tensorrt TopK layer"; - // Here the two nullptr looks strange, that's because the - // framework::OpDesc's constructor is strange. + VLOG(3) << "convert a top_k op to tensorrt layer"; framework::OpDesc op_desc(op, nullptr); auto* input_tensor = engine_->GetITensor(op_desc.Input("X")[0]); - const int k = op_desc.HasAttr("k") - ? PADDLE_GET_CONST(int, op_desc.GetAttr("k")) - : 1.0f; - - nvinfer1::Dims input_dims = input_tensor->getDimensions(); - int axis = input_dims.nbDims; - nvinfer1::ITopKLayer* layer = - TRT_ENGINE_ADD_LAYER(engine_, - TopK, - *input_tensor, - nvinfer1::TopKOperation::kMAX, - k, - 1 << (axis - 1)); - - std::vector output_names; - output_names.push_back(op_desc.Output("Out").front()); - output_names.push_back(op_desc.Output("Indices").front()); - - RreplenishLayerAndOutput(layer, "top_k", output_names, test_mode); - } -}; -class TopKv2OpConverter : public OpConverter { - public: - TopKv2OpConverter() {} - void operator()(const framework::proto::OpDesc& op, - const framework::Scope& scope, - bool test_mode) override { - // Here the two nullptr looks strange, that's because the - // framework::OpDesc's constructor is strange. - framework::OpDesc op_desc(op, nullptr); - - auto* input_tensor = engine_->GetITensor(op_desc.Input("X")[0]); - - const int k = op_desc.HasAttr("k") - ? PADDLE_GET_CONST(int, op_desc.GetAttr("k")) - : 1.0f; - const int axis = op_desc.HasAttr("axis") - ? PADDLE_GET_CONST(int, op_desc.GetAttr("axis")) - : 1.0f; + const int k = + op_desc.HasAttr("k") ? PADDLE_GET_CONST(int, op_desc.GetAttr("k")) : 1; + int axis = op_desc.HasAttr("axis") + ? PADDLE_GET_CONST(int, op_desc.GetAttr("axis")) + : -1; const bool largest = op_desc.HasAttr("largest") ? PADDLE_GET_CONST(bool, op_desc.GetAttr("largest")) : true; auto flag = largest ? nvinfer1::TopKOperation::kMAX : nvinfer1::TopKOperation::kMIN; + + auto input_dims = input_tensor->getDimensions(); + auto input_rank = input_dims.nbDims; + // 1d needs expand to 2d + bool expand_to_2d = (input_rank == 1); + if (engine_->with_dynamic_shape() && expand_to_2d) { + input_tensor = Unsqueeze(input_tensor, std::vector{1}); + } + + // INT32 only, other data type should to casted to INT32. + nvinfer1::DataType type = input_tensor->getType(); + bool cast = (type == nvinfer1::DataType::kINT32); + if (cast) { + input_tensor = Cast(input_tensor, nvinfer1::DataType::kFLOAT); + } + nvinfer1::ITopKLayer* layer = nullptr; - if (axis == -1) { - nvinfer1::Dims input_dims = input_tensor->getDimensions(); - layer = TRT_ENGINE_ADD_LAYER( - engine_, TopK, *input_tensor, flag, k, 1 << (input_dims.nbDims - 1)); - } else { - if (engine_->with_dynamic_shape()) { - layer = TRT_ENGINE_ADD_LAYER( - engine_, TopK, *input_tensor, flag, k, 1 << axis); - } else { - layer = TRT_ENGINE_ADD_LAYER( - engine_, TopK, *input_tensor, flag, k, 1 << (axis - 1)); - } + if (axis > 0 && !engine_->with_dynamic_shape()) { + axis -= 1; } - std::vector output_names; - output_names.push_back(op_desc.Output("Out").front()); - output_names.push_back(op_desc.Output("Indices").front()); + if (axis < 0) axis += input_rank; + + layer = + TRT_ENGINE_ADD_LAYER(engine_, TopK, *input_tensor, flag, k, 1 << axis); + + nvinfer1::ITensor* values = layer->getOutput(0); + nvinfer1::ITensor* indices = layer->getOutput(1); + + // un-expand to 1d + if (engine_->with_dynamic_shape() && expand_to_2d) { + values = Squeeze(values, std::vector{1}); + indices = Squeeze(indices, std::vector{1}); + } + + // cast back + if (cast) { + values = Cast(values, nvinfer1::DataType::kINT32); + } + + auto out_name = op_desc.Output("Out").front(); + auto indices_name = op_desc.Output("Indices").front(); + values->setName(out_name.c_str()); + engine_->SetITensor(out_name.c_str(), values); + + indices->setName(indices_name.c_str()); + engine_->SetITensor(indices_name.c_str(), indices); - RreplenishLayerAndOutput(layer, "top_k_v2", output_names, test_mode); + layer->setName( + ("top_k (Output: " + out_name + "," + indices_name + ")").c_str()); } }; } // namespace tensorrt @@ -111,4 +105,4 @@ class TopKv2OpConverter : public OpConverter { } // namespace paddle REGISTER_TRT_OP_CONVERTER(top_k, TopKOpConverter); -REGISTER_TRT_OP_CONVERTER(top_k_v2, TopKv2OpConverter); +REGISTER_TRT_OP_CONVERTER(top_k_v2, TopKOpConverter); diff --git a/paddle/fluid/inference/tensorrt/op_teller.cc b/paddle/fluid/inference/tensorrt/op_teller.cc index f9d22755085..6dbb05bbff8 100644 --- a/paddle/fluid/inference/tensorrt/op_teller.cc +++ b/paddle/fluid/inference/tensorrt/op_teller.cc @@ -2402,6 +2402,22 @@ struct SimpleOpTypeSetTeller : public Teller { #if !IS_TRT_VERSION_GE(8200) return false; #endif + auto inputs = desc.Inputs(); + if (inputs.find("StartsTensorList") != inputs.end()) { + if (desc.Input("StartsTensorList").size() >= 1) { + return false; + } + } + if (inputs.find("EndsTensorList") != inputs.end()) { + if (desc.Input("EndsTensorList").size() >= 1) { + return false; + } + } + if (inputs.find("StepsTensorList") != inputs.end()) { + if (desc.Input("StepsTensorList").size() >= 1) { + return false; + } + } if (!(desc.HasAttr("axes") && desc.HasAttr("starts") && desc.HasAttr("steps"))) { VLOG(3) << "the " << op_type @@ -2409,52 +2425,22 @@ struct SimpleOpTypeSetTeller : public Teller { "starts or steps)"; return false; } - auto* block = desc.Block(); - auto input_name = desc.Input("Input")[0]; - auto* input_desc = block->FindVar(input_name); - const auto input_shape = input_desc->GetShape(); - auto update_name = desc.Input("ValueTensor")[0]; - auto* update_desc = block->FindVar(update_name); - const auto update_shape = update_desc->GetShape(); - if (update_shape.size() != input_shape.size()) return false; } if (op_type == "top_k_v2" || op_type == "top_k") { - auto* block = desc.Block(); - auto x_var_name = desc.Input("X")[0]; - - if (block == nullptr) { - VLOG(3) << "The block desc is nullptr, we can't continue to analyze. " - "Developers need to check whether block_desc is passed in " - "the pass."; - return false; - } - auto* x_var_desc = block->FindVar(x_var_name); - auto x_dtype = x_var_desc->GetDataType(); - - if (!(x_dtype == framework::proto::VarType::FP32 || - x_dtype == framework::proto::VarType::FP16)) { - return false; - } - - const auto x_shape = x_var_desc->GetShape(); - if (x_shape.size() == 1) { - VLOG(3) << "top_k/top_k_v2 does not support 1-dimensional input in " - "tensorrt"; - return false; - } if (desc.HasAttr("axis")) { int axis = PADDLE_GET_CONST(int, desc.GetAttr("axis")); - if (axis == 0) { + if (!with_dynamic_shape && axis == 0) { VLOG(3) << "top_k_v2 does not support axis == 0 in " - "tensorrt"; + "tensorrt static shape."; return false; } } if (desc.HasAttr("sorted")) { bool sorted = PADDLE_GET_CONST(bool, desc.GetAttr("sorted")); if (!sorted) { - VLOG(3) << "top_k_v2 does not support results not sorted in " + VLOG(3) << op_type + << " does not support results not sorted in " "tensorrt"; return false; } diff --git a/test/ir/inference/test_trt_convert_top_k.py b/test/ir/inference/test_trt_convert_top_k.py index e8c2f0945d8..8c8bcbde993 100644 --- a/test/ir/inference/test_trt_convert_top_k.py +++ b/test/ir/inference/test_trt_convert_top_k.py @@ -107,7 +107,7 @@ class TrtConvertActivationTest(TrtLayerAutoScanTest): self.dynamic_shape.opt_input_shape = {} def generate_trt_nodes_num(attrs, dynamic_shape): - if self.dims == 1: + if not dynamic_shape and self.dims == 1: return 0, 4 return 1, 3 diff --git a/test/ir/inference/test_trt_convert_top_k_v2.py b/test/ir/inference/test_trt_convert_top_k_v2.py index f682f5a68c9..29a04c5d049 100644 --- a/test/ir/inference/test_trt_convert_top_k_v2.py +++ b/test/ir/inference/test_trt_convert_top_k_v2.py @@ -23,7 +23,7 @@ from trt_layer_auto_scan_test import TrtLayerAutoScanTest import paddle.inference as paddle_infer -class TrtConvertActivationTest(TrtLayerAutoScanTest): +class TrtConvertTopKV2Test(TrtLayerAutoScanTest): def is_program_valid(self, program_config: ProgramConfig) -> bool: inputs = program_config.inputs attrs = [ @@ -31,6 +31,10 @@ class TrtConvertActivationTest(TrtLayerAutoScanTest): ] if len(inputs['input_data'].shape) <= attrs[0]['axis']: return False + axis = attrs[0]['axis'] + axis = axis if axis >= 0 else axis + len(inputs['input_data'].shape) + if inputs['input_data'].shape[axis] <= attrs[0]['k']: + return False return True def sample_program_configs(self): @@ -49,11 +53,12 @@ class TrtConvertActivationTest(TrtLayerAutoScanTest): for dims in [1, 2, 3, 4]: for batch in [1, 4]: for k in [1, 3]: - for axis in [-1, 1, 2, 3]: + for axis in [-1, 1, 0, 2, 3]: for largest in [True, False]: for sort in [True, False]: self.dims = dims self.sort = sort + self.axis = axis dics = [ { "k": k, @@ -120,7 +125,7 @@ class TrtConvertActivationTest(TrtLayerAutoScanTest): "input_data": [4, 32, 32, 32] } self.dynamic_shape.opt_input_shape = { - "input_data": [1, 3, 32, 32] + "input_data": [4, 3, 32, 32] } def clear_dynamic_shape(): @@ -129,7 +134,7 @@ class TrtConvertActivationTest(TrtLayerAutoScanTest): self.dynamic_shape.opt_input_shape = {} def generate_trt_nodes_num(attrs, dynamic_shape): - if self.dims == 1: + if not dynamic_shape and (self.dims == 1 or self.axis == 0): return 0, 4 if not self.sort: return 0, 4 -- GitLab