diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index f5ad3df909c46abfe33c0441c4d7d0a2ed5326a1..0093decea5a156dc8eae8ab00c4a6360adeed129 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -1761,6 +1761,7 @@ USE_TRT_CONVERTER(deformable_conv); USE_TRT_CONVERTER(pool3d) USE_TRT_CONVERTER(fused_preln_embedding_eltwise_layernorm) USE_TRT_CONVERTER(preln_skip_layernorm) +USE_TRT_CONVERTER(roll) USE_TRT_CONVERTER(strided_slice) #endif diff --git a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt index f1800afcb1d26e4c7624cf013198f4c30b227754..ec8c1b2fcd75c22a8e895f6751ad4ddec74c0124 100644 --- a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt +++ b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt @@ -25,6 +25,7 @@ nv_library(tensorrt_converter preln_emb_eltwise_layernorm.cc strided_slice_op.cc preln_skip_layernorm.cc + roll_op.cc DEPS tensorrt_engine tensorrt_plugin operator scope framework_proto op_registry) nv_test(test_op_converter SRCS test_op_converter.cc DEPS diff --git a/paddle/fluid/inference/tensorrt/convert/roll_op.cc b/paddle/fluid/inference/tensorrt/convert/roll_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..407f43d58678eba1265964332afdcc721128c58a --- /dev/null +++ b/paddle/fluid/inference/tensorrt/convert/roll_op.cc @@ -0,0 +1,89 @@ +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
+#include "paddle/fluid/inference/tensorrt/helper.h"
+
+namespace paddle {
+namespace framework {
+class Scope;
+namespace proto {
+class OpDesc;
+}  // namespace proto
+}  // namespace framework
+}  // namespace paddle
+
+namespace paddle {
+namespace inference {
+namespace tensorrt {
+/*
+ * Roll converter from fluid to tensorRT.
+ *
+ * The fluid `roll` op is lowered to a single TensorRT Slice layer: the slice
+ * starts at the (negated) shift along every rolled axis, takes its size from
+ * the runtime shape of the input, and is switched to SliceMode::kWRAP so that
+ * coordinates running past the end of an axis come back around.
+ */
+class RollOpConverter : public OpConverter {
+ public:
+  /*
+   * Adds the layers implementing one `roll` op to engine_.
+   *
+   * op        : proto description of the op; reads input "X", output "Out"
+   *             and the attributes "axis" and "shifts".
+   * scope     : not used by this converter.
+   * test_mode : forwarded unchanged to RreplenishLayerAndOutput.
+   */
+  void operator()(const framework::proto::OpDesc& op,
+                  const framework::Scope& scope, bool test_mode) override {
+    VLOG(4) << "convert fluid Roll op to tensorrt Slice layer";
+
+    framework::OpDesc op_desc(op, nullptr);
+    auto* input = engine_->GetITensor(op_desc.Input("X")[0]);
+    nvinfer1::Dims input_dims = input->getDimensions();
+    const int rank = input_dims.nbDims;
+
+    std::vector axis =
+        BOOST_GET_CONST(std::vector, op_desc.GetAttr("axis"));
+    std::vector shifts =
+        BOOST_GET_CONST(std::vector, op_desc.GetAttr("shifts"));
+
+    // Identity slice by default: start at 0 with unit stride on every axis.
+    // `size` only needs a well-formed placeholder here because the real
+    // extent is supplied at runtime through slice input 2 below.
+    nvinfer1::Dims start;
+    nvinfer1::Dims stride;
+    nvinfer1::Dims size;
+    start.nbDims = rank;
+    stride.nbDims = rank;
+    size.nbDims = rank;
+    for (int i = 0; i < rank; i++) {
+      start.d[i] = 0;
+      stride.d[i] = 1;
+      size.d[i] = 1;
+    }
+
+    const int axis_size = axis.size();
+    for (int i = 0; i < axis_size; i++) {
+      // A negative axis counts from the last dimension; map it into
+      // [0, rank) before it is used as an index into the Dims arrays.
+      // NOTE(review): assumes roll accepts negative axes like paddle.roll
+      // documents - confirm against the op definition.
+      auto dim = axis[i];
+      if (dim < 0) {
+        dim += rank;
+      }
+
+      const auto extent = input_dims.d[dim];
+      if (extent > 0) {
+        // C++ `%` keeps the sign of the dividend, so fold the remainder into
+        // [0, extent) instead of handing a negative start to the slice.
+        auto offset = (-shifts[i]) % extent;
+        if (offset < 0) {
+          offset += extent;
+        }
+        start.d[dim] = offset;
+      } else {
+        // The extent is not known at build time (reported as -1 for a
+        // dynamic dimension) or is 0, so it cannot be used as a modulus:
+        // `x % -1` is always 0, which would silently drop the shift.
+        // NOTE(review): this relies on SliceMode::kWRAP wrapping an
+        // out-of-range start periodically - confirm against the TensorRT
+        // ISliceLayer documentation.
+        start.d[dim] = -shifts[i];
+      }
+    }
+
+    auto output_name = op_desc.Output("Out")[0];
+
+    auto shape_layer = TRT_ENGINE_ADD_LAYER(engine_, Shape, *input);
+
+    auto* layer =
+        TRT_ENGINE_ADD_LAYER(engine_, Slice, *input, start, size, stride);
+    // Feed the runtime shape of the input as slice input 2 so the output
+    // keeps the input's extent on every axis.
+    layer->setInput(2, *shape_layer->getOutput(0));
+#if IS_TRT_VERSION_GE(7000)
+    layer->setMode(nvinfer1::SliceMode::kWRAP);
+#endif
+
+    RreplenishLayerAndOutput(layer, "roll", {output_name}, test_mode);
+  }
+};
+
+}
// namespace tensorrt +} // namespace inference +} // namespace paddle + +REGISTER_TRT_OP_CONVERTER(roll, RollOpConverter); diff --git a/paddle/fluid/inference/tensorrt/op_teller.cc b/paddle/fluid/inference/tensorrt/op_teller.cc index d9a874dd2b6295fd24beee86bd3c81815b4ccff6..b44450e7a8212d0a62571039ae6424f25a888e8b 100644 --- a/paddle/fluid/inference/tensorrt/op_teller.cc +++ b/paddle/fluid/inference/tensorrt/op_teller.cc @@ -119,6 +119,7 @@ struct SimpleOpTypeSetTeller : public Teller { "slice", "strided_slice", "fused_preln_embedding_eltwise_layernorm", + "roll", "preln_skip_layernorm"}; std::unordered_set teller_set{ "mul", @@ -182,6 +183,7 @@ struct SimpleOpTypeSetTeller : public Teller { "strided_slice", "fused_preln_embedding_eltwise_layernorm", "preln_skip_layernorm", + "roll", "multiclass_nms3"}; }; @@ -928,6 +930,28 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8, } } + if (op_type == "roll") { +#if !IS_TRT_VERSION_GE(7000) + VLOG(3) << "roll converter does not support trt versions below 7.0"; + return false; +#endif + if (!with_dynamic_shape) { + return false; + } + } + + if (op_type == "strided_slice") { + if (!with_dynamic_shape) { + return false; + } + if (!desc.HasAttr("axes") || !desc.HasAttr("starts") || + !desc.HasAttr("ends") || !desc.HasAttr("strides")) { + VLOG(3) + << "The necessary attributes of the strided_slice operator miss "; + return false; + } + } + if (op_type == "slice") { if (desc.HasAttr("decrease_axis")) { std::vector decrease_axis = diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_roll.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_roll.py new file mode 100644 index 0000000000000000000000000000000000000000..1b3d38036614ffceddd7d234aaf1991d3c8ef8b2 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_roll.py @@ -0,0 +1,124 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from trt_layer_auto_scan_test import TrtLayerAutoScanTest, SkipReasons +from program_config import TensorConfig, ProgramConfig +import numpy as np +import paddle.inference as paddle_infer +from functools import partial +from typing import Optional, List, Callable, Dict, Any, Set +import unittest + + +class TrtConvertRollTest(TrtLayerAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: + inputs = program_config.inputs + weights = program_config.weights + attrs = [ + program_config.ops[i].attrs + for i in range(len(program_config.ops)) + ] + return True + + def sample_program_configs(self): + def generate_input1(attrs: List[Dict[str, Any]]): + return np.ones([1, 56, 56, 192]).astype(np.float32) + + for axis in [[1, 2]]: + for shifts in [[-1, -1], [-3, -3]]: + dics = [{ + "axis": axis, + "shifts": shifts, + }] + + ops_config = [{ + "op_type": "roll", + "op_inputs": { + "X": ["input_data"] + }, + "op_outputs": { + "Out": ["roll_output_data"] + }, + "op_attrs": dics[0] + }] + ops = self.generate_op_config(ops_config) + + program_config = ProgramConfig( + ops=ops, + weights={}, + inputs={ + "input_data": + TensorConfig(data_gen=partial(generate_input1, dics)) + }, + outputs=["roll_output_data"]) + + yield program_config + + def sample_predictor_configs( + self, program_config) -> (paddle_infer.Config, List[int], float): + def generate_dynamic_shape(attrs): + self.dynamic_shape.min_input_shape = 
{ + "input_data": [1, 56, 56, 192] + } + self.dynamic_shape.max_input_shape = { + "input_data": [8, 56, 56, 192] + } + self.dynamic_shape.opt_input_shape = { + "input_data": [4, 56, 56, 192] + } + + def clear_dynamic_shape(): + self.dynamic_shape.min_input_shape = {} + self.dynamic_shape.max_input_shape = {} + self.dynamic_shape.opt_input_shape = {} + + def generate_trt_nodes_num(attrs, dynamic_shape): + inputs = program_config.inputs + + if not dynamic_shape: + return 0, 3 + ver = paddle_infer.get_trt_compile_version() + if ver[0] * 1000 + ver[1] * 100 + ver[2] * 10 < 7000: + return 0, 3 + return 1, 2 + + attrs = [ + program_config.ops[i].attrs + for i in range(len(program_config.ops)) + ] + + # for static_shape + clear_dynamic_shape() + self.trt_param.precision = paddle_infer.PrecisionType.Float32 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, False), 1e-5 + self.trt_param.precision = paddle_infer.PrecisionType.Half + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, False), 1e-4 + + # for dynamic_shape + generate_dynamic_shape(attrs) + self.trt_param.precision = paddle_infer.PrecisionType.Float32 + yield self.create_inference_config(), generate_trt_nodes_num(attrs, + True), 1e-5 + self.trt_param.precision = paddle_infer.PrecisionType.Half + yield self.create_inference_config(), generate_trt_nodes_num(attrs, + True), 1e-4 + + def test(self): + self.run_test() + + +if __name__ == "__main__": + unittest.main()