diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
index 789a478e2bd091edbda45534386268afe85aa9f9..226d8acdc11db2d1aff11396f02bceaa4437fe9b 100644
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -2263,6 +2263,7 @@ USE_TRT_CONVERTER(layernorm_shift_partition)
 USE_TRT_CONVERTER(generic_plugin_creater)
 USE_TRT_CONVERTER(custom_plugin_creater)
 USE_TRT_CONVERTER(lookup_table)
+USE_TRT_CONVERTER(expand_v2)
 #if PADDLE_WITH_CUSPARSELT && IS_TRT_VERSION_GE(8000)
 USE_TRT_CONVERTER(sparse_fc)
 USE_TRT_CONVERTER(sparse_multihead_matmul)
diff --git a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
index 5e9e6d8f2c4f141bdfaa11002370806fa23ce834..75c12bf7ca71e1dde32ce625f8186e3e2cb43c47 100644
--- a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
+++ b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
@@ -78,7 +78,8 @@ list(
   fused_token_prune_op.cc
   layernorm_shift_partition_op.cc
   generic_and_custom_plugin_creater.cc
-  fused_lookup_tables_op.cc)
+  fused_lookup_tables_op.cc
+  expand_v2_op.cc)
 
 if(${TENSORRT_MAJOR_VERSION} GREATER_EQUAL 7 AND NOT WIN32)
   list(APPEND CONVERT_FILES emb_eltwise_layernorm.cc
diff --git a/paddle/fluid/inference/tensorrt/convert/expand_v2_op.cc b/paddle/fluid/inference/tensorrt/convert/expand_v2_op.cc
new file mode 100644
index 0000000000000000000000000000000000000000..e491e2861bf43ae06a77a29b4588af4a3194b5d3
--- /dev/null
+++ b/paddle/fluid/inference/tensorrt/convert/expand_v2_op.cc
@@ -0,0 +1,125 @@
+/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
+
+namespace paddle {
+namespace framework {
+class Scope;
+
+namespace proto {
+class OpDesc;
+}  // namespace proto
+}  // namespace framework
+}  // namespace paddle
+
+namespace paddle {
+namespace inference {
+namespace tensorrt {
+
+/*
+ * Converts the Paddle expand_v2 op into a TensorRT Shuffle + Slice pair.
+ *
+ * Only the constant "shape" attribute is consumed. The input is first reshaped
+ * to the output rank by prepending size-1 axes; a Slice whose start/size/stride
+ * come from runtime shape tensors then produces the output, with
+ *   size   = max(padded input shape, shape)  -- a -1 entry keeps the input dim
+ *   stride = min(1, padded input shape - 1)  -- 0 re-reads a size-1 axis
+ */
+class ExpandV2OpConverter : public OpConverter {
+ public:
+  void operator()(const framework::proto::OpDesc& op,
+                  const framework::Scope& scope,
+                  bool test_mode) override {
+    framework::OpDesc op_desc(op, nullptr);
+    auto* input = engine_->GetITensor(op_desc.Input("X")[0]);
+    const auto output_name = op_desc.Output("Out")[0];
+    const int32_t rank = input->getDimensions().nbDims;
+    const std::vector<int32_t> shape =
+        PADDLE_GET_CONST(std::vector<int32_t>, op_desc.GetAttr("shape"));
+    const int32_t out_rank = static_cast<int32_t>(shape.size());
+
+    // nvinfer1::Dims has a fixed capacity, and the element-wise Max/Sub/Min
+    // below need both shape vectors to have out_rank entries, so fail loudly
+    // on ranks this lowering cannot represent.
+    constexpr int32_t kMaxDims = nvinfer1::Dims::MAX_DIMS;
+    PADDLE_ENFORCE_LE(
+        out_rank,
+        kMaxDims,
+        platform::errors::InvalidArgument(
+            "The shape attribute of expand_v2 has %d entries, but TensorRT "
+            "supports at most %d dimensions.",
+            out_rank,
+            kMaxDims));
+    PADDLE_ENFORCE_GE(
+        out_rank,
+        rank,
+        platform::errors::InvalidArgument(
+            "The shape attribute of expand_v2 has %d entries, which is fewer "
+            "than the rank (%d) of input X.",
+            out_rank,
+            rank));
+
+    auto* shape_tensor =
+        Add1DConstantLayer(shape, output_name + "_shape_tensor_");
+
+    // Runtime shape of X, left-padded with 1s up to the output rank.
+    nvinfer1::ITensor* input_shape_tensor = nullptr;
+    if (rank < out_rank) {
+      auto* one_rank_tensor =
+          Add1DConstantLayer(std::vector<int32_t>(out_rank - rank, 1),
+                             output_name + "_one_rank_tensor_");
+      std::vector<nvinfer1::ITensor*> itensors{one_rank_tensor, Shape(input)};
+      input_shape_tensor = Concat(itensors);
+    } else {
+      input_shape_tensor = Shape(input);
+    }
+
+    // Reshape X to the padded shape so the Slice sees an out_rank-D tensor.
+    auto* shuffle = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *input);
+    shuffle->setInput(1, *input_shape_tensor);
+
+    // Static arguments required by addSlice(); the values actually used are
+    // bound through setInput(1..3) below. Value-initialise all three so that
+    // no indeterminate integers are handed to TensorRT.
+    nvinfer1::Dims start{};
+    nvinfer1::Dims size{};
+    nvinfer1::Dims stride{};
+    start.nbDims = out_rank;
+    size.nbDims = out_rank;
+    stride.nbDims = out_rank;
+
+    auto* starts_tensor = Add1DConstantLayer(
+        std::vector<int32_t>(out_rank, 0), output_name + "_start_tensor_");
+    auto* one_tensor = Add1DConstantLayer(1, output_name + "_one_tensor_");
+
+    auto* sizes_tensor = Max(input_shape_tensor, shape_tensor);
+    auto* input_sub_tensor = Sub(input_shape_tensor, one_tensor);
+    auto* strides_tensor = Min(one_tensor, input_sub_tensor);
+
+    auto* layer = TRT_ENGINE_ADD_LAYER(
+        engine_, Slice, *shuffle->getOutput(0), start, size, stride);
+    layer->setInput(1, *starts_tensor);
+    layer->setInput(2, *sizes_tensor);
+    layer->setInput(3, *strides_tensor);
+
+    RreplenishLayerAndOutput(layer, "expand_v2", {output_name}, test_mode);
+  }
+};
+
+}  // namespace tensorrt
+}  // namespace inference
+}  // namespace paddle
+
+REGISTER_TRT_OP_CONVERTER(expand_v2, ExpandV2OpConverter);
diff --git a/paddle/fluid/inference/tensorrt/op_teller.cc b/paddle/fluid/inference/tensorrt/op_teller.cc
index a8fdcb5e5f9c6f0228ef3f515bdb41114cf8fdf0..60cd3887c120fc08d7fa5e19c416648c68061dd3 100644
--- a/paddle/fluid/inference/tensorrt/op_teller.cc
+++ b/paddle/fluid/inference/tensorrt/op_teller.cc
@@ -2109,6 +2109,27 @@ struct SimpleOpTypeSetTeller : public Teller {
       }
     }
 
+    if (op_type == "expand_v2") {
+      if (!with_dynamic_shape) {
+        return false;
+      }
+      if (!desc.HasAttr("shape")) {
+        return false;
+      }
+      auto expand_v2_inputs = desc.Inputs();
+      if (expand_v2_inputs.find("Shape") != expand_v2_inputs.end()) {
+        if (desc.Input("Shape").size() >= 1) {
+          return false;
+        }
+      }
+      if (expand_v2_inputs.find("expand_shapes_tensor") !=
+          expand_v2_inputs.end()) {
+        if (desc.Input("expand_shapes_tensor").size() >= 1) {
+          return false;
+        }
+      }
+    }
+
     if (use_no_calib_int8) {
       return int8_teller_set.count(op_type);
     } else {
@@ -2232,7 +2253,8 @@ struct SimpleOpTypeSetTeller : public Teller {
       "unsqueeze2",
       "layernorm_shift_partition",
       "lookup_table",
-      "lookup_table_v2"};
+      "lookup_table_v2",
+      "expand_v2"};
   std::unordered_set<std::string> teller_set{
       "mul",
       "matmul",
@@ -2348,7 +2370,8 @@ struct SimpleOpTypeSetTeller : public Teller {
       "fused_token_prune",
       "layernorm_shift_partition",
       "lookup_table",
-      "lookup_table_v2"};
+      "lookup_table_v2",
+      "expand_v2"};
 };
 
 struct GenericPluginTeller : public Teller {
diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_expand_v2.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_expand_v2.py
new file mode 100644
index 0000000000000000000000000000000000000000..8b5e6ef34793249246143318c703ddef5d6be9bd
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_expand_v2.py
@@ -0,0 +1,416 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from trt_layer_auto_scan_test import TrtLayerAutoScanTest
+from program_config import TensorConfig, ProgramConfig
+import numpy as np
+import paddle.inference as paddle_infer
+from functools import partial
+from typing import List, Dict, Any
+import unittest
+
+
+class TrtConvertExpandV2Test(TrtLayerAutoScanTest):
+    """expand_v2 whose target shape comes only from the `shape` attribute."""
+
+    def is_program_valid(self, program_config: ProgramConfig) -> bool:
+        shape = program_config.ops[0].attrs['shape']
+        # The target rank may not be smaller than the input rank.
+        if len(shape) < self.dims:
+            return False
+        # NOTE(review): a 1-D input with a 4-D target puts a -1 on a newly
+        # added axis; presumably expand_v2 rejects that -- confirm.
+        if self.dims == 1 and len(shape) == 4:
+            return False
+        return True
+
+    def sample_program_configs(self):
+
+        def generate_input1(attrs: List[Dict[str, Any]]):
+            if self.dims == 4:
+                self.input_shape = [1, 1, 4, 6]
+                return np.random.random([1, 1, 4, 6]).astype(np.float32)
+            elif self.dims == 3:
+                self.input_shape = [1, 8, 6]
+                return np.random.random([1, 8, 6]).astype(np.float32)
+            elif self.dims == 2:
+                self.input_shape = [1, 48]
+                return np.random.random([1, 48]).astype(np.float32)
+            elif self.dims == 1:
+                self.input_shape = [48]
+                return np.random.random([48]).astype(np.float32)
+
+        for dims in [4, 3, 2, 1]:
+            for shape in [[10, 12, -1, -1], [8, 64, -1, -1], [6, 8, -1]]:
+                dics = [
+                    {
+                        "shape": shape,
+                    },
+                ]
+                # Kept on self: generate_input1, is_program_valid and
+                # generate_dynamic_shape all branch on it.
+                self.dims = dims
+                dics_input = [{"X": ["expand_v2_input"]}]
+
+                ops_config = [{
+                    "op_type": "expand_v2",
+                    "op_inputs": dics_input[0],
+                    "op_outputs": {
+                        "Out": ["expand_v2_out"]
+                    },
+                    "op_attrs": dics[0]
+                }]
+                ops = self.generate_op_config(ops_config)
+                program_config = ProgramConfig(
+                    ops=ops,
+                    weights={},
+                    inputs={
+                        "expand_v2_input":
+                        TensorConfig(data_gen=partial(generate_input1, dics))
+                    },
+                    outputs=["expand_v2_out"])
+
+                yield program_config
+
+    def sample_predictor_configs(
+            self, program_config) -> (paddle_infer.Config, List[int], float):
+
+        def generate_dynamic_shape(attrs):
+            if self.dims == 4:
+                self.dynamic_shape.min_input_shape = {
+                    "expand_v2_input": [1, 1, 4, 6]
+                }
+                self.dynamic_shape.max_input_shape = {
+                    "expand_v2_input": [10, 1, 4, 6]
+                }
+                self.dynamic_shape.opt_input_shape = {
+                    "expand_v2_input": [1, 1, 4, 6]
+                }
+            elif self.dims == 3:
+                self.dynamic_shape.min_input_shape = {
+                    "expand_v2_input": [1, 8, 6]
+                }
+                self.dynamic_shape.max_input_shape = {
+                    "expand_v2_input": [4, 8, 6]
+                }
+                self.dynamic_shape.opt_input_shape = {
+                    "expand_v2_input": [1, 8, 6]
+                }
+            elif self.dims == 2:
+                self.dynamic_shape.min_input_shape = {
+                    "expand_v2_input": [1, 48]
+                }
+                self.dynamic_shape.max_input_shape = {
+                    "expand_v2_input": [4, 48]
+                }
+                self.dynamic_shape.opt_input_shape = {
+                    "expand_v2_input": [1, 48]
+                }
+            elif self.dims == 1:
+                self.dynamic_shape.min_input_shape = {"expand_v2_input": [48]}
+                self.dynamic_shape.max_input_shape = {"expand_v2_input": [48]}
+                self.dynamic_shape.opt_input_shape = {"expand_v2_input": [48]}
+
+        def clear_dynamic_shape():
+            self.dynamic_shape.min_input_shape = {}
+            self.dynamic_shape.max_input_shape = {}
+            self.dynamic_shape.opt_input_shape = {}
+
+        def generate_trt_nodes_num(attrs, dynamic_shape):
+            # The op teller only accepts expand_v2 with dynamic shape, hence
+            # the different expectation for the static-shape runs.
+            if dynamic_shape:
+                return 1, 2
+            else:
+                return 0, 3
+
+        attrs = [op.attrs for op in program_config.ops]
+
+        clear_dynamic_shape()
+        self.trt_param.precision = paddle_infer.PrecisionType.Float32
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, False), 1e-5
+        self.trt_param.precision = paddle_infer.PrecisionType.Half
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, False), 1e-5
+
+        # for dynamic_shape
+        generate_dynamic_shape(attrs)
+        self.trt_param.precision = paddle_infer.PrecisionType.Float32
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, True), 1e-5
+        self.trt_param.precision = paddle_infer.PrecisionType.Half
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, True), 1e-5
+
+    def add_skip_trt_case(self):
+        pass
+
+    def test(self):
+        self.add_skip_trt_case()
+        self.run_test()
+
+
+class TrtConvertExpandV2Test2(TrtLayerAutoScanTest):
+    """expand_v2 that additionally receives its target shape via `Shape`.
+
+    The op teller added by this patch returns False for expand_v2 whenever
+    the `Shape` input is connected.
+    """
+
+    def is_program_valid(self, program_config: ProgramConfig) -> bool:
+        return True
+
+    def sample_program_configs(self):
+
+        def generate_input1(attrs: List[Dict[str, Any]]):
+            if self.dims == 1:
+                self.input_shape = [1]
+                return np.random.random([1]).astype(np.float32)
+
+        for dims in [1]:
+            for shape in [[10, 12, -1, -1], [8, 64, -1, -1]]:
+                dics = [
+                    {
+                        "shape": shape,
+                    },
+                ]
+                self.dims = dims
+                dics_input = [
+                    {
+                        "X": ["expand_v2_input"],
+                        "Shape": ["shapeT1_data"]
+                    },
+                ]
+                ops_config = [
+                    # Produces the tensor wired to the `Shape` input.
+                    {
+                        "op_type": "fill_constant",
+                        "op_inputs": {},
+                        "op_outputs": {
+                            "Out": ["shapeT1_data"]
+                        },
+                        "op_attrs": {
+                            "dtype": 2,
+                            "str_value": "10",
+                            "shape": [1],
+                        },
+                    },
+                    {
+                        "op_type": "expand_v2",
+                        "op_inputs": dics_input[0],
+                        "op_outputs": {
+                            "Out": ["expand_v2_out"]
+                        },
+                        "op_attrs": dics[0]
+                    },
+                ]
+                ops = self.generate_op_config(ops_config)
+                program_config = ProgramConfig(
+                    ops=ops,
+                    weights={},
+                    inputs={
+                        "expand_v2_input":
+                        TensorConfig(data_gen=partial(generate_input1, dics))
+                    },
+                    outputs=["expand_v2_out"])
+
+                yield program_config
+
+    def sample_predictor_configs(
+            self, program_config) -> (paddle_infer.Config, List[int], float):
+
+        def generate_dynamic_shape():
+            if self.dims == 1:
+                self.dynamic_shape.min_input_shape = {"expand_v2_input": [1]}
+                self.dynamic_shape.max_input_shape = {"expand_v2_input": [1]}
+                self.dynamic_shape.opt_input_shape = {"expand_v2_input": [1]}
+
+        def clear_dynamic_shape():
+            self.dynamic_shape.min_input_shape = {}
+            self.dynamic_shape.max_input_shape = {}
+            self.dynamic_shape.opt_input_shape = {}
+
+        clear_dynamic_shape()
+        # for dynamic_shape
+        generate_dynamic_shape()
+        self.trt_param.precision = paddle_infer.PrecisionType.Float32
+        yield self.create_inference_config(), (1, 3), 1e-5
+        self.trt_param.precision = paddle_infer.PrecisionType.Half
+        yield self.create_inference_config(), (1, 3), 1e-5
+
+    def add_skip_trt_case(self):
+        pass
+
+    def test(self):
+        self.add_skip_trt_case()
+        self.run_test()
+
+
+class TrtConvertExpandV2Test3(TrtLayerAutoScanTest):
+    """expand_v2 that additionally receives `expand_shapes_tensor` inputs.
+
+    The op teller added by this patch returns False for expand_v2 whenever
+    `expand_shapes_tensor` is connected.
+    """
+
+    def is_program_valid(self, program_config: ProgramConfig) -> bool:
+        return True
+
+    def sample_program_configs(self):
+
+        def generate_input1(attrs: List[Dict[str, Any]]):
+            if self.dims == 4:
+                self.input_shape = [1, 1, 4, 6]
+                return np.random.random([1, 1, 4, 6]).astype(np.float32)
+            elif self.dims == 3:
+                self.input_shape = [1, 4, 6]
+                return np.random.random([1, 4, 6]).astype(np.float32)
+
+        for dims in [4, 3]:
+            for shape in [[10, 12, -1, -1], [8, 64, -1, -1]]:
+                dics = [
+                    {
+                        "shape": shape,
+                    },
+                ]
+                self.dims = dims
+                dics_input = [
+                    {
+                        "X": ["expand_v2_input"],
+                        "expand_shapes_tensor": [
+                            "shapeT1_data", "shapeT2_data", "shapeT3_data",
+                            "shapeT4_data"
+                        ]
+                    },
+                ]
+                ops_config = [
+                    # One fill_constant producer per expand_shapes_tensor entry.
+                    {
+                        "op_type": "fill_constant",
+                        "op_inputs": {},
+                        "op_outputs": {
+                            "Out": ["shapeT1_data"]
+                        },
+                        "op_attrs": {
+                            "dtype": 2,
+                            "str_value": "10",
+                            "shape": [1],
+                        },
+                    },
+                    {
+                        "op_type": "fill_constant",
+                        "op_inputs": {},
+                        "op_outputs": {
+                            "Out": ["shapeT2_data"]
+                        },
+                        "op_attrs": {
+                            "dtype": 2,
+                            "str_value": "12",
+                            "shape": [1],
+                        },
+                    },
+                    {
+                        "op_type": "fill_constant",
+                        "op_inputs": {},
+                        "op_outputs": {
+                            "Out": ["shapeT3_data"]
+                        },
+                        "op_attrs": {
+                            "dtype": 2,
+                            "str_value": "4",
+                            "shape": [1],
+                        },
+                    },
+                    {
+                        "op_type": "fill_constant",
+                        "op_inputs": {},
+                        "op_outputs": {
+                            "Out": ["shapeT4_data"]
+                        },
+                        "op_attrs": {
+                            "dtype": 2,
+                            "str_value": "6",
+                            "shape": [1],
+                        },
+                    },
+                    {
+                        "op_type": "expand_v2",
+                        "op_inputs": dics_input[0],
+                        "op_outputs": {
+                            "Out": ["expand_v2_out"]
+                        },
+                        "op_attrs": dics[0]
+                    },
+                ]
+                ops = self.generate_op_config(ops_config)
+                program_config = ProgramConfig(
+                    ops=ops,
+                    weights={},
+                    inputs={
+                        "expand_v2_input":
+                        TensorConfig(data_gen=partial(generate_input1, dics))
+                    },
+                    outputs=["expand_v2_out"])
+
+                yield program_config
+
+    def sample_predictor_configs(
+            self, program_config) -> (paddle_infer.Config, List[int], float):
+
+        def generate_dynamic_shape():
+            if self.dims == 4:
+                self.dynamic_shape.min_input_shape = {
+                    "expand_v2_input": [1, 1, 4, 6]
+                }
+                self.dynamic_shape.max_input_shape = {
+                    "expand_v2_input": [10, 1, 4, 6]
+                }
+                self.dynamic_shape.opt_input_shape = {
+                    "expand_v2_input": [1, 1, 4, 6]
+                }
+            elif self.dims == 3:
+                self.dynamic_shape.min_input_shape = {
+                    "expand_v2_input": [1, 4, 6]
+                }
+                self.dynamic_shape.max_input_shape = {
+                    "expand_v2_input": [4, 4, 6]
+                }
+                self.dynamic_shape.opt_input_shape = {
+                    "expand_v2_input": [1, 4, 6]
+                }
+
+        def clear_dynamic_shape():
+            self.dynamic_shape.min_input_shape = {}
+            self.dynamic_shape.max_input_shape = {}
+            self.dynamic_shape.opt_input_shape = {}
+
+        clear_dynamic_shape()
+        # for dynamic_shape
+        generate_dynamic_shape()
+        self.trt_param.precision = paddle_infer.PrecisionType.Float32
+        yield self.create_inference_config(), (4, 3), 1e-5
+        self.trt_param.precision = paddle_infer.PrecisionType.Half
+        yield self.create_inference_config(), (4, 3), 1e-5
+
+    def add_skip_trt_case(self):
+        pass
+
+    def test(self):
+        self.add_skip_trt_case()
+        self.run_test()
+
+
+if __name__ == "__main__":
+    unittest.main()