diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
index 6523e5cfced3ea00c77cd8d9847cc62a50dbd89a..790c32b31e1294ba748a8f6ba79e12ad33723fee 100644
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -2685,6 +2685,7 @@ USE_TRT_CONVERTER(tanh_shrink)
 USE_TRT_CONVERTER(logsigmoid)
 USE_TRT_CONVERTER(lookup_table)
 USE_TRT_CONVERTER(expand_v2)
+USE_TRT_CONVERTER(expand_as_v2)
 USE_TRT_CONVERTER(take_along_axis)
 USE_TRT_CONVERTER(skip_groupnorm_act)
 USE_TRT_CONVERTER(preln_groupnorm_act)
diff --git a/paddle/fluid/inference/tensorrt/convert/expand_v2_op.cc b/paddle/fluid/inference/tensorrt/convert/expand_v2_op.cc
index 0c36811145a6d5e17dd2cafd193110bb83534d77..452f1f8b92057d3b7ea61589a8352e9336e1b624 100644
--- a/paddle/fluid/inference/tensorrt/convert/expand_v2_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/expand_v2_op.cc
@@ -1,4 +1,4 @@
-/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -18,12 +18,12 @@ namespace paddle {
 namespace inference {
 namespace tensorrt {
 
-class ExpandV2OpConverter : public OpConverter {
+class ExpandOpConverter : public OpConverter {
  public:
   void operator()(const framework::proto::OpDesc& op,
                   const framework::Scope& scope,
                   bool test_mode) override {
-    VLOG(3) << "convert a expand_v2 op to trt expand layer.";
+    VLOG(3) << "convert a paddle " << op_type_ << " op to trt expand layer.";
     framework::OpDesc op_desc(op, nullptr);
     auto* input = engine_->GetITensor(op_desc.Input("X")[0]);
     auto inputs = op_desc.Inputs();
@@ -33,25 +33,40 @@ class ExpandV2OpConverter : public OpConverter {
 
     nvinfer1::ITensor* shape_tensor = nullptr;
     int32_t shape_rank = 0;
-    if (inputs.find("Shape") != inputs.end() &&
-        op_desc.Input("Shape").size() >= 1) {
-      shape_tensor = engine_->GetITensor(op_desc.Input("Shape")[0]);
-      shape_rank = shape_tensor->getDimensions().d[0];
-    } else if (inputs.find("expand_shapes_tensor") != inputs.end() &&
-               op_desc.Input("expand_shapes_tensor").size() >= 1) {
-      int shape_size = op_desc.Input("expand_shapes_tensor").size();
-      std::vector<nvinfer1::ITensor*> shape_tensors;
-      for (int i = 0; i < shape_size; ++i) {
-        shape_tensors.push_back(
-            engine_->GetITensor(op_desc.Input("expand_shapes_tensor")[i]));
+
+    if (op_type_ == "expand_v2") {
+      if (inputs.find("Shape") != inputs.end() &&
+          op_desc.Input("Shape").size() >= 1) {
+        shape_tensor = engine_->GetITensor(op_desc.Input("Shape")[0]);
+        shape_rank = shape_tensor->getDimensions().nbDims;
+      } else if (inputs.find("expand_shapes_tensor") != inputs.end() &&
+                 op_desc.Input("expand_shapes_tensor").size() >= 1) {
+        int shape_size = op_desc.Input("expand_shapes_tensor").size();
+        std::vector<nvinfer1::ITensor*> shape_tensors;
+        for (int i = 0; i < shape_size; ++i) {
+          shape_tensors.push_back(
+              engine_->GetITensor(op_desc.Input("expand_shapes_tensor")[i]));
+        }
+        shape_tensor = Concat(shape_tensors);
+        shape_rank = shape_size;
+      } else {
+        std::vector<int32_t> shape =
+            PADDLE_GET_CONST(std::vector<int32_t>, op_desc.GetAttr("shape"));
+        shape_tensor =
+            Add1DConstantLayer(shape, output_name + "_shape_tensor_");
+        shape_rank = shape.size();
+      }
+    } else if (op_type_ == "expand_as_v2") {
+      if (inputs.find("Y") != inputs.end()) {
+        shape_tensor = engine_->GetITensor(op_desc.Input("Y")[0]);
+        shape_rank = shape_tensor->getDimensions().nbDims;
+      } else {
+        std::vector<int32_t> shape = PADDLE_GET_CONST(
+            std::vector<int32_t>, op_desc.GetAttr("target_shape"));
+        shape_tensor =
+            Add1DConstantLayer(shape, output_name + "_target_shape_tensor_");
+        shape_rank = shape.size();
       }
-      shape_tensor = Concat(shape_tensors);
-      shape_rank = shape_size;
-    } else {
-      std::vector<int32_t> shape =
-          PADDLE_GET_CONST(std::vector<int32_t>, op_desc.GetAttr("shape"));
-      shape_tensor = Add1DConstantLayer(shape, output_name + "_shape_tensor_");
-      shape_rank = shape.size();
     }
 
     nvinfer1::ITensor* input_shape_tensor;
@@ -68,8 +83,7 @@ class ExpandV2OpConverter : public OpConverter {
       input_shape_tensor = Shape(input);
     }
 
-    auto* shuffle = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *input);
-    shuffle->setInput(1, *input_shape_tensor);
+    auto* newInputTensor = Reshape(input, input_shape_tensor);
 
     std::vector<int32_t> start_vec(shape_rank, 0);
     nvinfer1::Dims start;
@@ -91,13 +105,26 @@ class ExpandV2OpConverter : public OpConverter {
     auto strides_tensor = Min(one_tensor, input_sub_tensor);
 
     auto layer = TRT_ENGINE_ADD_LAYER(
-        engine_, Slice, *shuffle->getOutput(0), start, size, stride);
+        engine_, Slice, *newInputTensor, start, size, stride);
     layer->setInput(1, *starts_tensor);
     layer->setInput(2, *sizes_tensor);
     layer->setInput(3, *strides_tensor);
 
-    RreplenishLayerAndOutput(layer, "expand_v2", {output_name}, test_mode);
+    RreplenishLayerAndOutput(layer, op_type_, {output_name}, test_mode);
   }
+
+ protected:
+  std::string op_type_;
+};
+
+class ExpandV2OpConverter : public ExpandOpConverter {
+ public:
+  ExpandV2OpConverter() { op_type_ = "expand_v2"; }
+};
+
+class ExpandAsV2OpConverter : public ExpandOpConverter {
+ public:
+  ExpandAsV2OpConverter() { op_type_ = "expand_as_v2"; }
 };
 
 }  // namespace tensorrt
@@ -105,3 +132,4 @@ class ExpandV2OpConverter : public OpConverter {
 }  // namespace paddle
 
 REGISTER_TRT_OP_CONVERTER(expand_v2, ExpandV2OpConverter);
+REGISTER_TRT_OP_CONVERTER(expand_as_v2, ExpandAsV2OpConverter);
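Note on the conversion strategy above: the converter never materializes a copy to expand a tensor. It reshapes the input up to the target rank and then emits a single Slice layer whose per-dimension stride is Min(1, input_dim - 1), which degenerates to 0 for singleton dimensions, so the slice re-reads the same element along that axis and realizes the broadcast. The following standalone NumPy sketch (illustration only, not part of the patch; the shapes are hypothetical) shows the same zero-stride idea:

    import numpy as np

    # Input with a singleton leading dim; target shape as expand_as_v2 would
    # derive it from Y.
    x = np.arange(3, dtype=np.float32).reshape(1, 3)
    target = (4, 3)
    # The stride factor min(1, d - 1) is 0 exactly where the input dim is 1,
    # mirroring the Min(one_tensor, input_sub_tensor) strides in the converter.
    strides = tuple(min(1, d - 1) * s for d, s in zip(x.shape, x.strides))
    out = np.lib.stride_tricks.as_strided(x, shape=target, strides=strides)
    assert (out == np.broadcast_to(x, target)).all()
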
diff --git a/paddle/fluid/inference/tensorrt/op_teller.cc b/paddle/fluid/inference/tensorrt/op_teller.cc
index 85f5c003746c2038067d23dd1d551d049eb33d77..9ce57fe6aee91235a375826caafb9657b5a5ec57 100644
--- a/paddle/fluid/inference/tensorrt/op_teller.cc
+++ b/paddle/fluid/inference/tensorrt/op_teller.cc
@@ -2654,11 +2654,35 @@ struct SimpleOpTypeSetTeller : public Teller {
       }
     }
 
-    if (op_type == "expand_v2") {
+    if (op_type == "expand_as_v2" || op_type == "expand_v2") {
       if (!with_dynamic_shape) {
+        VLOG(3) << "the " << op_type
+                << " op does not support static shape yet";
         return false;
       }
-      if (!desc.HasAttr("shape")) {
+
+      auto inputs = desc.Inputs();
+      if (op_type == "expand_as_v2") {
+        if (!desc.HasAttr("target_shape") && inputs.find("Y") == inputs.end()) {
+          VLOG(3)
+              << "expand_as_v2 op must have input(Y) or attr(target_shape).";
+          return false;
+        }
+      } else if (op_type == "expand_v2") {
+        if (!desc.HasAttr("shape") && inputs.find("Shape") == inputs.end() &&
+            inputs.find("expand_shapes_tensor") == inputs.end()) {
+          VLOG(3) << "expand_v2 op must have input(Shape), "
+                     "input(expand_shapes_tensor), or attr(shape).";
+          return false;
+        }
+      }
+
+      auto* block = desc.Block();
+      if (block == nullptr) {
+        VLOG(3) << "The block desc is nullptr, we can't continue to analyze. "
+                   "Developers need to check whether block_desc is passed in "
+                   "the pass.";
         return false;
       }
     }
@@ -2921,6 +2945,7 @@ struct SimpleOpTypeSetTeller : public Teller {
       "skip_merge_layernorm",
       "lookup_table_v2",
      "expand_v2",
+      "expand_as_v2",
       "fuse_eleadd_transpose",
       "skip_groupnorm_act",
       "preln_groupnorm_act",
@@ -3080,6 +3105,7 @@ struct SimpleOpTypeSetTeller : public Teller {
       "lookup_table",
       "lookup_table_v2",
       "expand_v2",
+      "expand_as_v2",
       "fuse_eleadd_transpose",
       "skip_groupnorm_act",
       "preln_groupnorm_act",
" + "Developers need to check whether block_desc is passed in " + "the pass."; return false; } } @@ -2921,6 +2945,7 @@ struct SimpleOpTypeSetTeller : public Teller { "skip_merge_layernorm", "lookup_table_v2", "expand_v2", + "expand_as_v2", "fuse_eleadd_transpose", "skip_groupnorm_act", "preln_groupnorm_act", @@ -3080,6 +3105,7 @@ struct SimpleOpTypeSetTeller : public Teller { "lookup_table", "lookup_table_v2", "expand_v2", + "expand_as_v2", "fuse_eleadd_transpose", "skip_groupnorm_act", "preln_groupnorm_act", diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_expand_as_v2.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_expand_as_v2.py new file mode 100644 index 0000000000000000000000000000000000000000..46b3a2232e4711ce34819ea75bdb0ba25b643f86 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_expand_as_v2.py @@ -0,0 +1,252 @@ +# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +from functools import partial +from typing import Any, Dict, List + +import numpy as np +from program_config import ProgramConfig, TensorConfig +from trt_layer_auto_scan_test import TrtLayerAutoScanTest + +import paddle.inference as paddle_infer + + +class TrtConvertExpandASV2Test(TrtLayerAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: + attrs = [ + program_config.ops[i].attrs for i in range(len(program_config.ops)) + ] + if len(attrs[0]['target_shape']) < self.dims: + return False + if self.dims == 1: + if len(attrs[0]['target_shape']) == 4: + return False + return True + + def sample_program_configs(self): + def generate_input1(attrs: List[Dict[str, Any]]): + if self.dims == 4: + self.input_shape = [1, 8, 1, 32] + return np.random.random([1, 8, 1, 32]).astype(np.float32) + elif self.dims == 3: + self.input_shape = [1, 32, 32] + return np.random.random([1, 32, 32]).astype(np.float32) + elif self.dims == 2: + self.input_shape = [1, 32] + return np.random.random([1, 32]).astype(np.float32) + elif self.dims == 1: + self.input_shape = [32] + return np.random.random([32]).astype(np.float32) + + for dims in [1, 2, 3, 4]: + for shape in [ + [10, 8, 32, 32], + [2, 8, 32, 32], + [8, 32, 32], + [2, 32], + [32], + ]: + dics = [ + { + "target_shape": shape, + }, + ] + self.dims = dims + + ops_config = [ + { + "op_type": "expand_as_v2", + "op_inputs": {"X": ["expand_v2_input"]}, + "op_outputs": {"Out": ["expand_v2_out"]}, + "op_attrs": dics[0], + } + ] + ops = self.generate_op_config(ops_config) + program_config = ProgramConfig( + ops=ops, + weights={}, + inputs={ + "expand_v2_input": TensorConfig( + data_gen=partial(generate_input1, dics) + ) + }, + outputs=["expand_v2_out"], + ) + + yield program_config + + def sample_predictor_configs( + self, program_config + ) -> (paddle_infer.Config, List[int], float): + def generate_dynamic_shape(attrs): + if self.dims == 4: + self.dynamic_shape.min_input_shape = { + "expand_v2_input": [1, 8, 
+
+
+class TrtConvertExpandV2Test2(TrtLayerAutoScanTest):
+    def is_program_valid(self, program_config: ProgramConfig) -> bool:
+        attrs = [
+            program_config.ops[i].attrs for i in range(len(program_config.ops))
+        ]
+        return True
+
+    def sample_program_configs(self):
+        def generate_input1(attrs: List[Dict[str, Any]]):
+            if self.dims == 1:
+                self.input_shape = [1]
+                return np.random.random([1]).astype(np.float32)
+
+        for dims in [1]:
+            for shape in [[10]]:
+                dics = [
+                    {
+                        "target_shape": shape,
+                    },
+                ]
+                self.dims = dims
+                dics_input = [
+                    {"X": ["expand_v2_input"], "Y": ["shapeT1_data"]},
+                ]
+                ops_config = [
+                    {
+                        "op_type": "fill_constant",
+                        "op_inputs": {},
+                        "op_outputs": {"Out": ["shapeT1_data"]},
+                        "op_attrs": {
+                            "dtype": 2,
+                            "str_value": "10",
+                            "shape": [1],
+                        },
+                    },
+                    {
+                        "op_type": "expand_as_v2",
+                        "op_inputs": dics_input[0],
+                        "op_outputs": {"Out": ["expand_v2_out"]},
+                        "op_attrs": dics[0],
+                    },
+                ]
+                ops = self.generate_op_config(ops_config)
+                program_config = ProgramConfig(
+                    ops=ops,
+                    weights={},
+                    inputs={
+                        "expand_v2_input": TensorConfig(
+                            data_gen=partial(generate_input1, dics)
+                        )
+                    },
+                    outputs=["expand_v2_out"],
+                )
+
+                yield program_config
+
+    def sample_predictor_configs(
+        self, program_config
+    ) -> (paddle_infer.Config, List[int], float):
+        def generate_dynamic_shape():
+            if self.dims == 1:
+                self.dynamic_shape.min_input_shape = {"expand_v2_input": [1]}
+                self.dynamic_shape.max_input_shape = {"expand_v2_input": [1]}
+                self.dynamic_shape.opt_input_shape = {"expand_v2_input": [1]}
+
+        def clear_dynamic_shape():
+            self.dynamic_shape.min_input_shape = {}
+            self.dynamic_shape.max_input_shape = {}
+            self.dynamic_shape.opt_input_shape = {}
+
+        clear_dynamic_shape()
+        # for dynamic_shape
+        generate_dynamic_shape()
+        self.trt_param.precision = paddle_infer.PrecisionType.Float32
+        # fill_constant will be folded by the constant folding pass!
+        yield self.create_inference_config(), (1, 2), 1e-5
+        self.trt_param.precision = paddle_infer.PrecisionType.Half
+        yield self.create_inference_config(), (1, 2), 1e-3
+
+    def add_skip_trt_case(self):
+        pass
+
+    def test(self):
+        self.add_skip_trt_case()
+        self.run_test()
+
+
+if __name__ == "__main__":
+    unittest.main()
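
For completeness, a minimal usage sketch of how the new converter would be exercised outside the auto-scan tests. The model file names, input name "x", and shape ranges below are hypothetical; the calls are the standard paddle.inference ones, matching the dynamic-shape requirement enforced by the op teller above:

    import paddle.inference as paddle_infer

    # Hypothetical saved model containing an expand_as_v2 op.
    config = paddle_infer.Config("model.pdmodel", "model.pdiparams")
    config.enable_use_gpu(256, 0)
    config.enable_tensorrt_engine(
        workspace_size=1 << 30,
        max_batch_size=1,
        min_subgraph_size=1,
        precision_mode=paddle_infer.PrecisionType.Float32,
        use_static=False,
        use_calib_mode=False,
    )
    # The teller only accepts expand_as_v2 under dynamic shape, so min/max/opt
    # shape ranges must be registered for inputs fed to the TRT subgraph.
    config.set_trt_dynamic_shape_info(
        {"x": [1, 8, 1, 32]},   # min
        {"x": [10, 8, 1, 32]},  # max
        {"x": [1, 8, 1, 32]},   # opt
    )
    predictor = paddle_infer.create_predictor(config)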