diff --git a/paddle/fluid/inference/tensorrt/op_teller.cc b/paddle/fluid/inference/tensorrt/op_teller.cc
index 95fdb09cb6807e26a9e1781d0b8d36ac142c4640..56d4da97eb5f7a4ae8fc697a0ccad51e1efd9c39 100644
--- a/paddle/fluid/inference/tensorrt/op_teller.cc
+++ b/paddle/fluid/inference/tensorrt/op_teller.cc
@@ -328,6 +328,8 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
     }
 
     if (op_type == "gather_nd") {
+      if (!with_dynamic_shape) return false;
+
       auto* block = desc.Block();
       auto x_var_name = desc.Input("X")[0];
       auto index_var_name = desc.Input("Index")[0];
@@ -343,12 +345,17 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
 
       const auto index_shape = index_var_desc->GetShape();
       const auto x_shape = x_var_desc->GetShape();
+      if (x_shape.size() <= 2) {
+        VLOG(3) << "gather_nd op requires the input's dimension to be greater "
+                   "than 2";
+        return false;
+      }
+
       if (x_shape.size() != index_shape.size()) {
         VLOG(3) << "gather_nd op Index input dims size [" << index_shape.size()
                 << " ] not equal to x dims size [" << x_shape.size() << "]";
         return false;
       }
-      if (!with_dynamic_shape) return false;
     }
 
     if (op_type == "yolo_box") {
diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_gather_nd.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_gather_nd.py
new file mode 100644
index 0000000000000000000000000000000000000000..a109abdc298a659c37a051a4118be87a334f2a97
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_gather_nd.py
@@ -0,0 +1,517 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from trt_layer_auto_scan_test import TrtLayerAutoScanTest, SkipReasons
+from program_config import TensorConfig, ProgramConfig
+import numpy as np
+import paddle.inference as paddle_infer
+from functools import partial
+from typing import Optional, List, Callable, Dict, Any, Set
+import unittest
+
+
+class TrtConvertGatherNdTest_dim_4_1(TrtLayerAutoScanTest):
+    """gather_nd: 4-D float32 X, 1-D int32 index of shape [1]."""
+    def is_program_valid(self, program_config: ProgramConfig) -> bool:
+        return True
+
+    def sample_program_configs(self):
+        def generate_input1():
+            return np.random.random([2, 32, 64, 64]).astype(np.float32)
+
+        def generate_input2():
+            return np.ones([1]).astype(np.int32)
+
+        ops_config = [{
+            "op_type": "gather_nd",
+            "op_inputs": {
+                "X": ["input_data"],
+                "Index": ["index_data"]
+            },
+            "op_outputs": {
+                "Out": ["output_data"]
+            },
+            "op_attrs": {}
+        }]
+        ops = self.generate_op_config(ops_config)
+
+        program_config = ProgramConfig(
+            ops=ops,
+            weights={},
+            inputs={
+                "input_data": TensorConfig(data_gen=partial(generate_input1)),
+                "index_data": TensorConfig(data_gen=partial(generate_input2)),
+            },
+            outputs=["output_data"])
+
+        yield program_config
+
+    def sample_predictor_configs(
+            self, program_config) -> (paddle_infer.Config, List[int], float):
+        def generate_dynamic_shape(attrs):
+            self.dynamic_shape.min_input_shape = {
+                "input_data": [1, 8, 8, 8],
+                "index_data": [1]
+            }
+            self.dynamic_shape.max_input_shape = {
+                "input_data": [4, 32, 64, 64],
+                "index_data": [1]
+            }
+            self.dynamic_shape.opt_input_shape = {
+                "input_data": [2, 4, 64, 64],
+                "index_data": [1]
+            }
+
+        def clear_dynamic_shape():
+            self.dynamic_shape.max_input_shape = {}
+            self.dynamic_shape.min_input_shape = {}
+            self.dynamic_shape.opt_input_shape = {}
+
+        attrs = [
+            program_config.ops[i].attrs
+            for i in range(len(program_config.ops))
+        ]
+
+        # for static_shape
+        clear_dynamic_shape()
+        self.trt_param.precision = paddle_infer.PrecisionType.Float32
+        yield self.create_inference_config(), (0, 4), 1e-5
+        self.trt_param.precision = paddle_infer.PrecisionType.Half
+        yield self.create_inference_config(), (0, 4), 1e-5
+
+        # for dynamic_shape
+        generate_dynamic_shape(attrs)
+        self.trt_param.precision = paddle_infer.PrecisionType.Float32
+        yield self.create_inference_config(), (0, 4), 1e-5
+        self.trt_param.precision = paddle_infer.PrecisionType.Half
+        yield self.create_inference_config(), (0, 4), 1e-5
+
+    def test(self):
+        self.run_test()
+
+
+class TrtConvertGatherNdTest_dim_4_1_2(TrtLayerAutoScanTest):
+    """gather_nd: 4-D float32 X, 1-D int32 index [1, 2] (shape [2])."""
+    def is_program_valid(self, program_config: ProgramConfig) -> bool:
+        return True
+
+    def sample_program_configs(self):
+        def generate_input1():
+            return np.random.random([2, 32, 64, 64]).astype(np.float32)
+
+        def generate_input2():
+            return np.array([1, 2]).astype(np.int32)
+
+        ops_config = [{
+            "op_type": "gather_nd",
+            "op_inputs": {
+                "X": ["input_data"],
+                "Index": ["index_data"]
+            },
+            "op_outputs": {
+                "Out": ["output_data"]
+            },
+            "op_attrs": {}
+        }]
+        ops = self.generate_op_config(ops_config)
+
+        program_config = ProgramConfig(
+            ops=ops,
+            weights={},
+            inputs={
+                "input_data": TensorConfig(data_gen=partial(generate_input1)),
+                "index_data": TensorConfig(data_gen=partial(generate_input2)),
+            },
+            outputs=["output_data"])
+
+        yield program_config
+
+    def sample_predictor_configs(
+            self, program_config) -> (paddle_infer.Config, List[int], float):
+        def generate_dynamic_shape(attrs):
+            self.dynamic_shape.min_input_shape = {
+                "input_data": [1, 8, 8, 8],
+                "index_data": [1]
+            }
+            self.dynamic_shape.max_input_shape = {
+                "input_data": [4, 32, 64, 64],
+                "index_data": [4]
+            }
+            self.dynamic_shape.opt_input_shape = {
+                "input_data": [2, 4, 64, 64],
+                "index_data": [2]
+            }
+
+        def clear_dynamic_shape():
+            self.dynamic_shape.max_input_shape = {}
+            self.dynamic_shape.min_input_shape = {}
+            self.dynamic_shape.opt_input_shape = {}
+
+        attrs = [
+            program_config.ops[i].attrs
+            for i in range(len(program_config.ops))
+        ]
+
+        # for static_shape
+        clear_dynamic_shape()
+        self.trt_param.precision = paddle_infer.PrecisionType.Float32
+        yield self.create_inference_config(), (0, 4), 1e-5
+        self.trt_param.precision = paddle_infer.PrecisionType.Half
+        yield self.create_inference_config(), (0, 4), 1e-5
+
+        # for dynamic_shape
+        generate_dynamic_shape(attrs)
+        self.trt_param.precision = paddle_infer.PrecisionType.Float32
+        yield self.create_inference_config(), (0, 4), 1e-5
+        self.trt_param.precision = paddle_infer.PrecisionType.Half
+        yield self.create_inference_config(), (0, 4), 1e-5
+
+    def test(self):
+        self.run_test()
+
+
+class TrtConvertGatherNdTest_dim_4_2(TrtLayerAutoScanTest):
+    """gather_nd: 4-D float32 X, 2-D int32 index of shape [2, 2]."""
+    def is_program_valid(self, program_config: ProgramConfig) -> bool:
+        return True
+
+    def sample_program_configs(self):
+        def generate_input1():
+            return np.random.random([2, 32, 64, 64]).astype(np.float32)
+
+        def generate_input2():
+            return np.ones([2, 2]).astype(np.int32)
+
+        ops_config = [{
+            "op_type": "gather_nd",
+            "op_inputs": {
+                "X": ["input_data"],
+                "Index": ["index_data"]
+            },
+            "op_outputs": {
+                "Out": ["output_data"]
+            },
+            "op_attrs": {}
+        }]
+        ops = self.generate_op_config(ops_config)
+
+        program_config = ProgramConfig(
+            ops=ops,
+            weights={},
+            inputs={
+                "input_data": TensorConfig(data_gen=partial(generate_input1)),
+                "index_data": TensorConfig(data_gen=partial(generate_input2)),
+            },
+            outputs=["output_data"])
+
+        yield program_config
+
+    def sample_predictor_configs(
+            self, program_config) -> (paddle_infer.Config, List[int], float):
+        def generate_dynamic_shape(attrs):
+            self.dynamic_shape.min_input_shape = {
+                "input_data": [1, 8, 8, 8],
+                "index_data": [1, 2]
+            }
+            self.dynamic_shape.max_input_shape = {
+                "input_data": [4, 32, 64, 64],
+                "index_data": [4, 4]
+            }
+            self.dynamic_shape.opt_input_shape = {
+                "input_data": [2, 4, 64, 64],
+                "index_data": [2, 2]
+            }
+
+        def clear_dynamic_shape():
+            self.dynamic_shape.max_input_shape = {}
+            self.dynamic_shape.min_input_shape = {}
+            self.dynamic_shape.opt_input_shape = {}
+
+        attrs = [
+            program_config.ops[i].attrs
+            for i in range(len(program_config.ops))
+        ]
+
+        # for static_shape
+        clear_dynamic_shape()
+        self.trt_param.precision = paddle_infer.PrecisionType.Float32
+        yield self.create_inference_config(), (0, 4), 1e-5
+        self.trt_param.precision = paddle_infer.PrecisionType.Half
+        yield self.create_inference_config(), (0, 4), 1e-5
+
+        # for dynamic_shape
+        generate_dynamic_shape(attrs)
+        self.trt_param.precision = paddle_infer.PrecisionType.Float32
+        yield self.create_inference_config(), (0, 4), 1e-5
+        self.trt_param.precision = paddle_infer.PrecisionType.Half
+        yield self.create_inference_config(), (0, 4), 1e-5
+
+    def test(self):
+        self.run_test()
+
+
+class TrtConvertGatherNdTest_dim_4_3(TrtLayerAutoScanTest):
+    """gather_nd: 4-D float32 X, 3-D int32 index of shape [2, 2, 4]."""
+    def is_program_valid(self, program_config: ProgramConfig) -> bool:
+        return True
+
+    def sample_program_configs(self):
+        def generate_input1():
+            return np.random.random([2, 32, 64, 64]).astype(np.float32)
+
+        def generate_input2():
+            return np.ones([2, 2, 4]).astype(np.int32)
+
+        ops_config = [{
+            "op_type": "gather_nd",
+            "op_inputs": {
+                "X": ["input_data"],
+                "Index": ["index_data"]
+            },
+            "op_outputs": {
+                "Out": ["output_data"]
+            },
+            "op_attrs": {}
+        }]
+        ops = self.generate_op_config(ops_config)
+
+        program_config = ProgramConfig(
+            ops=ops,
+            weights={},
+            inputs={
+                "input_data": TensorConfig(data_gen=partial(generate_input1)),
+                "index_data": TensorConfig(data_gen=partial(generate_input2)),
+            },
+            outputs=["output_data"])
+
+        yield program_config
+
+    def sample_predictor_configs(
+            self, program_config) -> (paddle_infer.Config, List[int], float):
+        def generate_dynamic_shape(attrs):
+            self.dynamic_shape.min_input_shape = {
+                "input_data": [1, 8, 8, 8],
+                "index_data": [1, 2, 2]
+            }
+            self.dynamic_shape.max_input_shape = {
+                "input_data": [4, 32, 64, 64],
+                "index_data": [4, 4, 4]
+            }
+            self.dynamic_shape.opt_input_shape = {
+                "input_data": [2, 4, 64, 64],
+                "index_data": [2, 2, 2]
+            }
+
+        def clear_dynamic_shape():
+            self.dynamic_shape.max_input_shape = {}
+            self.dynamic_shape.min_input_shape = {}
+            self.dynamic_shape.opt_input_shape = {}
+
+        attrs = [
+            program_config.ops[i].attrs
+            for i in range(len(program_config.ops))
+        ]
+
+        # for static_shape
+        clear_dynamic_shape()
+        self.trt_param.precision = paddle_infer.PrecisionType.Float32
+        yield self.create_inference_config(), (0, 4), 1e-5
+        self.trt_param.precision = paddle_infer.PrecisionType.Half
+        yield self.create_inference_config(), (0, 4), 1e-5
+
+        # for dynamic_shape
+        generate_dynamic_shape(attrs)
+        self.trt_param.precision = paddle_infer.PrecisionType.Float32
+        yield self.create_inference_config(), (0, 4), 1e-5
+        self.trt_param.precision = paddle_infer.PrecisionType.Half
+        yield self.create_inference_config(), (0, 4), 1e-5
+
+    def test(self):
+        self.run_test()
+
+
+class TrtConvertGatherNdTest_dim_2_2(TrtLayerAutoScanTest):
+    """gather_nd: 2-D float32 X, 2-D int32 index of shape [2, 2]."""
+    def is_program_valid(self, program_config: ProgramConfig) -> bool:
+        return True
+
+    def sample_program_configs(self):
+        def generate_input1():
+            return np.random.random([2, 32]).astype(np.float32)
+
+        def generate_input2():
+            return np.ones([2, 2]).astype(np.int32)
+
+        ops_config = [{
+            "op_type": "gather_nd",
+            "op_inputs": {
+                "X": ["input_data"],
+                "Index": ["index_data"]
+            },
+            "op_outputs": {
+                "Out": ["output_data"]
+            },
+            "op_attrs": {}
+        }]
+        ops = self.generate_op_config(ops_config)
+
+        program_config = ProgramConfig(
+            ops=ops,
+            weights={},
+            inputs={
+                "input_data": TensorConfig(data_gen=partial(generate_input1)),
+                "index_data": TensorConfig(data_gen=partial(generate_input2)),
+            },
+            outputs=["output_data"])
+
+        yield program_config
+
+    def sample_predictor_configs(
+            self, program_config) -> (paddle_infer.Config, List[int], float):
+        def generate_dynamic_shape(attrs):
+            self.dynamic_shape.min_input_shape = {
+                "input_data": [1, 4],
+                "index_data": [1, 1]
+            }
+            self.dynamic_shape.max_input_shape = {
+                "input_data": [4, 64],
+                "index_data": [4, 2]
+            }
+            self.dynamic_shape.opt_input_shape = {
+                "input_data": [2, 8],
+                "index_data": [2, 2]
+            }
+
+        def clear_dynamic_shape():
+            self.dynamic_shape.max_input_shape = {}
+            self.dynamic_shape.min_input_shape = {}
+            self.dynamic_shape.opt_input_shape = {}
+
+        attrs = [
+            program_config.ops[i].attrs
+            for i in range(len(program_config.ops))
+        ]
+
+        # for static_shape
+        clear_dynamic_shape()
+        self.trt_param.precision = paddle_infer.PrecisionType.Float32
+        yield self.create_inference_config(), (0, 4), 1e-5
+        self.trt_param.precision = paddle_infer.PrecisionType.Half
+        yield self.create_inference_config(), (0, 4), 1e-5
+
+        # for dynamic_shape
+        generate_dynamic_shape(attrs)
+        self.trt_param.precision = paddle_infer.PrecisionType.Float32
+        yield self.create_inference_config(), (1, 3), 1e-5
+        self.trt_param.precision = paddle_infer.PrecisionType.Half
+        yield self.create_inference_config(), (1, 3), 1e-5
+
+    def add_skip_trt_case(self):
+        def teller(program_config, predictor_config):
+            if len(self.dynamic_shape.min_input_shape) != 0:
+                return True
+            return False
+
+        self.add_skip_case(
+            teller, SkipReasons.TRT_NOT_SUPPORT,
+            "Need to repair the case: the output of trt and GPU has diff when inputs' dim is 1 and 2."
+        )
+
+    def test(self):
+        self.add_skip_trt_case()
+        self.run_test()
+
+
+class TrtConvertGatherNdTest_dim_3_3(TrtLayerAutoScanTest):
+    """gather_nd: 3-D float32 X, 3-D int32 index of shape [2, 2, 2]."""
+    def is_program_valid(self, program_config: ProgramConfig) -> bool:
+        return True
+
+    def sample_program_configs(self):
+        def generate_input1():
+            return np.random.random([2, 32, 256]).astype(np.float32)
+
+        def generate_input2():
+            return np.ones([2, 2, 2]).astype(np.int32)
+
+        ops_config = [{
+            "op_type": "gather_nd",
+            "op_inputs": {
+                "X": ["input_data"],
+                "Index": ["index_data"]
+            },
+            "op_outputs": {
+                "Out": ["output_data"]
+            },
+            "op_attrs": {}
+        }]
+        ops = self.generate_op_config(ops_config)
+
+        program_config = ProgramConfig(
+            ops=ops,
+            weights={},
+            inputs={
+                "input_data": TensorConfig(data_gen=partial(generate_input1)),
+                "index_data": TensorConfig(data_gen=partial(generate_input2)),
+            },
+            outputs=["output_data"])
+
+        yield program_config
+
+    def sample_predictor_configs(
+            self, program_config) -> (paddle_infer.Config, List[int], float):
+        def generate_dynamic_shape(attrs):
+            self.dynamic_shape.min_input_shape = {
+                "input_data": [1, 4, 4],
+                "index_data": [1, 1, 1]
+            }
+            self.dynamic_shape.max_input_shape = {
+                "input_data": [4, 64, 512],
+                "index_data": [4, 2, 4]
+            }
+            self.dynamic_shape.opt_input_shape = {
+                "input_data": [2, 8, 64],
+                "index_data": [2, 2, 2]
+            }
+
+        def clear_dynamic_shape():
+            self.dynamic_shape.max_input_shape = {}
+            self.dynamic_shape.min_input_shape = {}
+            self.dynamic_shape.opt_input_shape = {}
+
+        attrs = [
+            program_config.ops[i].attrs
+            for i in range(len(program_config.ops))
+        ]
+
+        # for static_shape
+        clear_dynamic_shape()
+        self.trt_param.precision = paddle_infer.PrecisionType.Float32
+        yield self.create_inference_config(), (0, 4), 1e-5
+        self.trt_param.precision = paddle_infer.PrecisionType.Half
+        yield self.create_inference_config(), (0, 4), 1e-5
+
+        # for dynamic_shape
+        generate_dynamic_shape(attrs)
+        self.trt_param.precision = paddle_infer.PrecisionType.Float32
+        yield self.create_inference_config(), (1, 3), 1e-5
+        self.trt_param.precision = paddle_infer.PrecisionType.Half
+        yield self.create_inference_config(), (1, 3), 1e-5
+
+    def test(self):
+        self.run_test()
+
+
+if __name__ == "__main__":
+    unittest.main()