diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
index 1c27c008d8ca7b130502e00acb8d5a4180fb9010..293de6bcd31a62ed4a218eea6da0238f3193ec10 100755
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -2238,6 +2238,12 @@ USE_TRT_CONVERTER(elementwise_max_tensor);
 USE_TRT_CONVERTER(elementwise_min_tensor);
 USE_TRT_CONVERTER(elementwise_pow_tensor);
 USE_TRT_CONVERTER(elementwise_floordiv_tensor);
+USE_TRT_CONVERTER(less_than);
+USE_TRT_CONVERTER(greater_than);
+USE_TRT_CONVERTER(logical_or);
+USE_TRT_CONVERTER(logical_xor);
+USE_TRT_CONVERTER(logical_and);
+USE_TRT_CONVERTER(less_equal);
 USE_TRT_CONVERTER(transpose);
 USE_TRT_CONVERTER(transpose2);
 USE_TRT_CONVERTER(flatten);
diff --git a/paddle/fluid/inference/tensorrt/convert/elementwise_op.cc b/paddle/fluid/inference/tensorrt/convert/elementwise_op.cc
old mode 100644
new mode 100755
index 53cb2da285afae4b8346f5fc6adb3afb9adec354..0280e418e804b9aacf3d80d0a60286cdd0d394a9
--- a/paddle/fluid/inference/tensorrt/convert/elementwise_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/elementwise_op.cc
@@ -74,8 +74,12 @@ class ElementwiseTensorOpConverter : public OpConverter {
     nvinfer1::Dims dims_y = Y->getDimensions();
     auto output_name = op_desc.Output("Out")[0];
 
+    int axis = -1;
     // axis here is relative to explicit batch
-    int axis = PADDLE_GET_CONST(int, op_desc.GetAttr("axis"));
+    if (op_type_ != "logical_or" && op_type_ != "logical_xor" &&
+        op_type_ != "logical_and") {
+      axis = PADDLE_GET_CONST(int, op_desc.GetAttr("axis"));
+    }
     int real_x_rank = dims_x.nbDims;
     int real_y_rank = dims_y.nbDims;
     if (!engine_->with_dynamic_shape()) {
@@ -139,17 +143,40 @@ class ElementwiseTensorOpConverter : public OpConverter {
       X = tmp;
     }
 
-    auto op_pair = ops.find(op_type_);
-    PADDLE_ENFORCE_NE(op_pair,
-                      ops.end(),
-                      platform::errors::InvalidArgument(
-                          "Elementwise op's type(%s) is not supported. Please "
-                          "check if the op_type is correct.",
-                          op_type_));
-
-    auto* layer = TRT_ENGINE_ADD_LAYER(
-        engine_, ElementWise, *X, *reshape_y_tensor, op_pair->second);
-    RreplenishLayerAndOutput(layer, "elementwise", {output_name}, test_mode);
+    if (op_type_ == "less_equal") {
+      auto* less_layer =
+          TRT_ENGINE_ADD_LAYER(engine_,
+                               ElementWise,
+                               *X,
+                               *reshape_y_tensor,
+                               nvinfer1::ElementWiseOperation::kLESS);
+      auto* equal_layer =
+          TRT_ENGINE_ADD_LAYER(engine_,
+                               ElementWise,
+                               *X,
+                               *reshape_y_tensor,
+                               nvinfer1::ElementWiseOperation::kEQUAL);
+      auto* layer = TRT_ENGINE_ADD_LAYER(engine_,
+                                         ElementWise,
+                                         *(less_layer->getOutput(0)),
+                                         *(equal_layer->getOutput(0)),
+                                         nvinfer1::ElementWiseOperation::kOR);
+
+      RreplenishLayerAndOutput(layer, "elementwise", {output_name}, test_mode);
+    } else {
+      auto op_pair = ops.find(op_type_);
+      PADDLE_ENFORCE_NE(
+          op_pair,
+          ops.end(),
+          platform::errors::InvalidArgument(
+              "Elementwise op's type(%s) is not supported. Please "
+              "check if the op_type is correct.",
+              op_type_));
+
+      auto* layer = TRT_ENGINE_ADD_LAYER(
+          engine_, ElementWise, *X, *reshape_y_tensor, op_pair->second);
+      RreplenishLayerAndOutput(layer, "elementwise", {output_name}, test_mode);
+    }
   }
 
  protected:
@@ -168,6 +195,11 @@ const std::unordered_map
     {"pow", nvinfer1::ElementWiseOperation::kPOW},
     {"max", nvinfer1::ElementWiseOperation::kMAX},
     {"floordiv", nvinfer1::ElementWiseOperation::kFLOOR_DIV},
+    {"less_than", nvinfer1::ElementWiseOperation::kLESS},
+    {"greater_than", nvinfer1::ElementWiseOperation::kGREATER},
+    {"logical_or", nvinfer1::ElementWiseOperation::kOR},
+    {"logical_xor", nvinfer1::ElementWiseOperation::kXOR},
+    {"logical_and", nvinfer1::ElementWiseOperation::kAND},
 };
 
 class ElementwiseTensorAddOpConverter : public ElementwiseTensorOpConverter {
@@ -204,13 +236,41 @@ class ElementwiseTensorPowOpConverter : public ElementwiseTensorOpConverter {
  public:
   ElementwiseTensorPowOpConverter() { op_type_ = "pow"; }
 };
-
 class ElementwiseTensorFloorDivOpConverter
     : public ElementwiseTensorOpConverter {
  public:
   ElementwiseTensorFloorDivOpConverter() { op_type_ = "floordiv"; }
 };
-
+class ElementwiseTensorLessThanOpConverter
+    : public ElementwiseTensorOpConverter {
+ public:
+  ElementwiseTensorLessThanOpConverter() { op_type_ = "less_than"; }
+};
+class ElementwiseTensorGreaterThanOpConverter
+    : public ElementwiseTensorOpConverter {
+ public:
+  ElementwiseTensorGreaterThanOpConverter() { op_type_ = "greater_than"; }
+};
+class ElementwiseTensorLogicalOrOpConverter
+    : public ElementwiseTensorOpConverter {
+ public:
+  ElementwiseTensorLogicalOrOpConverter() { op_type_ = "logical_or"; }
+};
+class ElementwiseTensorLogicalXorOpConverter
+    : public ElementwiseTensorOpConverter {
+ public:
+  ElementwiseTensorLogicalXorOpConverter() { op_type_ = "logical_xor"; }
+};
+class ElementwiseTensorLogicalAndOpConverter
+    : public ElementwiseTensorOpConverter {
+ public:
+  ElementwiseTensorLogicalAndOpConverter() { op_type_ = "logical_and"; }
+};
+class ElementwiseTensorLessEqualOpConverter
+    : public ElementwiseTensorOpConverter {
+ public:
+  ElementwiseTensorLessEqualOpConverter() { op_type_ = "less_equal"; }
+};
 }  // namespace tensorrt
 }  // namespace inference
 }  // namespace paddle
@@ -248,3 +308,10 @@ REGISTER_TRT_OP_CONVERTER(elementwise_pow_tensor,
                           ElementwiseTensorPowOpConverter);
 REGISTER_TRT_OP_CONVERTER(elementwise_floordiv_tensor,
                           ElementwiseTensorFloorDivOpConverter);
+REGISTER_TRT_OP_CONVERTER(less_than, ElementwiseTensorLessThanOpConverter);
+REGISTER_TRT_OP_CONVERTER(greater_than,
+                          ElementwiseTensorGreaterThanOpConverter);
+REGISTER_TRT_OP_CONVERTER(logical_or, ElementwiseTensorLogicalOrOpConverter);
+REGISTER_TRT_OP_CONVERTER(logical_xor, ElementwiseTensorLogicalXorOpConverter);
+REGISTER_TRT_OP_CONVERTER(logical_and, ElementwiseTensorLogicalAndOpConverter);
+REGISTER_TRT_OP_CONVERTER(less_equal, ElementwiseTensorLessEqualOpConverter);
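Note: nvinfer1::ElementWiseOperation exposes no dedicated less-or-equal member at this API level, which is why the less_equal branch above composes three layers: kLESS, kEQUAL, and kOR over their outputs. A minimal NumPy sketch of the same decomposition, useful for sanity-checking expected outputs (the helper name is illustrative, not part of this patch):

import numpy as np

def less_equal_via_or(x, y):
    # Mirrors the TRT layer graph built above: (x < y) OR (x == y) == (x <= y).
    return np.logical_or(x < y, x == y)

x = np.array([1, 2, 3], dtype=np.int32)
y = np.array([2, 2, 2], dtype=np.int32)
assert (less_equal_via_or(x, y) == (x <= y)).all()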
Please " + "check if the op_type is correct.", + op_type_)); + + auto* layer = TRT_ENGINE_ADD_LAYER( + engine_, ElementWise, *X, *reshape_y_tensor, op_pair->second); + RreplenishLayerAndOutput(layer, "elementwise", {output_name}, test_mode); + } } protected: @@ -168,6 +195,11 @@ const std::unordered_map {"pow", nvinfer1::ElementWiseOperation::kPOW}, {"max", nvinfer1::ElementWiseOperation::kMAX}, {"floordiv", nvinfer1::ElementWiseOperation::kFLOOR_DIV}, + {"less_than", nvinfer1::ElementWiseOperation::kLESS}, + {"greater_than", nvinfer1::ElementWiseOperation::kGREATER}, + {"logical_or", nvinfer1::ElementWiseOperation::kOR}, + {"logical_xor", nvinfer1::ElementWiseOperation::kXOR}, + {"logical_and", nvinfer1::ElementWiseOperation::kAND}, }; class ElementwiseTensorAddOpConverter : public ElementwiseTensorOpConverter { @@ -204,13 +236,41 @@ class ElementwiseTensorPowOpConverter : public ElementwiseTensorOpConverter { public: ElementwiseTensorPowOpConverter() { op_type_ = "pow"; } }; - class ElementwiseTensorFloorDivOpConverter : public ElementwiseTensorOpConverter { public: ElementwiseTensorFloorDivOpConverter() { op_type_ = "floordiv"; } }; - +class ElementwiseTensorLessThanOpConverter + : public ElementwiseTensorOpConverter { + public: + ElementwiseTensorLessThanOpConverter() { op_type_ = "less_than"; } +}; +class ElementwiseTensorGreaterThanOpConverter + : public ElementwiseTensorOpConverter { + public: + ElementwiseTensorGreaterThanOpConverter() { op_type_ = "greater_than"; } +}; +class ElementwiseTensorLogicalOrOpConverter + : public ElementwiseTensorOpConverter { + public: + ElementwiseTensorLogicalOrOpConverter() { op_type_ = "logical_or"; } +}; +class ElementwiseTensorLogicalXorOpConverter + : public ElementwiseTensorOpConverter { + public: + ElementwiseTensorLogicalXorOpConverter() { op_type_ = "logical_xor"; } +}; +class ElementwiseTensorLogicalAndOpConverter + : public ElementwiseTensorOpConverter { + public: + ElementwiseTensorLogicalAndOpConverter() { op_type_ = "logical_and"; } +}; +class ElementwiseTensorLessEqualOpConverter + : public ElementwiseTensorOpConverter { + public: + ElementwiseTensorLessEqualOpConverter() { op_type_ = "less_equal"; } +}; } // namespace tensorrt } // namespace inference } // namespace paddle @@ -248,3 +308,10 @@ REGISTER_TRT_OP_CONVERTER(elementwise_pow_tensor, ElementwiseTensorPowOpConverter); REGISTER_TRT_OP_CONVERTER(elementwise_floordiv_tensor, ElementwiseTensorFloorDivOpConverter); +REGISTER_TRT_OP_CONVERTER(less_than, ElementwiseTensorLessThanOpConverter); +REGISTER_TRT_OP_CONVERTER(greater_than, + ElementwiseTensorGreaterThanOpConverter); +REGISTER_TRT_OP_CONVERTER(logical_or, ElementwiseTensorLogicalOrOpConverter); +REGISTER_TRT_OP_CONVERTER(logical_xor, ElementwiseTensorLogicalXorOpConverter); +REGISTER_TRT_OP_CONVERTER(logical_and, ElementwiseTensorLogicalAndOpConverter); +REGISTER_TRT_OP_CONVERTER(less_equal, ElementwiseTensorLessEqualOpConverter); diff --git a/paddle/fluid/inference/tensorrt/op_teller.cc b/paddle/fluid/inference/tensorrt/op_teller.cc index 17fb2f0aa6d09581e33b7d4ebd3281f5f6636e21..d88de415e82cddba9ebe9a7cf2da420f51c29de3 100644 --- a/paddle/fluid/inference/tensorrt/op_teller.cc +++ b/paddle/fluid/inference/tensorrt/op_teller.cc @@ -1322,6 +1322,32 @@ struct SimpleOpTypeSetTeller : public Teller { } } + if (op_type == "less_than" || op_type == "greater_than" || + op_type == "logical_or" || op_type == "logical_xor" || + op_type == "logical_and" || op_type == "less_equal") { +#if IS_TRT_VERSION_GE(8400) + if 
diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_compare_and_logical.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_compare_and_logical.py
new file mode 100755
index 0000000000000000000000000000000000000000..85abf692ae56a65859e759dc1c5347879beaa059
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_compare_and_logical.py
@@ -0,0 +1,483 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+from functools import partial
+from typing import List
+
+import numpy as np
+from program_config import ProgramConfig, TensorConfig
+from trt_layer_auto_scan_test import TrtLayerAutoScanTest
+
+import paddle.inference as paddle_infer
+
+
+class TrtConvertLogicalTest(TrtLayerAutoScanTest):
+    def is_program_valid(self, program_config: ProgramConfig) -> bool:
+        return True
+
+    def sample_program_configs(self):
+        def generate_input(shape):
+            return np.random.random(shape).astype(np.float32)
+
+        for shape in [[2, 16], [2, 16, 32], [1, 32, 16, 32]]:
+            for op_type in ["logical_and", "logical_or", "logical_xor"]:
+                for axis in [-1]:
+                    self.dims = len(shape)
+                    dics = [
+                        {"axis": axis},
+                        {"in_dtype": 5, "out_dtype": 0},
+                        {"in_dtype": 0, "out_dtype": 5},
+                    ]
+                    ops_config = [
+                        {
+                            "op_type": "cast",
+                            "op_inputs": {"X": ["input_data1"]},
+                            "op_outputs": {"Out": ["cast_output_data1"]},
+                            "op_attrs": dics[1],
+                            "outputs_dtype": {"cast_output_data1": np.bool_},
+                        },
+                        {
+                            "op_type": "cast",
+                            "op_inputs": {"X": ["input_data2"]},
+                            "op_outputs": {"Out": ["cast_output_data3"]},
+                            "op_attrs": dics[1],
+                            "outputs_dtype": {"cast_output_data3": np.bool_},
+                        },
+                        {
+                            "op_type": op_type,
+                            "op_inputs": {
+                                "X": ["cast_output_data1"],
+                                "Y": ["cast_output_data3"],
+                            },
+                            "op_outputs": {"Out": ["cast_output_data0"]},
+                            "op_attrs": dics[0],
+                        },
+                        {
+                            "op_type": "cast",
+                            "op_inputs": {"X": ["cast_output_data0"]},
+                            "op_outputs": {"Out": ["output_data"]},
+                            "op_attrs": dics[2],
+                        },
+                    ]
+                    ops = self.generate_op_config(ops_config)
+
+                    program_config = ProgramConfig(
+                        ops=ops,
+                        weights={},
+                        inputs={
+                            "input_data1": TensorConfig(
+                                data_gen=partial(generate_input, shape)
+                            ),
+                            "input_data2": TensorConfig(
+                                data_gen=partial(generate_input, shape)
+                            ),
+                        },
+                        outputs=["output_data"],
+                    )
+
+                    yield program_config
+
+    def sample_predictor_configs(
+        self, program_config
+    ) -> (paddle_infer.Config, List[int], float):
+        def generate_dynamic_shape(attrs):
+            if self.dims == 2:
+                self.dynamic_shape.min_input_shape = {
+                    "input_data1": [2, 16],
+                    "input_data2": [2, 16],
+                }
+                self.dynamic_shape.max_input_shape = {
+                    "input_data1": [2, 16],
+                    "input_data2": [2, 16],
+                }
+                self.dynamic_shape.opt_input_shape = {
+                    "input_data1": [2, 16],
+                    "input_data2": [2, 16],
+                }
+            if self.dims == 3:
+                self.dynamic_shape.min_input_shape = {
+                    "input_data1": [2, 16, 32],
+                    "input_data2": [2, 16, 32],
+                }
+                self.dynamic_shape.max_input_shape = {
+                    "input_data1": [2, 16, 32],
+                    "input_data2": [2, 16, 32],
+                }
+                self.dynamic_shape.opt_input_shape = {
+                    "input_data1": [2, 16, 32],
+                    "input_data2": [2, 16, 32],
+                }
+            if self.dims == 4:
+                self.dynamic_shape.min_input_shape = {
+                    "input_data1": [1, 32, 16, 32],
+                    "input_data2": [1, 32, 16, 32],
+                }
+                self.dynamic_shape.max_input_shape = {
+                    "input_data1": [1, 32, 16, 32],
+                    "input_data2": [1, 32, 16, 32],
+                }
+                self.dynamic_shape.opt_input_shape = {
+                    "input_data1": [1, 32, 16, 32],
+                    "input_data2": [1, 32, 16, 32],
+                }
+
+        def clear_dynamic_shape():
+            self.dynamic_shape.max_input_shape = {}
+            self.dynamic_shape.min_input_shape = {}
+            self.dynamic_shape.opt_input_shape = {}
+
+        def generate_trt_nodes_num(attrs, dynamic_shape):
+            if dynamic_shape:
+                ver = paddle_infer.get_trt_compile_version()
+                if ver[0] * 1000 + ver[1] * 100 + ver[2] * 10 < 8400:
+                    return 0, 7
+                return 1, 3
+            return 0, 7
+
+        attrs = [
+            program_config.ops[i].attrs for i in range(len(program_config.ops))
+        ]
+
+        # for static_shape
+        clear_dynamic_shape()
+        self.trt_param.precision = paddle_infer.PrecisionType.Float32
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, False
+        ), 1e-5
+        self.trt_param.precision = paddle_infer.PrecisionType.Half
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, False
+        ), (1e-3, 1e-3)
+
+        # for dynamic_shape
+        generate_dynamic_shape(attrs)
+        self.trt_param.precision = paddle_infer.PrecisionType.Float32
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, True
+        ), 1e-5
+        self.trt_param.precision = paddle_infer.PrecisionType.Half
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, True
+        ), (1e-3, 1e-3)
+
+    def add_skip_trt_case(self):
+        pass
+
+    def test(self):
+        self.add_skip_trt_case()
+        self.run_test()
+
+
+class TrtConvertCompareTest(TrtLayerAutoScanTest):
+    def is_program_valid(self, program_config: ProgramConfig) -> bool:
+        return True
+
+    def sample_program_configs(self):
+        def generate_input(shape):
+            return np.random.random(shape).astype(np.float32)
+
+        for shape in [[2, 16], [2, 16, 32], [1, 32, 16, 32]]:
+            for op_type in ["less_than", "greater_than"]:
+                for axis in [-1]:
+                    self.dims = len(shape)
+                    dics = [
+                        {"axis": axis},
+                        {"in_dtype": 0, "out_dtype": 5},
+                    ]
+                    ops_config = [
+                        {
+                            "op_type": op_type,
+                            "op_inputs": {
+                                "X": ["input_data1"],
+                                "Y": ["input_data2"],
+                            },
+                            "op_outputs": {"Out": ["cast_output_data0"]},
+                            "op_attrs": dics[0],
+                        },
+                        {
+                            "op_type": "cast",
+                            "op_inputs": {"X": ["cast_output_data0"]},
+                            "op_outputs": {"Out": ["output_data"]},
+                            "op_attrs": dics[1],
+                        },
+                    ]
+                    ops = self.generate_op_config(ops_config)
+
+                    program_config = ProgramConfig(
+                        ops=ops,
+                        weights={},
+                        inputs={
+                            "input_data1": TensorConfig(
+                                data_gen=partial(generate_input, shape)
+                            ),
+                            "input_data2": TensorConfig(
+                                data_gen=partial(generate_input, shape)
+                            ),
+                        },
+                        outputs=["output_data"],
+                    )
+
+                    yield program_config
+
+    def sample_predictor_configs(
+        self, program_config
+    ) -> (paddle_infer.Config, List[int], float):
+        def generate_dynamic_shape(attrs):
+            if self.dims == 2:
+                self.dynamic_shape.min_input_shape = {
+                    "input_data1": [2, 16],
+                    "input_data2": [2, 16],
+                }
+                self.dynamic_shape.max_input_shape = {
+                    "input_data1": [2, 16],
+                    "input_data2": [2, 16],
+                }
+                self.dynamic_shape.opt_input_shape = {
+                    "input_data1": [2, 16],
+                    "input_data2": [2, 16],
+                }
+            if self.dims == 3:
+                self.dynamic_shape.min_input_shape = {
+                    "input_data1": [2, 16, 32],
+                    "input_data2": [2, 16, 32],
+                }
+                self.dynamic_shape.max_input_shape = {
+                    "input_data1": [2, 16, 32],
+                    "input_data2": [2, 16, 32],
+                }
+                self.dynamic_shape.opt_input_shape = {
+                    "input_data1": [2, 16, 32],
+                    "input_data2": [2, 16, 32],
+                }
+            if self.dims == 4:
+                self.dynamic_shape.min_input_shape = {
+                    "input_data1": [1, 32, 16, 32],
+                    "input_data2": [1, 32, 16, 32],
+                }
+                self.dynamic_shape.max_input_shape = {
+                    "input_data1": [1, 32, 16, 32],
+                    "input_data2": [1, 32, 16, 32],
+                }
+                self.dynamic_shape.opt_input_shape = {
+                    "input_data1": [1, 32, 16, 32],
+                    "input_data2": [1, 32, 16, 32],
+                }
+
+        def clear_dynamic_shape():
+            self.dynamic_shape.max_input_shape = {}
+            self.dynamic_shape.min_input_shape = {}
+            self.dynamic_shape.opt_input_shape = {}
+
+        def generate_trt_nodes_num(attrs, dynamic_shape):
+            ver = paddle_infer.get_trt_compile_version()
+            if ver[0] * 1000 + ver[1] * 100 + ver[2] * 10 < 8400:
+                return 0, 5
+            if not dynamic_shape:
+                return 0, 5
+            return 1, 3
+
+        attrs = [
+            program_config.ops[i].attrs for i in range(len(program_config.ops))
+        ]
+
+        # for static_shape
+        clear_dynamic_shape()
+        self.trt_param.precision = paddle_infer.PrecisionType.Float32
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, False
+        ), 1e-5
+        self.trt_param.precision = paddle_infer.PrecisionType.Half
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, False
+        ), (1e-3, 1e-3)
+
+        # for dynamic_shape
+        generate_dynamic_shape(attrs)
+        self.trt_param.precision = paddle_infer.PrecisionType.Float32
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, True
+        ), 1e-5
+        self.trt_param.precision = paddle_infer.PrecisionType.Half
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, True
+        ), (1e-3, 1e-3)
+
+    def add_skip_trt_case(self):
+        pass
+
+    def test(self):
+        self.add_skip_trt_case()
+        self.run_test()
+
+
+class TrtConvertLessEqualTest(TrtLayerAutoScanTest):
+    def is_program_valid(self, program_config: ProgramConfig) -> bool:
+        return True
+
+    def sample_program_configs(self):
+        def generate_input(shape):
+            return np.random.random(shape).astype(np.float32)
+
+        for shape in [[2, 16], [2, 16, 32], [1, 32, 16, 32]]:
+            for op_type in ["less_equal"]:
+                for axis in [-1]:
+                    self.dims = len(shape)
+                    dics = [
+                        {"axis": axis},
+                        {"in_dtype": 5, "out_dtype": 2},
+                        {"in_dtype": 0, "out_dtype": 5},
+                    ]
+                    ops_config = [
+                        {
+                            "op_type": "cast",
+                            "op_inputs": {"X": ["input_data1"]},
+                            "op_outputs": {"Out": ["cast_output_data1"]},
+                            "op_attrs": dics[1],
+                        },
+                        {
+                            "op_type": "cast",
+                            "op_inputs": {"X": ["input_data2"]},
+                            "op_outputs": {"Out": ["cast_output_data2"]},
+                            "op_attrs": dics[1],
+                        },
+                        {
+                            "op_type": op_type,
+                            "op_inputs": {
+                                "X": ["cast_output_data1"],
+                                "Y": ["cast_output_data2"],
+                            },
+                            "op_outputs": {"Out": ["cast_output_data0"]},
+                            "op_attrs": dics[0],
+                        },
+                        {
+                            "op_type": "cast",
+                            "op_inputs": {"X": ["cast_output_data0"]},
+                            "op_outputs": {"Out": ["output_data"]},
+                            "op_attrs": dics[2],
+                        },
+                    ]
+                    ops = self.generate_op_config(ops_config)
+
+                    program_config = ProgramConfig(
+                        ops=ops,
+                        weights={},
+                        inputs={
+                            "input_data1": TensorConfig(
+                                data_gen=partial(generate_input, shape)
+                            ),
+                            "input_data2": TensorConfig(
+                                data_gen=partial(generate_input, shape)
+                            ),
+                        },
+                        outputs=["output_data"],
+                    )
+
+                    yield program_config
+
+    def sample_predictor_configs(
+        self, program_config
+    ) -> (paddle_infer.Config, List[int], float):
+        def generate_dynamic_shape(attrs):
+            if self.dims == 2:
+                self.dynamic_shape.min_input_shape = {
+                    "input_data1": [2, 16],
+                    "input_data2": [2, 16],
+                }
+                self.dynamic_shape.max_input_shape = {
+                    "input_data1": [2, 16],
+                    "input_data2": [2, 16],
+                }
+                self.dynamic_shape.opt_input_shape = {
+                    "input_data1": [2, 16],
+                    "input_data2": [2, 16],
+                }
+            if self.dims == 3:
+                self.dynamic_shape.min_input_shape = {
+                    "input_data1": [2, 16, 32],
+                    "input_data2": [2, 16, 32],
+                }
+                self.dynamic_shape.max_input_shape = {
+                    "input_data1": [2, 16, 32],
+                    "input_data2": [2, 16, 32],
+                }
+                self.dynamic_shape.opt_input_shape = {
+                    "input_data1": [2, 16, 32],
+                    "input_data2": [2, 16, 32],
+                }
+            if self.dims == 4:
+                self.dynamic_shape.min_input_shape = {
+                    "input_data1": [1, 32, 16, 32],
+                    "input_data2": [1, 32, 16, 32],
+                }
+                self.dynamic_shape.max_input_shape = {
+                    "input_data1": [1, 32, 16, 32],
+                    "input_data2": [1, 32, 16, 32],
+                }
+                self.dynamic_shape.opt_input_shape = {
+                    "input_data1": [1, 32, 16, 32],
+                    "input_data2": [1, 32, 16, 32],
+                }
+
+        def clear_dynamic_shape():
+            self.dynamic_shape.max_input_shape = {}
+            self.dynamic_shape.min_input_shape = {}
+            self.dynamic_shape.opt_input_shape = {}
+
+        def generate_trt_nodes_num(attrs, dynamic_shape):
+            ver = paddle_infer.get_trt_compile_version()
+            if (
+                ver[0] * 1000 + ver[1] * 100 + ver[2] * 10 < 8400
+                or not dynamic_shape
+            ):
+                return 2, 5
+            else:
+                return 1, 3
+
+        attrs = [
+            program_config.ops[i].attrs for i in range(len(program_config.ops))
+        ]
+
+        # for static_shape
+        clear_dynamic_shape()
+        self.trt_param.precision = paddle_infer.PrecisionType.Float32
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, False
+        ), 1e-5
+        self.trt_param.precision = paddle_infer.PrecisionType.Half
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, False
+        ), (1e-3, 1e-3)
+
+        # for dynamic_shape
+        generate_dynamic_shape(attrs)
+        self.trt_param.precision = paddle_infer.PrecisionType.Float32
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, True
+        ), 1e-5
+        self.trt_param.precision = paddle_infer.PrecisionType.Half
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, True
+        ), (1e-3, 1e-3)
+
+    def add_skip_trt_case(self):
+        pass
+
+    def test(self):
+        self.add_skip_trt_case()
+        self.run_test()
+
+
+if __name__ == "__main__":
+    unittest.main()
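Note: per the op_teller change above, logical_and/logical_or/logical_xor are only offloaded to TensorRT when both inputs are BOOL, which is why every logical-op test program brackets the op with cast layers. A minimal eager-mode sketch of the same pattern, illustrative only and not part of this patch:

import numpy as np
import paddle

# Logical ops want bool operands; produce them by comparison, mirroring
# what the test graphs do with cast ops.
x = paddle.to_tensor(np.random.random([2, 16]).astype(np.float32)) > 0.5
y = paddle.to_tensor(np.random.random([2, 16]).astype(np.float32)) > 0.5
out = paddle.logical_and(x, y)           # bool output
out_f32 = paddle.cast(out, "float32")    # cast back, as the final cast op does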