[inference][trt] add elementwise input data type check (#49675)

5822e15c · Zhang Jun · GitHub · 86a23818 · 5822e15c · 5822e15c
4 changed files
--- a/paddle/fluid/inference/tensorrt/op_teller.cc
+++ b/paddle/fluid/inference/tensorrt/op_teller.cc
@@ -1365,16 +1365,26 @@ struct SimpleOpTypeSetTeller : public Teller {
        VLOG(3) << "Ops(" << op_type << ") do not support static shape yet.";
        return false;
      }
+      auto* block = desc.Block();
+      auto* x_var_desc = block->FindVar(desc.Input("X")[0]);
+      auto* y_var_desc = block->FindVar(desc.Input("Y")[0]);
+      auto x_dtype = x_var_desc->GetDataType();
+      auto y_dtype = y_var_desc->GetDataType();
      if (op_type == "logical_or" || op_type == "logical_xor" ||
          op_type == "logical_and") {
-        auto* block = desc.Block();
-        auto* x_var_desc = block->FindVar(desc.Input("X")[0]);
-        auto* y_var_desc = block->FindVar(desc.Input("Y")[0]);
-        auto x_dtype = x_var_desc->GetDataType();
-        auto y_dtype = y_var_desc->GetDataType();
        if (x_dtype != framework::proto::VarType::BOOL ||
            y_dtype != framework::proto::VarType::BOOL) {
-          VLOG(3) << "the op only support input of BOOL.";
+          VLOG(3) << "the op (" << op_type << ") only support input of BOOL.";
+          return false;
+        }
+      }
+      if (op_type == "less_than" || op_type == "greater_than" ||
+          op_type == "less_equal") {
+        if (x_dtype == framework::proto::VarType::BOOL ||
+            y_dtype == framework::proto::VarType::BOOL) {
+          VLOG(3)
+              << "ElementWiseOperation::kLESS/ElementWiseOperation::kGREATER "
+                 "do not support boolean datatype.";
          return false;
        }
      }
@@ -1417,6 +1427,29 @@ struct SimpleOpTypeSetTeller : public Teller {
      const auto x_shape = x_var_desc->GetShape();
      const auto y_shape = y_var_desc->GetShape();

+      // These operations do not support boolean datatype.
+      if (op_type == "elementwise_add" || op_type == "elementwise_mul" ||
+          op_type == "elementwise_sub" || op_type == "elementwise_div" ||
+          op_type == "elementwise_pow" || op_type == "elementwise_min" ||
+          op_type == "elementwise_max" || op_type == "elementwise_floordiv") {
+        if (x_var_desc->GetDataType() ==
+            paddle::framework::proto::VarType_Type::VarType_Type_BOOL) {
+          VLOG(3) << "These operations "
+                     "(elementwise_add/mul/sub/div/pow/min/max/floordiv) do "
+                     "not support boolean datatype.";
+          return false;
+        }
+      }
+      // elementwise_pow does not support int32 input.
+      if (op_type == "elementwise_pow") {
+        if (x_var_desc->GetDataType() ==
+            paddle::framework::proto::VarType_Type::VarType_Type_INT32) {
+          VLOG(3) << "These operations (elementwise_pow) do not support int32 "
+                     "datatype.";
+          return false;
+        }
+      }
+
      // The case when x_shape.size() == 1 is dealt with in common case
      if (!with_dynamic_shape && (!y_var_desc->Persistable()) &&
          y_shape.size() == 1) {

--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_compare_and_logical.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_compare_and_logical.py
@@ -481,5 +481,142 @@ class TrtConvertLessEqualTest(TrtLayerAutoScanTest):
        self.run_test()


+class TrtConvertCompareSkipTest(TrtLayerAutoScanTest):
+    """Check that compare ops fed BOOL inputs are kept out of TensorRT."""
+
+    def is_program_valid(self, program_config: ProgramConfig) -> bool:
+        """Accept every sampled program."""
+        return True
+
+    def sample_program_configs(self):
+        """Yield cast -> less_than/greater_than -> cast programs."""
+        def generate_input(shape):
+            # random() is in [0, 1) and would truncate to all zeros as int32.
+            return np.random.randint(0, 2, size=shape, dtype=np.int32)
+
+        for shape in [[2, 16], [2, 16, 32], [1, 32, 16, 32]]:
+            for op_type in ["less_than", "greater_than"]:
+                for axis in [-1]:
+                    self.dims = len(shape)
+                    dics = [
+                        {"axis": axis},
+                        {"in_dtype": 2, "out_dtype": 0},  # int32 -> bool
+                        {"in_dtype": 0, "out_dtype": 2},  # bool -> int32
+                    ]
+                    ops_config = [
+                        {
+                            "op_type": "cast",
+                            "op_inputs": {"X": ["input_data1"]},
+                            "op_outputs": {"Out": ["cast_output_data1"]},
+                            "op_attrs": dics[1],
+                            "outputs_dtype": {"cast_output_data1": np.bool_},
+                        },
+                        {
+                            "op_type": "cast",
+                            "op_inputs": {"X": ["input_data2"]},
+                            "op_outputs": {"Out": ["cast_output_data2"]},
+                            "op_attrs": dics[1],
+                            "outputs_dtype": {"cast_output_data2": np.bool_},
+                        },
+                        {
+                            "op_type": op_type,
+                            "op_inputs": {
+                                "X": ["cast_output_data1"],
+                                "Y": ["cast_output_data2"],
+                            },
+                            "op_outputs": {"Out": ["cast_output_data0"]},
+                            "op_attrs": dics[0],
+                            "outputs_dtype": {"cast_output_data0": np.bool_},
+                        },
+                        {
+                            "op_type": "cast",
+                            "op_inputs": {"X": ["cast_output_data0"]},
+                            "op_outputs": {"Out": ["output_data"]},
+                            "op_attrs": dics[2],
+                            "outputs_dtype": {"output_data": np.int32},
+                        },
+                    ]
+                    ops = self.generate_op_config(ops_config)
+
+                    program_config = ProgramConfig(
+                        ops=ops,
+                        weights={},
+                        inputs={
+                            "input_data1": TensorConfig(
+                                data_gen=partial(generate_input, shape)
+                            ),
+                            "input_data2": TensorConfig(
+                                data_gen=partial(generate_input, shape)
+                            ),
+                        },
+                        outputs=["output_data"],
+                    )
+
+                    yield program_config
+
+    def sample_predictor_configs(
+        self, program_config
+    ) -> (paddle_infer.Config, List[int], float):
+        """Yield one (config, trt node counts, tolerance) tuple per mode."""
+        def generate_dynamic_shape(attrs):
+            if self.dims == 2:
+                shape_data = [2, 16]
+            if self.dims == 3:
+                shape_data = [2, 16, 32]
+            if self.dims == 4:
+                shape_data = [1, 32, 16, 32]
+
+            shape_info = {
+                "input_data1": shape_data,
+                "input_data2": shape_data,
+                "cast_output_data0": shape_data,
+                "cast_output_data1": shape_data,
+                "cast_output_data2": shape_data,
+            }
+            # min/max/opt share one dict, so the shapes are effectively fixed.
+            self.dynamic_shape.min_input_shape = shape_info
+            self.dynamic_shape.max_input_shape = shape_info
+            self.dynamic_shape.opt_input_shape = shape_info
+
+        def clear_dynamic_shape():
+            self.dynamic_shape.max_input_shape = {}
+            self.dynamic_shape.min_input_shape = {}
+            self.dynamic_shape.opt_input_shape = {}
+
+        def generate_trt_nodes_num(attrs, dynamic_shape):
+            # NOTE(review): presumably (TRT engine count, op count) - confirm.
+            ver = paddle_infer.get_trt_compile_version()
+            if ver[0] * 1000 + ver[1] * 100 + ver[2] * 10 < 8400:
+                return 0, 7
+            if not dynamic_shape:
+                return 0, 7
+            return 3, 4
+
+        attrs = [op.attrs for op in program_config.ops]
+
+        # for static_shape
+        clear_dynamic_shape()
+        static_nodes = generate_trt_nodes_num(attrs, False)
+        self.trt_param.precision = paddle_infer.PrecisionType.Float32
+        yield self.create_inference_config(), static_nodes, 1e-5
+        self.trt_param.precision = paddle_infer.PrecisionType.Half
+        yield self.create_inference_config(), static_nodes, (1e-3, 1e-3)
+
+        # for dynamic_shape
+        generate_dynamic_shape(attrs)
+        dynamic_nodes = generate_trt_nodes_num(attrs, True)
+        self.trt_param.precision = paddle_infer.PrecisionType.Float32
+        yield self.create_inference_config(), dynamic_nodes, 1e-5
+        self.trt_param.precision = paddle_infer.PrecisionType.Half
+        yield self.create_inference_config(), dynamic_nodes, (1e-3, 1e-3)
+
+    def add_skip_trt_case(self):
+        pass
+
+    def test(self):
+        self.add_skip_trt_case()
+        self.run_test()
+
+
 if __name__ == "__main__":
    unittest.main()
--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_elementwise.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_elementwise.py
@@ -25,7 +25,7 @@ import paddle.inference as paddle_infer

 # This is the special test case with weight including batch dimension
 # I don't want to mess up the code written by others, so I wrote a class specifically
-class TrtConvertElementwiseTest_one_input_special_case0(TrtLayerAutoScanTest):
+class TrtConvertElementwiseTestOneInputSpecialCase0(TrtLayerAutoScanTest):
    def is_program_valid(self, program_config: ProgramConfig) -> bool:
        return True

@@ -158,7 +158,7 @@ class TrtConvertElementwiseTest_one_input_special_case0(TrtLayerAutoScanTest):


 # This is the special test case
-class TrtConvertElementwiseTest_one_input_special_case1(TrtLayerAutoScanTest):
+class TrtConvertElementwiseTestOneInputSpecialCase1(TrtLayerAutoScanTest):
    def is_program_valid(self, program_config: ProgramConfig) -> bool:
        return True

@@ -279,7 +279,7 @@ class TrtConvertElementwiseTest_one_input_special_case1(TrtLayerAutoScanTest):
        self.run_test()


-class TrtConvertElementwiseTest_one_input(TrtLayerAutoScanTest):
+class TrtConvertElementwiseTestOneInput(TrtLayerAutoScanTest):
    def is_program_valid(self, program_config: ProgramConfig) -> bool:
        return True

@@ -431,9 +431,7 @@ class TrtConvertElementwiseTest_one_input(TrtLayerAutoScanTest):
        self.run_test()


-class TrtConvertElementwiseTest_two_input_without_broadcast(
-    TrtLayerAutoScanTest
-):
+class TrtConvertElementwiseTestTwoInputWithoutBroadcast(TrtLayerAutoScanTest):
    def is_program_valid(self, program_config: ProgramConfig) -> bool:
        return True

@@ -592,7 +590,7 @@ class TrtConvertElementwiseTest_two_input_without_broadcast(
        self.run_test()


-class TrtConvertElementwiseTest_two_input_with_broadcast(TrtLayerAutoScanTest):
+class TrtConvertElementwiseTestTwoInputWithBroadcast(TrtLayerAutoScanTest):
    def is_program_valid(self, program_config: ProgramConfig) -> bool:
        inputs = program_config.inputs
        if len(inputs['input_data1'].shape) != len(inputs['input_data2'].shape):
@@ -754,7 +752,7 @@ class TrtConvertElementwiseTest_two_input_with_broadcast(TrtLayerAutoScanTest):
        self.run_test()


-class TrtConvertElementwiseTest_one_input_corner_case(TrtLayerAutoScanTest):
+class TrtConvertElementwiseTestOneInputCornerCase(TrtLayerAutoScanTest):
    def is_program_valid(self, program_config: ProgramConfig) -> bool:
        return True

@@ -896,5 +894,157 @@ class TrtConvertElementwiseTest_one_input_corner_case(TrtLayerAutoScanTest):
        self.run_test()


+class TrtConvertElementwiseTestTwoInputSkipCase(TrtLayerAutoScanTest):
+    """Check int32 elementwise_pow / bool elementwise_mul stay out of TRT."""
+
+    def is_program_valid(self, program_config: ProgramConfig) -> bool:
+        """Accept every sampled program."""
+        return True
+
+    def sample_program_configs(self):
+        def generate_input(shape, op_type):
+            # The TRT op teller rejects int32 inputs for elementwise_pow.
+            if op_type == "elementwise_pow":
+                return np.random.randint(
+                    low=1, high=10000, size=shape, dtype=np.int32
+                )
+            # Paddle's elementwise_mul supports bool; TensorRT's does not.
+            if op_type == "elementwise_mul":
+                return np.random.randint(0, 2, size=shape).astype(np.bool_)
+
+        for shape in [[4], [4, 32], [2, 32, 16], [1, 8, 16, 32]]:
+            for op_type in [
+                "elementwise_pow",
+                "elementwise_mul",
+            ]:
+                for axis in [0, -1]:
+                    self.dims = len(shape)
+                    dics = [{"axis": axis}]
+                    ops_config = [
+                        {
+                            "op_type": op_type,
+                            "op_inputs": {
+                                "X": ["input_data1"],
+                                "Y": ["input_data2"],
+                            },
+                            "op_outputs": {"Out": ["output_data"]},
+                            "op_attrs": dics[0],
+                            "outputs_dtype": {
+                                "output_data": np.int32
+                                if op_type == "elementwise_pow"
+                                else np.bool_
+                            },
+                        }
+                    ]
+                    ops = self.generate_op_config(ops_config)
+
+                    program_config = ProgramConfig(
+                        ops=ops,
+                        weights={},
+                        inputs={
+                            "input_data1": TensorConfig(
+                                data_gen=partial(generate_input, shape, op_type)
+                            ),
+                            "input_data2": TensorConfig(
+                                data_gen=partial(generate_input, shape, op_type)
+                            ),
+                        },
+                        outputs=["output_data"],
+                    )
+
+                    yield program_config
+
+    def sample_predictor_configs(
+        self, program_config
+    ) -> (paddle_infer.Config, List[int], float):
+        """Yield one (config, trt node counts, tolerance) tuple per mode."""
+        def generate_dynamic_shape(attrs):
+            if self.dims == 1:
+                self.dynamic_shape.min_input_shape = {
+                    "input_data1": [1],
+                    "input_data2": [1],
+                }
+                self.dynamic_shape.max_input_shape = {
+                    "input_data1": [128],
+                    "input_data2": [128],
+                }
+                self.dynamic_shape.opt_input_shape = {
+                    "input_data1": [32],
+                    "input_data2": [32],
+                }
+            elif self.dims == 2:
+                self.dynamic_shape.min_input_shape = {
+                    "input_data1": [1, 4],
+                    "input_data2": [1, 4],
+                }
+                self.dynamic_shape.max_input_shape = {
+                    "input_data1": [128, 256],
+                    "input_data2": [128, 256],
+                }
+                self.dynamic_shape.opt_input_shape = {
+                    "input_data1": [32, 64],
+                    "input_data2": [32, 64],
+                }
+            elif self.dims == 3:
+                self.dynamic_shape.min_input_shape = {
+                    "input_data1": [1, 4, 4],
+                    "input_data2": [1, 4, 4],
+                }
+                self.dynamic_shape.max_input_shape = {
+                    "input_data1": [128, 128, 256],
+                    "input_data2": [128, 128, 256],
+                }
+                self.dynamic_shape.opt_input_shape = {
+                    "input_data1": [2, 32, 16],
+                    "input_data2": [2, 32, 16],
+                }
+            elif self.dims == 4:
+                self.dynamic_shape.min_input_shape = {
+                    "input_data1": [1, 4, 4, 4],
+                    "input_data2": [1, 4, 4, 4],
+                }
+                self.dynamic_shape.max_input_shape = {
+                    "input_data1": [8, 128, 64, 128],
+                    "input_data2": [8, 128, 64, 128],
+                }
+                self.dynamic_shape.opt_input_shape = {
+                    "input_data1": [2, 64, 32, 32],
+                    "input_data2": [2, 64, 32, 32],
+                }
+
+        def clear_dynamic_shape():
+            self.dynamic_shape.max_input_shape = {}
+            self.dynamic_shape.min_input_shape = {}
+            self.dynamic_shape.opt_input_shape = {}
+
+        def generate_trt_nodes_num(attrs, dynamic_shape):
+            return 0, 4
+
+        attrs = [op.attrs for op in program_config.ops]
+
+        # for static_shape
+        clear_dynamic_shape()
+        static_nodes = generate_trt_nodes_num(attrs, False)
+        self.trt_param.precision = paddle_infer.PrecisionType.Float32
+        yield self.create_inference_config(), static_nodes, (1e-5, 1e-5)
+        self.trt_param.precision = paddle_infer.PrecisionType.Half
+        yield self.create_inference_config(), static_nodes, (1e-3, 1e-3)
+
+        # for dynamic_shape
+        generate_dynamic_shape(attrs)
+        dynamic_nodes = generate_trt_nodes_num(attrs, True)
+        self.trt_param.precision = paddle_infer.PrecisionType.Float32
+        yield self.create_inference_config(), dynamic_nodes, (1e-5, 1e-5)
+        self.trt_param.precision = paddle_infer.PrecisionType.Half
+        yield self.create_inference_config(), dynamic_nodes, (1e-3, 1e-3)
+
+    def add_skip_trt_case(self):
+        pass
+
+    def test(self):
+        self.add_skip_trt_case()
+        self.run_test()
+
+
 if __name__ == "__main__":
    unittest.main()
--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_equal.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_equal.py
@@ -23,7 +23,7 @@ from trt_layer_auto_scan_test import TrtLayerAutoScanTest
 import paddle.inference as paddle_infer


-class TrtConvertElementwiseTest_one_input_corner_case(TrtLayerAutoScanTest):
+class TrtConvertEqualOneInputCornerCase(TrtLayerAutoScanTest):
    def is_program_valid(self, program_config: ProgramConfig) -> bool:
        attrs = [
            program_config.ops[i].attrs for i in range(len(program_config.ops))