Unverified commit dac0f7dd, authored by xiaoxiaohehe001, committed by GitHub

[Paddle Inference] Add where trt converter (#47820)

Parent 2d383b81
......@@ -2259,6 +2259,7 @@ USE_TRT_CONVERTER(prelu);
USE_TRT_CONVERTER(conv2d_transpose);
USE_TRT_CONVERTER(leaky_relu);
USE_TRT_CONVERTER(shuffle_channel);
USE_TRT_CONVERTER(where);
USE_TRT_CONVERTER(swish);
USE_TRT_CONVERTER(silu);
USE_TRT_CONVERTER(group_norm);
......
......@@ -25,6 +25,7 @@ list(
multihead_matmul_op.cc
multihead_matmul_roformer_op.cc
shuffle_channel_op.cc
where_op.cc
swish_op.cc
silu_op.cc
instance_norm_op.cc
......
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
namespace paddle {
namespace framework {
class Scope;
namespace proto {
class OpDesc;
} // namespace proto
} // namespace framework
} // namespace paddle
namespace paddle {
namespace inference {
namespace tensorrt {
/*
* Where Op
*/
class WhereOpConverter : public OpConverter {
public:
void operator()(const framework::proto::OpDesc& op,
const framework::Scope& scope,
bool test_mode) override {
VLOG(3) << "convert a where op to tensorrt select layer";
framework::OpDesc op_desc(op, nullptr);
std::string input_x_name = op_desc.Input("X").front();
std::string condition_name = op_desc.Input("Condition").front();
std::string input_y_name = op_desc.Input("Y").front();
std::string output_name = op_desc.Output("Out").front();
const auto input_x_tensor = engine_->GetITensor(input_x_name);
const auto condition_tensor = engine_->GetITensor(condition_name);
const auto input_y_tensor = engine_->GetITensor(input_y_name);
auto layer = TRT_ENGINE_ADD_LAYER(
engine_, Select, *condition_tensor, *input_x_tensor, *input_y_tensor);
RreplenishLayerAndOutput(layer, "where", {output_name}, test_mode);
}
};
} // namespace tensorrt
} // namespace inference
} // namespace paddle
REGISTER_TRT_OP_CONVERTER(where, WhereOpConverter);
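For reference, the Select layer emitted by this converter performs an elementwise choice between X and Y driven by Condition. A minimal NumPy sketch of the same semantics (illustrative only, not part of this diff):

import numpy as np

# Elementwise select: out[i] = x[i] if cond[i] else y[i]; this is the behavior
# that the TRT_ENGINE_ADD_LAYER(engine_, Select, ...) call above maps the where op onto.
cond = np.array([True, False, True])
x = np.array([1.0, 2.0, 3.0], dtype=np.float32)
y = np.array([10.0, 20.0, 30.0], dtype=np.float32)
out = np.where(cond, x, y)  # array([ 1., 20.,  3.], dtype=float32)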
......@@ -63,6 +63,10 @@ TRT_DT FluidDataType2TRT(FluidDT type) {
return TRT_DT::kINT32;
case FluidDT::VarType_Type_FP16:
return TRT_DT::kHALF;
#if IS_TRT_VERSION_GE(8400)
case FluidDT::VarType_Type_BOOL:
return TRT_DT::kBOOL;
#endif
default:
return TRT_DT::kINT32;
}
......
......@@ -1654,6 +1654,17 @@ struct SimpleOpTypeSetTeller : public Teller {
#endif
}
if (op_type == "where") {
#if !IS_TRT_VERSION_GE(8400)
VLOG(3) << "where is not supported when TensorRT < 8.4";
return false;
#endif
if (!with_dynamic_shape) {
VLOG(3) << "the where op does not support static shape yet";
return false;
}
}
if (op_type == "skip_layernorm") {
if (!with_dynamic_shape) {
VLOG(3) << "the skip_layernorm does not support static shape yet";
......@@ -2285,6 +2296,7 @@ struct SimpleOpTypeSetTeller : public Teller {
"leaky_relu",
"fc",
"shuffle_channel",
"where",
"swish",
"silu",
"celu",
......@@ -2409,6 +2421,7 @@ struct SimpleOpTypeSetTeller : public Teller {
"leaky_relu",
"fc",
"shuffle_channel",
"where",
"swish",
"silu",
"celu",
......
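Since the teller above rejects the where op under static shape, a deployment must supply a dynamic-shape profile before the op can be offloaded to TensorRT. A minimal sketch of such a config, assuming hypothetical model files and input names (none of them come from this diff):

import paddle.inference as paddle_infer

# "model.pdmodel"/"model.pdiparams" and the tensor names below are placeholders.
config = paddle_infer.Config("model.pdmodel", "model.pdiparams")
config.enable_tensorrt_engine(
    workspace_size=1 << 30,
    max_batch_size=1,
    min_subgraph_size=1,
    precision_mode=paddle_infer.PrecisionType.Float32,
    use_static=False,
    use_calib_mode=False,
)
# The where op is only converted under dynamic shape, so every tensor feeding it
# needs min/max/opt shapes in the profile.
config.set_trt_dynamic_shape_info(
    {"condition": [1, 4], "x": [1, 4], "y": [1, 4]},  # min shapes
    {"condition": [2, 4], "x": [2, 4], "y": [2, 4]},  # max shapes
    {"condition": [1, 4], "x": [1, 4], "y": [1, 4]},  # opt shapes
)
predictor = paddle_infer.create_predictor(config)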
......@@ -601,10 +601,14 @@ class TensorRTEngineOp : public framework::OperatorBase {
buffers[bind_index] = static_cast<void *>(t.data<int32_t>());
} else if (type == framework::proto::VarType::FP16) {
buffers[bind_index] = static_cast<void *>(t.data<float16>());
#if IS_TRT_VERSION_GE(8400)
} else if (type == framework::proto::VarType::BOOL) {
buffers[bind_index] = static_cast<void *>(t.data<bool>());
#endif
} else {
PADDLE_THROW(
platform::errors::Fatal("The TRT Engine OP only support "
"float/int32_t/int64_t/float16 input."));
PADDLE_THROW(platform::errors::Fatal(
"The TRT Engine OP only support "
"float/int32_t/int64_t/float16/bool input."));
}
}
......
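With the guarded branch above, an engine built against TensorRT >= 8.4 can bind a bool buffer directly. A hedged sketch of feeding a bool condition from Python, reusing the hypothetical predictor from the earlier config sketch ("condition" is an assumed input name):

import numpy as np

# With TRT >= 8.4 this bool array is bound through the new VarType::BOOL branch above;
# on older TensorRT the condition would have to be produced by a cast inside the graph.
cond_handle = predictor.get_input_handle("condition")
cond_handle.copy_from_cpu(np.array([[True, False, True, False]], dtype=np.bool_))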
......@@ -181,14 +181,25 @@ class AutoScanTest(unittest.TestCase):
ops = []
for i in range(len(ops_config)):
op_config = ops_config[i]
ops.append(
OpConfig(
type=op_config['op_type'],
inputs=op_config['op_inputs'],
outputs=op_config['op_outputs'],
attrs=op_config['op_attrs'],
if 'outputs_dtype' in op_config:
ops.append(
OpConfig(
type=op_config['op_type'],
inputs=op_config['op_inputs'],
outputs=op_config['op_outputs'],
attrs=op_config['op_attrs'],
outputs_dtype=op_config['outputs_dtype'],
)
)
else:
ops.append(
OpConfig(
type=op_config['op_type'],
inputs=op_config['op_inputs'],
outputs=op_config['op_outputs'],
attrs=op_config['op_attrs'],
)
)
)
return ops
@abc.abstractmethod
......
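The two append branches added to generate_op_config above differ only in whether outputs_dtype is forwarded. An equivalent, shorter form (a sketch of a possible simplification, not what this diff does; OpConfig is assumed to be importable from program_config, as in the test below):

from program_config import OpConfig

def generate_op_config(ops_config):
    # Build the OpConfig kwargs once and forward outputs_dtype only when present,
    # matching the behavior of the branchy version above.
    ops = []
    for op_config in ops_config:
        kwargs = dict(
            type=op_config['op_type'],
            inputs=op_config['op_inputs'],
            outputs=op_config['op_outputs'],
            attrs=op_config['op_attrs'],
        )
        if 'outputs_dtype' in op_config:
            kwargs['outputs_dtype'] = op_config['outputs_dtype']
        ops.append(OpConfig(**kwargs))
    return ops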
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from trt_layer_auto_scan_test import TrtLayerAutoScanTest
from program_config import TensorConfig, ProgramConfig
import unittest
import numpy as np
import paddle.inference as paddle_infer
from functools import partial
from typing import List
class TrtConvertWhereTest(TrtLayerAutoScanTest):
def is_program_valid(self, program_config: ProgramConfig) -> bool:
ver = paddle_infer.get_trt_compile_version()
if ver[0] * 1000 + ver[1] * 100 + ver[2] * 10 < 8415:
return False
return True
def sample_program_configs(self):
self.trt_param.workspace_size = 1073741824
def generate_input1(dims, batch):
if dims == 1:
return np.zeros((batch)).astype(np.float32)
elif dims == 2:
return np.ones((batch, 4)).astype(np.float32)
elif dims == 3:
return np.ones((batch, 4, 6)).astype(np.float32)
else:
return np.ones((batch, 4, 6, 8)).astype(np.float32)
def generate_input2(dims, batch):
if dims == 1:
return np.zeros((batch)).astype(np.float32)
elif dims == 2:
return np.ones((batch, 4)).astype(np.float32)
elif dims == 3:
return np.ones((batch, 4, 6)).astype(np.float32)
else:
return np.ones((batch, 4, 6, 8)).astype(np.float32)
def generate_input3(dims, batch):
if dims == 1:
return np.zeros((batch)).astype(np.float32)
elif dims == 2:
return np.ones((batch, 4)).astype(np.float32)
elif dims == 3:
return np.ones((batch, 4, 6)).astype(np.float32)
else:
return np.ones((batch, 4, 6, 8)).astype(np.float32)
for dims in [1, 2, 3, 4]:
for batch in [1, 2]:
self.dims = dims
dics = [{}]
ops_config = [
{
"op_type": "cast",
"op_inputs": {"X": ["condition_data"]},
"op_outputs": {"Out": ["condition_data_bool"]},
"op_attrs": {"in_dtype": 5, "out_dtype": 0},
"outputs_dtype": {"condition_data_bool": np.bool},
},
{
"op_type": "where",
"op_inputs": {
"Condition": ["condition_data_bool"],
"X": ["input_x_data"],
"Y": ["input_y_data"],
},
"op_outputs": {"Out": ["output_data"]},
"op_attrs": dics[0],
"outputs_dtype": {"condition_data_bool": np.bool},
},
]
ops = self.generate_op_config(ops_config)
program_config = ProgramConfig(
ops=ops,
weights={},
inputs={
"condition_data": TensorConfig(
data_gen=partial(generate_input1, dims, batch)
),
"input_x_data": TensorConfig(
data_gen=partial(generate_input2, dims, batch)
),
"input_y_data": TensorConfig(
data_gen=partial(generate_input3, dims, batch)
),
},
outputs=["output_data"],
)
yield program_config
def sample_predictor_configs(
self, program_config
) -> (paddle_infer.Config, List[int], float):
def generate_dynamic_shape(attrs):
if self.dims == 1:
self.dynamic_shape.min_input_shape = {
"condition_data": [1],
"condition_data_bool": [1],
"input_x_data": [1],
"input_y_data": [1],
}
self.dynamic_shape.max_input_shape = {
"condition_data": [2],
"condition_data_bool": [2],
"input_x_data": [2],
"input_y_data": [2],
}
self.dynamic_shape.opt_input_shape = {
"condition_data": [1],
"condition_data_bool": [1],
"input_x_data": [1],
"input_y_data": [1],
}
elif self.dims == 2:
self.dynamic_shape.min_input_shape = {
"condition_data": [1, 4],
"condition_data_bool": [1, 4],
"input_x_data": [1, 4],
"input_y_data": [1, 4],
}
self.dynamic_shape.max_input_shape = {
"condition_data": [2, 4],
"condition_data_bool": [2, 4],
"input_x_data": [2, 4],
"input_y_data": [2, 4],
}
self.dynamic_shape.opt_input_shape = {
"condition_data": [1, 4],
"condition_data_bool": [1, 4],
"input_x_data": [1, 4],
"input_y_data": [1, 4],
}
elif self.dims == 3:
self.dynamic_shape.min_input_shape = {
"condition_data": [1, 4, 6],
"condition_data_bool": [1, 4, 6],
"input_x_data": [1, 4, 6],
"input_y_data": [1, 4, 6],
}
self.dynamic_shape.max_input_shape = {
"condition_data": [2, 4, 6],
"condition_data_bool": [2, 4, 6],
"input_x_data": [2, 4, 6],
"input_y_data": [2, 4, 6],
}
self.dynamic_shape.opt_input_shape = {
"condition_data": [1, 4, 6],
"condition_data_bool": [1, 4, 6],
"input_x_data": [1, 4, 6],
"input_y_data": [1, 4, 6],
}
elif self.dims == 4:
self.dynamic_shape.min_input_shape = {
"condition_data": [1, 4, 6, 8],
"condition_data_bool": [1, 4, 6, 8],
"input_x_data": [1, 4, 6, 8],
"input_y_data": [1, 4, 6, 8],
}
self.dynamic_shape.max_input_shape = {
"condition_data": [2, 4, 6, 8],
"condition_data_bool": [2, 4, 6, 8],
"input_x_data": [2, 4, 6, 8],
"input_y_data": [2, 4, 6, 8],
}
self.dynamic_shape.opt_input_shape = {
"condition_data": [1, 4, 6, 8],
"condition_data_bool": [1, 4, 6, 8],
"input_x_data": [1, 4, 6, 8],
"input_y_data": [1, 4, 6, 8],
}
def clear_dynamic_shape():
self.dynamic_shape.min_input_shape = {}
self.dynamic_shape.max_input_shape = {}
self.dynamic_shape.opt_input_shape = {}
def generate_trt_nodes_num(attrs, dynamic_shape):
if not dynamic_shape:
return 0, 6
return 1, 5
attrs = [
program_config.ops[i].attrs for i in range(len(program_config.ops))
]
# for static_shape
clear_dynamic_shape()
self.trt_param.precision = paddle_infer.PrecisionType.Float32
yield self.create_inference_config(), generate_trt_nodes_num(
attrs, False
), 1e-5
self.trt_param.precision = paddle_infer.PrecisionType.Half
yield self.create_inference_config(), generate_trt_nodes_num(
attrs, False
), 1e-5
# for dynamic_shape
generate_dynamic_shape(attrs)
self.trt_param.precision = paddle_infer.PrecisionType.Float32
yield self.create_inference_config(), generate_trt_nodes_num(
attrs, True
), 1e-5
self.trt_param.precision = paddle_infer.PrecisionType.Half
yield self.create_inference_config(), generate_trt_nodes_num(
attrs, True
), 1e-5
def test(self):
self.run_test()
if __name__ == "__main__":
unittest.main()