add TRT op unbind (#55476)

4a55f5e7 · chen · GitHub · 89e54d69 · 4a55f5e7 · 4a55f5e7
5 changed files
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -2916,6 +2916,7 @@ USE_TRT_CONVERTER(skip_groupnorm_act)
 USE_TRT_CONVERTER(preln_groupnorm_act)
 USE_TRT_CONVERTER(cumsum)
 USE_TRT_CONVERTER(assign)
+USE_TRT_CONVERTER(unbind)
 #if IS_TRT_VERSION_GE(8522)
 USE_TRT_CONVERTER(flash_multihead_matmul)
 USE_TRT_CONVERTER(cross_multihead_matmul)

--- a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
+++ b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
@@ -107,6 +107,7 @@ list(
  cumsum_op.cc
  temporal_shift_op.cc
  einsum_op.cc
+  unbind_op.cc
  assign_op.cc)
 if(${TENSORRT_MAJOR_VERSION} GREATER_EQUAL 7)

--- a/paddle/fluid/inference/tensorrt/convert/unbind_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/unbind_op.cc
+/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
+namespace paddle {
+namespace inference {
+namespace tensorrt {
+/*
+ * Unbind Op
+ *
+ * Converts a Paddle `unbind` op into TensorRT layers. For each name in the
+ * op's "Out" list, in order, the converter adds:
+ *   1. a Slice layer that selects index k (extent 1) along `axis` and the
+ *      full extent of every other dimension, where k is the position of the
+ *      name in "Out";
+ *   2. a Shuffle layer that reshapes the slice to the input shape with the
+ *      `axis` dimension removed.
+ * Slice start/size and the reshape dimensions are supplied as tensors via
+ * setInput(); the non-axis extents are read from Shape(input).
+ */
+class UnbindOpConverter : public OpConverter {
+ public:
+  /*
+   * op:        description of the unbind op. Reads input "X", the output
+   *            list "Out" and the optional int attribute "axis" (0 when
+   *            absent; a negative value is offset by the input rank).
+   * scope:     not used by this converter.
+   * test_mode: forwarded unchanged to RreplenishLayerAndOutput.
+   */
+  void operator()(const framework::proto::OpDesc& op,
+                  const framework::Scope& scope,
+                  bool test_mode) override {
+    VLOG(3) << "convert a unbind op to tensorrt layer";
+    framework::OpDesc op_desc(op, nullptr);
+    const std::string input_x_name = op_desc.Input("X").front();
+    auto* input_x_tensor = engine_->GetITensor(input_x_name);
+    auto* in_shape_tensor = Shape(input_x_tensor);
+    const int rank = input_x_tensor->getDimensions().nbDims;
+    int axis = 0;
+    if (op_desc.HasAttr("axis")) {
+      axis = PADDLE_GET_CONST(int, op_desc.GetAttr("axis"));
+      if (axis < 0) {
+        axis += rank;
+      }
+    }
+    // `axis` indexes start_tensors/size_tensors below; reject out-of-range
+    // values instead of accessing those vectors out of bounds.
+    PADDLE_ENFORCE_EQ(
+        axis >= 0 && axis < rank,
+        true,
+        platform::errors::InvalidArgument(
+            "The axis of unbind op should be in range [-%d, %d), but the "
+            "axis after normalization is %d.",
+            rank,
+            rank,
+            axis));
+    std::vector<nvinfer1::ITensor*> start_tensors;
+    std::vector<nvinfer1::ITensor*> size_tensors;
+    std::vector<nvinfer1::ITensor*> newDims_tensors;
+    // Placeholder Dims for the Slice constructor; start and size are
+    // overridden through setInput(1)/setInput(2), the stride stays all ones.
+    nvinfer1::Dims stride;
+    stride.nbDims = rank;
+    for (int i = 0; i < rank; ++i) {
+      stride.d[i] = 1;
+      start_tensors.push_back(Add1DConstantLayer(0));
+      if (i == axis) {
+        // Each output takes exactly one element along the unbind axis.
+        size_tensors.push_back(Add1DConstantLayer(1));
+      } else {
+        // Extract each kept extent once and share it between the slice size
+        // and the reshape target.
+        auto* dim_tensor = GetEleTensorOfShape(in_shape_tensor, i);
+        size_tensors.push_back(dim_tensor);
+        newDims_tensors.push_back(dim_tensor);
+      }
+    }
+    // NOTE(review): for a rank-1 input newDims_tensors is empty here; confirm
+    // Concat accepts that or that such ops are filtered out before conversion.
+    auto* newDims_tensor = Concat(newDims_tensors);
+    // The slice size is the same for every output, so build it once.
+    auto* size_tensor = Concat(size_tensors);
+    int out_index = 0;
+    for (const auto& output_name : op_desc.Output("Out")) {
+      // Only the start offset along `axis` changes between outputs.
+      start_tensors[axis] = Add1DConstantLayer(out_index++);
+      // 1 slice
+      auto* inputSliced = TRT_ENGINE_ADD_LAYER(
+          engine_, Slice, *input_x_tensor, stride, stride, stride);
+      inputSliced->setInput(1, *Concat(start_tensors));
+      inputSliced->setInput(2, *size_tensor);
+      auto* inputSliced_out = inputSliced->getOutput(0);
+      // 2 reshape: drop the unit dimension left at `axis`
+      auto* inputReshaped =
+          TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *inputSliced_out);
+      inputReshaped->setInput(1, *newDims_tensor);
+      RreplenishLayerAndOutput(
+          inputReshaped, "unbind", {output_name}, test_mode);
+    }
+  }
+};
+}  // namespace tensorrt
+}  // namespace inference
+}  // namespace paddle
+REGISTER_TRT_OP_CONVERTER(unbind, UnbindOpConverter);
--- a/paddle/fluid/inference/tensorrt/op_teller.cc
+++ b/paddle/fluid/inference/tensorrt/op_teller.cc
@@ -2663,6 +2663,21 @@ struct SimpleOpTypeSetTeller : public Teller {
      }
    }
+    if (op_type == "unbind") {
+      if (!with_dynamic_shape) {
+        VLOG(3) << "the unbind does not support "
+                   "static shape yet";
+        return false;
+      }
+      auto* block = desc.Block();
+      if (block == nullptr) {
+        VLOG(3) << "The block desc is nullptr, we can't continue to analyze. "
+                   "Developers need to check whether block_desc is passed in "
+                   "the pass.";
+        return false;
+      }
+    }
    if (op_type == "temporal_shift") {
 #if !IS_TRT_VERSION_GE(8200)
      VLOG(3) << "temporal_shift is not supported when TensorRT < 8.2";
@@ -2901,6 +2916,7 @@ struct SimpleOpTypeSetTeller : public Teller {
      "temporal_shift",
      "grid_sampler",
      "cumsum",
+      "unbind",
      "assign"};
  std::unordered_set<std::string> teller_set{
@@ -3064,6 +3080,7 @@ struct SimpleOpTypeSetTeller : public Teller {
      "temporal_shift",
      "grid_sampler",
      "cumsum",
+      "unbind",
      "assign"};
 };

--- a/test/ir/inference/test_trt_convert_unbind.py
+++ b/test/ir/inference/test_trt_convert_unbind.py
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import unittest
+from functools import partial
+from typing import List
+import numpy as np
+from program_config import ProgramConfig, TensorConfig
+from trt_layer_auto_scan_test import TrtLayerAutoScanTest
+import paddle.inference as paddle_infer
+class TrtConvertUnbind(TrtLayerAutoScanTest):
+    def is_program_valid(self, program_config: ProgramConfig) -> bool:
+        return True
+    def sample_program_configs(self):
+        # self.trt_param.workspace_size = 1073741824
+        def generate_input1():
+            self.input_shape = [3, 400, 196, 80]
+            return np.random.random([3, 400, 196, 80]).astype(np.float32)
+        for dims in [4]:
+            for axis in [0]:
+                # for type in ["int32", "int64", "float32", "float64"]:
+                self.dims = dims
+                ops_config = [
+                    {
+                        "op_type": "unbind",
+                        "op_inputs": {
+                            "X": ["input_data"],
+                        },
+                        "op_outputs": {
+                            "Out": [
+                                "output_data0",
+                                "output_data1",
+                                "output_data2",
+                            ]
+                        },
+                        "op_attrs": {"axis": axis},
+                    }
+                ]
+                ops = self.generate_op_config(ops_config)
+                program_config = ProgramConfig(
+                    ops=ops,
+                    weights={},
+                    inputs={
+                        "input_data": TensorConfig(
+                            data_gen=partial(generate_input1)
+                        ),
+                    },
+                    outputs=["output_data0", "output_data1", "output_data2"],
+                )
+                yield program_config
+    def sample_predictor_configs(
+        self, program_config
+    ) -> (paddle_infer.Config, List[int], float):
+        def generate_trt_nodes_num(attrs, dynamic_shape):
+            return 1, 4
+        def clear_dynamic_shape():
+            self.dynamic_shape.max_input_shape = {}
+            self.dynamic_shape.min_input_shape = {}
+            self.dynamic_shape.opt_input_shape = {}
+        def generate_dynamic_shape(attrs):
+            self.dynamic_shape.min_input_shape = {
+                "input_data": [3, 100, 196, 80]
+            }
+            self.dynamic_shape.max_input_shape = {
+                "input_data": [3, 400, 196, 80]
+            }
+            self.dynamic_shape.opt_input_shape = {
+                "input_data": [3, 400, 196, 80]
+            }
+        attrs = [
+            program_config.ops[i].attrs for i in range(len(program_config.ops))
+        ]
+        # for static_shape
+        # clear_dynamic_shape()
+        # self.trt_param.precision = paddle_infer.PrecisionType.Float32
+        # yield self.create_inference_config(), (0, 6), 1e-5
+        # self.trt_param.precision = paddle_infer.PrecisionType.Half
+        # yield self.create_inference_config(), (0, 6), 1e-3
+        # for dynamic_shape
+        generate_dynamic_shape(attrs)
+        self.trt_param.precision = paddle_infer.PrecisionType.Float32
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, True
+        ), 1e-5
+        self.trt_param.precision = paddle_infer.PrecisionType.Half
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, True
+        ), 1e-3
+    def test(self):
+        self.run_test()
+if __name__ == "__main__":
+    unittest.main()