Unverified commit 94cc1d6b, authored by gaoziyuan, committed by GitHub

[Hackathon NO.75] Add the expand_as_v2 op to Paddle-TRT (#51028)



---------
Co-authored-by: Zhang Jun <ewalker@live.cn>
Parent 57201d9d
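Note: as context for the review, a minimal sketch of the expand_as_v2 semantics this PR maps onto TensorRT. paddle.expand_as is the Python API that lowers to the expand_as_v2 op; the shapes below are illustrative only.

import paddle

# Axes of size 1 in x are broadcast to the matching axis of y; every other
# axis must already agree with y's shape.
x = paddle.rand([1, 8, 1, 32])
y = paddle.rand([10, 8, 32, 32])
out = paddle.expand_as(x, y)  # out.shape == [10, 8, 32, 32]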
@@ -2685,6 +2685,7 @@ USE_TRT_CONVERTER(tanh_shrink)
 USE_TRT_CONVERTER(logsigmoid)
 USE_TRT_CONVERTER(lookup_table)
 USE_TRT_CONVERTER(expand_v2)
+USE_TRT_CONVERTER(expand_as_v2)
 USE_TRT_CONVERTER(take_along_axis)
 USE_TRT_CONVERTER(skip_groupnorm_act)
 USE_TRT_CONVERTER(preln_groupnorm_act)

-/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -18,12 +18,12 @@ namespace paddle {
 namespace inference {
 namespace tensorrt {
 
-class ExpandV2OpConverter : public OpConverter {
+class ExpandOpConverter : public OpConverter {
  public:
  void operator()(const framework::proto::OpDesc& op,
                  const framework::Scope& scope,
                  bool test_mode) override {
-    VLOG(3) << "convert a expand_v2 op to trt expand layer.";
+    VLOG(3) << "convert a paddle " << op_type_ << " op to trt expand layer.";
     framework::OpDesc op_desc(op, nullptr);
     auto* input = engine_->GetITensor(op_desc.Input("X")[0]);
     auto inputs = op_desc.Inputs();
@@ -33,25 +33,40 @@ class ExpandV2OpConverter : public OpConverter {
     nvinfer1::ITensor* shape_tensor = nullptr;
     int32_t shape_rank = 0;
-    if (inputs.find("Shape") != inputs.end() &&
-        op_desc.Input("Shape").size() >= 1) {
-      shape_tensor = engine_->GetITensor(op_desc.Input("Shape")[0]);
-      shape_rank = shape_tensor->getDimensions().d[0];
-    } else if (inputs.find("expand_shapes_tensor") != inputs.end() &&
-               op_desc.Input("expand_shapes_tensor").size() >= 1) {
-      int shape_size = op_desc.Input("expand_shapes_tensor").size();
-      std::vector<nvinfer1::ITensor*> shape_tensors;
-      for (int i = 0; i < shape_size; ++i) {
-        shape_tensors.push_back(
-            engine_->GetITensor(op_desc.Input("expand_shapes_tensor")[i]));
-      }
-      shape_tensor = Concat(shape_tensors);
-      shape_rank = shape_size;
-    } else {
-      std::vector<int32_t> shape =
-          PADDLE_GET_CONST(std::vector<int32_t>, op_desc.GetAttr("shape"));
-      shape_tensor = Add1DConstantLayer(shape, output_name + "_shape_tensor_");
-      shape_rank = shape.size();
-    }
+    if (op_type_ == "expand_v2") {
+      if (inputs.find("Shape") != inputs.end() &&
+          op_desc.Input("Shape").size() >= 1) {
+        shape_tensor = engine_->GetITensor(op_desc.Input("Shape")[0]);
+        shape_rank = shape_tensor->getDimensions().nbDims;
+      } else if (inputs.find("expand_shapes_tensor") != inputs.end() &&
+                 op_desc.Input("expand_shapes_tensor").size() >= 1) {
+        int shape_size = op_desc.Input("expand_shapes_tensor").size();
+        std::vector<nvinfer1::ITensor*> shape_tensors;
+        for (int i = 0; i < shape_size; ++i) {
+          shape_tensors.push_back(
+              engine_->GetITensor(op_desc.Input("expand_shapes_tensor")[i]));
+        }
+        shape_tensor = Concat(shape_tensors);
+        shape_rank = shape_size;
+      } else {
+        std::vector<int32_t> shape =
+            PADDLE_GET_CONST(std::vector<int32_t>, op_desc.GetAttr("shape"));
+        shape_tensor =
+            Add1DConstantLayer(shape, output_name + "_shape_tensor_");
+        shape_rank = shape.size();
+      }
+    } else if (op_type_ == "expand_as_v2") {
+      if (inputs.find("Y") != inputs.end()) {
+        shape_tensor = engine_->GetITensor(op_desc.Input("Y")[0]);
+        shape_rank = shape_tensor->getDimensions().nbDims;
+      } else {
+        std::vector<int32_t> shape = PADDLE_GET_CONST(
+            std::vector<int32_t>, op_desc.GetAttr("target_shape"));
+        shape_tensor =
+            Add1DConstantLayer(shape, output_name + "_target_shape_tensor_");
+        shape_rank = shape.size();
+      }
+    }
 
     nvinfer1::ITensor* input_shape_tensor;
@@ -68,8 +83,7 @@ class ExpandV2OpConverter : public OpConverter {
       input_shape_tensor = Shape(input);
     }
 
-    auto* shuffle = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *input);
-    shuffle->setInput(1, *input_shape_tensor);
+    auto* newInputTensor = Reshape(input, input_shape_tensor);
 
     std::vector<int32_t> start_vec(shape_rank, 0);
     nvinfer1::Dims start;
@@ -91,13 +105,26 @@ class ExpandV2OpConverter : public OpConverter {
     auto strides_tensor = Min(one_tensor, input_sub_tensor);
 
     auto layer = TRT_ENGINE_ADD_LAYER(
-        engine_, Slice, *shuffle->getOutput(0), start, size, stride);
+        engine_, Slice, *newInputTensor, start, size, stride);
     layer->setInput(1, *starts_tensor);
     layer->setInput(2, *sizes_tensor);
     layer->setInput(3, *strides_tensor);
-    RreplenishLayerAndOutput(layer, "expand_v2", {output_name}, test_mode);
+    RreplenishLayerAndOutput(layer, op_type_, {output_name}, test_mode);
   }
+
+ protected:
+  std::string op_type_;
 };
+
+class ExpandV2OpConverter : public ExpandOpConverter {
+ public:
+  ExpandV2OpConverter() { op_type_ = "expand_v2"; }
+};
+
+class ExpandAsV2OpConverter : public ExpandOpConverter {
+ public:
+  ExpandAsV2OpConverter() { op_type_ = "expand_as_v2"; }
+};
+
 }  // namespace tensorrt
@@ -105,3 +132,4 @@ class ExpandV2OpConverter : public OpConverter {
 }  // namespace paddle
 
 REGISTER_TRT_OP_CONVERTER(expand_v2, ExpandV2OpConverter);
+REGISTER_TRT_OP_CONVERTER(expand_as_v2, ExpandAsV2OpConverter);
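Note: the converter above implements the expand as a Reshape that right-aligns the input rank to the target rank, followed by a Slice whose per-axis stride is min(1, input_dim - 1), i.e. stride 0 exactly on the size-1 (broadcast) axes. A NumPy sketch of the same trick, with a hypothetical helper name:

import numpy as np

def expand_via_strides(x, target_shape):
    # Right-align the input rank to the target rank, as the converter's
    # Reshape step does.
    pad = len(target_shape) - x.ndim
    x = x.reshape((1,) * pad + x.shape)
    # min(1, dim - 1) in the converter picks stride 0 for size-1 axes and
    # stride 1 otherwise; the NumPy analogue zeroes the byte stride.
    strides = tuple(s if d > 1 else 0 for d, s in zip(x.shape, x.strides))
    return np.lib.stride_tricks.as_strided(
        x, shape=target_shape, strides=strides
    )

print(expand_via_strides(np.arange(3.0).reshape(1, 3), (2, 3)))
# [[0. 1. 2.]
#  [0. 1. 2.]]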
@@ -2654,11 +2654,35 @@ struct SimpleOpTypeSetTeller : public Teller {
       }
     }
 
-    if (op_type == "expand_v2") {
+    if (op_type == "expand_as_v2" || op_type == "expand_v2") {
       if (!with_dynamic_shape) {
         VLOG(3) << "the " << op_type
                 << " does not support static shape yet";
         return false;
       }
-      if (!desc.HasAttr("shape")) {
+
+      auto inputs = desc.Inputs();
+      if (op_type == "expand_as_v2") {
+        if (!desc.HasAttr("target_shape") &&
+            inputs.find("Y") == inputs.end()) {
+          VLOG(3)
+              << "expand_as_v2 op must have input(Y) or attr(target_shape).";
+          return false;
+        }
+      } else if (op_type == "expand_v2") {
+        if (!desc.HasAttr("shape") && inputs.find("Shape") == inputs.end() &&
+            inputs.find("expand_shapes_tensor") == inputs.end()) {
+          VLOG(3) << "expand_v2 op must have input(Shape) or "
+                     "input(expand_shapes_tensor) or attr(shape).";
+          return false;
+        }
+      }
+
+      auto* block = desc.Block();
+      if (block == nullptr) {
+        VLOG(3) << "The block desc is nullptr, we can't continue to analyze. "
+                   "Developers need to check whether block_desc is passed in "
+                   "the pass.";
+        return false;
+      }
     }
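Note: with the teller change above, expand_as_v2 is converted only under dynamic shape and only when a target shape is discoverable. The two admissible forms, written in the test harness's op-config style (tensor names illustrative):

# Target shape carried as an attribute (no Y input):
op_with_attr = {
    "op_type": "expand_as_v2",
    "op_inputs": {"X": ["x"]},
    "op_outputs": {"Out": ["out"]},
    "op_attrs": {"target_shape": [10, 8, 32, 32]},
}

# Target shape taken from the Y input's shape:
op_with_input = {
    "op_type": "expand_as_v2",
    "op_inputs": {"X": ["x"], "Y": ["y"]},
    "op_outputs": {"Out": ["out"]},
    "op_attrs": {},
}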
@@ -2921,6 +2945,7 @@ struct SimpleOpTypeSetTeller : public Teller {
       "skip_merge_layernorm",
       "lookup_table_v2",
       "expand_v2",
+      "expand_as_v2",
       "fuse_eleadd_transpose",
       "skip_groupnorm_act",
       "preln_groupnorm_act",
@@ -3080,6 +3105,7 @@ struct SimpleOpTypeSetTeller : public Teller {
       "lookup_table",
       "lookup_table_v2",
       "expand_v2",
+      "expand_as_v2",
       "fuse_eleadd_transpose",
       "skip_groupnorm_act",
       "preln_groupnorm_act",

# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
from functools import partial
from typing import Any, Dict, List

import numpy as np
from program_config import ProgramConfig, TensorConfig
from trt_layer_auto_scan_test import TrtLayerAutoScanTest

import paddle.inference as paddle_infer


class TrtConvertExpandASV2Test(TrtLayerAutoScanTest):
    def is_program_valid(self, program_config: ProgramConfig) -> bool:
        attrs = [
            program_config.ops[i].attrs for i in range(len(program_config.ops))
        ]
        if len(attrs[0]['target_shape']) < self.dims:
            return False
        if self.dims == 1:
            if len(attrs[0]['target_shape']) == 4:
                return False
        return True

    def sample_program_configs(self):
        def generate_input1(attrs: List[Dict[str, Any]]):
            if self.dims == 4:
                self.input_shape = [1, 8, 1, 32]
                return np.random.random([1, 8, 1, 32]).astype(np.float32)
            elif self.dims == 3:
                self.input_shape = [1, 32, 32]
                return np.random.random([1, 32, 32]).astype(np.float32)
            elif self.dims == 2:
                self.input_shape = [1, 32]
                return np.random.random([1, 32]).astype(np.float32)
            elif self.dims == 1:
                self.input_shape = [32]
                return np.random.random([32]).astype(np.float32)

        for dims in [1, 2, 3, 4]:
            for shape in [
                [10, 8, 32, 32],
                [2, 8, 32, 32],
                [8, 32, 32],
                [2, 32],
                [32],
            ]:
                dics = [
                    {
                        "target_shape": shape,
                    },
                ]
                self.dims = dims
                ops_config = [
                    {
                        "op_type": "expand_as_v2",
                        "op_inputs": {"X": ["expand_v2_input"]},
                        "op_outputs": {"Out": ["expand_v2_out"]},
                        "op_attrs": dics[0],
                    }
                ]
                ops = self.generate_op_config(ops_config)

                program_config = ProgramConfig(
                    ops=ops,
                    weights={},
                    inputs={
                        "expand_v2_input": TensorConfig(
                            data_gen=partial(generate_input1, dics)
                        )
                    },
                    outputs=["expand_v2_out"],
                )

                yield program_config

    def sample_predictor_configs(
        self, program_config
    ) -> (paddle_infer.Config, List[int], float):
        def generate_dynamic_shape(attrs):
            if self.dims == 4:
                self.dynamic_shape.min_input_shape = {
                    "expand_v2_input": [1, 8, 1, 32]
                }
                self.dynamic_shape.max_input_shape = {
                    "expand_v2_input": [10, 8, 1, 32]
                }
                self.dynamic_shape.opt_input_shape = {
                    "expand_v2_input": [1, 8, 1, 32]
                }
            elif self.dims == 3:
                self.dynamic_shape.min_input_shape = {
                    "expand_v2_input": [1, 32, 32]
                }
                self.dynamic_shape.max_input_shape = {
                    "expand_v2_input": [8, 32, 32]
                }
                self.dynamic_shape.opt_input_shape = {
                    "expand_v2_input": [1, 32, 32]
                }
            elif self.dims == 2:
                self.dynamic_shape.min_input_shape = {
                    "expand_v2_input": [1, 32]
                }
                self.dynamic_shape.max_input_shape = {
                    "expand_v2_input": [4, 32]
                }
                self.dynamic_shape.opt_input_shape = {
                    "expand_v2_input": [1, 32]
                }
            elif self.dims == 1:
                self.dynamic_shape.min_input_shape = {"expand_v2_input": [32]}
                self.dynamic_shape.max_input_shape = {"expand_v2_input": [64]}
                self.dynamic_shape.opt_input_shape = {"expand_v2_input": [32]}

        def clear_dynamic_shape():
            self.dynamic_shape.min_input_shape = {}
            self.dynamic_shape.max_input_shape = {}
            self.dynamic_shape.opt_input_shape = {}

        def generate_trt_nodes_num(attrs, dynamic_shape):
            if dynamic_shape:
                return 1, 2
            else:
                return 0, 3

        attrs = [
            program_config.ops[i].attrs for i in range(len(program_config.ops))
        ]

        clear_dynamic_shape()

        # for dynamic_shape
        generate_dynamic_shape(attrs)
        self.trt_param.precision = paddle_infer.PrecisionType.Float32
        yield self.create_inference_config(), generate_trt_nodes_num(
            attrs, True
        ), 1e-5
        self.trt_param.precision = paddle_infer.PrecisionType.Half
        yield self.create_inference_config(), generate_trt_nodes_num(
            attrs, True
        ), 1e-3

    def add_skip_trt_case(self):
        pass

    def test(self):
        self.add_skip_trt_case()
        self.run_test()


class TrtConvertExpandV2Test2(TrtLayerAutoScanTest):
    def is_program_valid(self, program_config: ProgramConfig) -> bool:
        attrs = [
            program_config.ops[i].attrs for i in range(len(program_config.ops))
        ]
        return True

    def sample_program_configs(self):
        def generate_input1(attrs: List[Dict[str, Any]]):
            if self.dims == 1:
                self.input_shape = [1]
                return np.random.random([1]).astype(np.float32)

        for dims in [1]:
            for shape in [[10]]:
                dics = [
                    {
                        "target_shape": shape,
                    },
                ]
                self.dims = dims
                dics_input = [
                    {"X": ["expand_v2_input"], "Y": ["shapeT1_data"]},
                ]
                ops_config = [
                    {
                        "op_type": "fill_constant",
                        "op_inputs": {},
                        "op_outputs": {"Out": ["shapeT1_data"]},
                        "op_attrs": {
                            "dtype": 2,
                            "str_value": "10",
                            "shape": [1],
                        },
                    },
                    {
                        "op_type": "expand_as_v2",
                        "op_inputs": dics_input[0],
                        "op_outputs": {"Out": ["expand_v2_out"]},
                        "op_attrs": dics[0],
                    },
                ]
                ops = self.generate_op_config(ops_config)

                program_config = ProgramConfig(
                    ops=ops,
                    weights={},
                    inputs={
                        "expand_v2_input": TensorConfig(
                            data_gen=partial(generate_input1, dics)
                        )
                    },
                    outputs=["expand_v2_out"],
                )

                yield program_config

    def sample_predictor_configs(
        self, program_config
    ) -> (paddle_infer.Config, List[int], float):
        def generate_dynamic_shape():
            if self.dims == 1:
                self.dynamic_shape.min_input_shape = {"expand_v2_input": [1]}
                self.dynamic_shape.max_input_shape = {"expand_v2_input": [1]}
                self.dynamic_shape.opt_input_shape = {"expand_v2_input": [1]}

        def clear_dynamic_shape():
            self.dynamic_shape.min_input_shape = {}
            self.dynamic_shape.max_input_shape = {}
            self.dynamic_shape.opt_input_shape = {}

        clear_dynamic_shape()

        # for dynamic_shape
        generate_dynamic_shape()
        self.trt_param.precision = paddle_infer.PrecisionType.Float32
        # fill_constant will be folded by the constant-folding pass!
        yield self.create_inference_config(), (1, 2), 1e-5
        self.trt_param.precision = paddle_infer.PrecisionType.Half
        yield self.create_inference_config(), (1, 2), 1e-3

    def add_skip_trt_case(self):
        pass

    def test(self):
        self.add_skip_trt_case()
        self.run_test()


if __name__ == "__main__":
    unittest.main()