[Paddle-TRT] add flip op (#55688)

* [Paddle-TRT] add flip op

[Paddle-TRT] add flip op (#55688)
* [Paddle-TRT] add flip op
d608170a · ming1753 · GitHub · 4191f2c6 · d608170a · d608170a
5 changed files
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -2918,6 +2918,7 @@ USE_TRT_CONVERTER(preln_groupnorm_act)
 USE_TRT_CONVERTER(cumsum)
 USE_TRT_CONVERTER(assign)
 USE_TRT_CONVERTER(unbind)
+USE_TRT_CONVERTER(flip)
 #if IS_TRT_VERSION_GE(8522)
 USE_TRT_CONVERTER(flash_multihead_matmul)
 USE_TRT_CONVERTER(cross_multihead_matmul)

--- a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
+++ b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
@@ -108,7 +108,8 @@ list(
  temporal_shift_op.cc
  einsum_op.cc
  unbind_op.cc
-  assign_op.cc)
+  assign_op.cc
+  flip_op.cc)
 if(${TENSORRT_MAJOR_VERSION} GREATER_EQUAL 7)
  list(APPEND CONVERT_FILES emb_eltwise_layernorm.cc

--- a/paddle/fluid/inference/tensorrt/convert/flip_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/flip_op.cc
+/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
+namespace paddle {
+namespace inference {
+namespace tensorrt {
+class FlipOpConverter : public OpConverter {  // Converts a Paddle `flip` op into one TensorRT loop per listed axis.
+ public:
+  void operator()(const framework::proto::OpDesc& op,
+                  const framework::Scope& scope,
+                  bool test_mode) override {
+    VLOG(4) << "convert a flip op to tensorrt layer";
+    framework::OpDesc op_desc(op, nullptr);
+    // Declare inputs
+    auto* input = engine_->GetITensor(op_desc.Input("X")[0]);
+    auto input_dims = input->getDimensions();
+    // Get Attrs
+    std::vector<int> axis =
+        PADDLE_GET_CONST(std::vector<int>, op_desc.GetAttr("axis"));
+    for (size_t i = 0; i < axis.size(); ++i) {
+      axis[i] += (axis[i] < 0) ? input_dims.nbDims : 0;  // negative axis counts from the end; no range check is done here
+    }
+    nvinfer1::ITensor* shape_tensor = Shape(input);  // taken from the original input, before the loop below reassigns `input`
+    // getAxisLength default is a scalar
+    auto getAxisLength = [&](int axis, bool scalar = true) {  // NOTE(review): parameter `axis` shadows the attribute vector `axis`
+      int d = input_dims.d[axis];
+      if (d >= 0) {  // extent is known from getDimensions(): emit it as a constant
+        return Add1DConstantLayer(d, "", scalar);
+      } else {  // negative extent (presumably a dynamic dimension): read it from shape_tensor instead
+        return GetEleTensorOfShape(shape_tensor, axis, scalar);
+      }
+    };
+    for (size_t i = 0; i < axis.size(); ++i) {  // one loop per axis; each loop consumes the previous loop's output
+      auto loop = TRT_ENGINE_ADD_LAYER(engine_, Loop);
+      nvinfer1::ITensor* tripLimit = getAxisLength(axis[i]);
+      loop->addTripLimit(*tripLimit, nvinfer1::TripLimit::kCOUNT);  // trip count = length of axis[i]
+      auto iterator = loop->addIterator(*input, axis[i], true);  // third argument is TensorRT's `reverse` flag
+      std::vector<int32_t> zero_vec{0};
+      std::vector<int32_t> one_vec{1};
+      auto zero = Add1DConstantLayer(zero_vec);
+      auto one = Add1DConstantLayer(one_vec);
+      nvinfer1::IRecurrenceLayer* iRec = loop->addRecurrence(*zero);  // counter initialised with `zero`
+      nvinfer1::ITensor* iCur = iRec->getOutput(0);
+      auto iNext = TRT_ENGINE_ADD_LAYER(engine_,
+                                        ElementWise,
+                                        *iCur,
+                                        *one,
+                                        nvinfer1::ElementWiseOperation::kSUM);
+      iRec->setInput(1, *iNext->getOutput(0));  // NOTE(review): in this block the counter feeds only itself — confirm it is required
+      nvinfer1::ILoopOutputLayer* loopOut = loop->addLoopOutput(
+          *iterator->getOutput(0), nvinfer1::LoopOutput::kCONCATENATE, axis[i]);  // concatenate the slices back along axis[i]
+      loopOut->setInput(1, *tripLimit);  // second input of the loop output is set to the same trip-count tensor
+      input = loopOut->getOutput(0);  // chain: the next axis operates on this loop's result
+    }
+    auto* layer = TRT_ENGINE_ADD_LAYER(engine_, Identity, *input);  // wraps the final tensor in a layer for RreplenishLayerAndOutput
+    auto output_name = op_desc.Output("Out")[0];
+    RreplenishLayerAndOutput(layer, "flip", {output_name}, test_mode);
+  }
+};
+}  // namespace tensorrt
+}  // namespace inference
+}  // namespace paddle
+REGISTER_TRT_OP_CONVERTER(flip, FlipOpConverter);
--- a/paddle/fluid/inference/tensorrt/op_teller.cc
+++ b/paddle/fluid/inference/tensorrt/op_teller.cc
@@ -2730,6 +2730,18 @@ struct SimpleOpTypeSetTeller : public Teller {
 #endif
    }
+    if (op_type == "flip") {  // flip is rejected without dynamic shape or when built against TensorRT < 7.2.2
+      if (!with_dynamic_shape) {
+        VLOG(3) << "the flip does not support "
+                   "static shape yet";
+        return false;
+      }
+#if !IS_TRT_VERSION_GE(7220)
+      VLOG(3) << "flip is not supported when TensorRT below 7.2.2";
+      return false;
+#endif
+    }
    if (use_no_calib_int8) {
      return int8_teller_set.count(op_type);
    } else {
@@ -2900,7 +2912,8 @@ struct SimpleOpTypeSetTeller : public Teller {
      "grid_sampler",
      "cumsum",
      "unbind",
-      "assign"};
+      "assign",
+      "flip"};
  std::unordered_set<std::string> teller_set{
      "matrix_multiply",
@@ -3064,7 +3077,8 @@ struct SimpleOpTypeSetTeller : public Teller {
      "grid_sampler",
      "cumsum",
      "unbind",
-      "assign"};
+      "assign",
+      "flip"};
 };
 struct GenericPluginTeller : public Teller {

--- a/test/ir/inference/test_trt_convert_flip.py
+++ b/test/ir/inference/test_trt_convert_flip.py
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import unittest
+from functools import partial
+from typing import List
+import numpy as np
+from program_config import ProgramConfig, TensorConfig
+from trt_layer_auto_scan_test import TrtLayerAutoScanTest
+import paddle.inference as paddle_infer
+class TrtConvertFlipTest(TrtLayerAutoScanTest):  # Auto-scan test case for the Paddle-TRT `flip` converter.
+    def is_program_valid(self, program_config: ProgramConfig) -> bool:  # False when the compiled TRT version encodes below 7220
+        ver = paddle_infer.get_trt_compile_version()
+        if ver[0] * 1000 + ver[1] * 100 + ver[2] * 10 < 7220:
+            return False
+        return True
+    def sample_program_configs(self):  # yields one single-op flip program per (dims, batch) combination
+        def generate_input(batch):  # reads self.dims when invoked, not when the partial is created
+            if self.dims == 4:
+                return np.random.random([batch, 3, 3, 24]).astype(np.float32)
+            elif self.dims == 3:
+                return np.random.random([batch, 3, 24]).astype(np.float32)
+            elif self.dims == 2:
+                return np.random.random([batch, 24]).astype(np.float32)
+            elif self.dims == 1:
+                return np.random.random([24]).astype(np.int32)  # NOTE(review): not reached with dims in [2, 3, 4]; int32 unlike the other branches
+        def generate_axis():
+            return np.arange(self.dims).tolist()  # every axis index, i.e. flip along all dimensions
+        for dims in [2, 3, 4]:
+            for batch in [3, 6, 9]:
+                self.dims = dims  # stored on self so the nested helpers and sample_predictor_configs can read it
+                axis = generate_axis()
+                ops_config = [
+                    {
+                        "op_type": "flip",
+                        "op_inputs": {
+                            "X": ["input_data"],
+                        },
+                        "op_outputs": {"Out": ["output_data"]},
+                        "op_attrs": {"axis": axis},
+                    }
+                ]
+                ops = self.generate_op_config(ops_config)
+                program_config = ProgramConfig(
+                    ops=ops,
+                    weights={},
+                    inputs={
+                        "input_data": TensorConfig(
+                            data_gen=partial(generate_input, batch)
+                        ),
+                    },
+                    outputs=["output_data"],
+                )
+                yield program_config
+    def sample_predictor_configs(
+        self, program_config
+    ) -> (paddle_infer.Config, List[int], float):  # yields (config, expected node counts, tolerance) for FP32 then FP16
+        def generate_dynamic_shape(attrs):  # `attrs` is unused; the shape ranges depend only on self.dims
+            if self.dims == 4:
+                self.dynamic_shape.min_input_shape = {
+                    "input_data": [1, 3 - 1, 3 - 1, 24 - 1]
+                }
+                self.dynamic_shape.max_input_shape = {
+                    "input_data": [9, 3 + 1, 3 + 1, 24 + 1]
+                }
+                self.dynamic_shape.opt_input_shape = {
+                    "input_data": [1, 3, 3, 24]
+                }
+            elif self.dims == 3:
+                self.dynamic_shape.min_input_shape = {
+                    "input_data": [1, 3 - 1, 24 - 1]
+                }
+                self.dynamic_shape.max_input_shape = {
+                    "input_data": [9, 3 + 1, 24 + 1]
+                }
+                self.dynamic_shape.opt_input_shape = {"input_data": [1, 3, 24]}
+            elif self.dims == 2:
+                self.dynamic_shape.min_input_shape = {"input_data": [1, 24]}
+                self.dynamic_shape.max_input_shape = {"input_data": [9, 24]}
+                self.dynamic_shape.opt_input_shape = {"input_data": [1, 24]}
+            elif self.dims == 1:
+                self.dynamic_shape.min_input_shape = {"input_data": [24 - 1]}
+                self.dynamic_shape.max_input_shape = {"input_data": [24 + 1]}
+                self.dynamic_shape.opt_input_shape = {"input_data": [24]}
+        def clear_dynamic_shape():  # NOTE(review): defined but never called in this method
+            self.dynamic_shape.min_input_shape = {}
+            self.dynamic_shape.max_input_shape = {}
+            self.dynamic_shape.opt_input_shape = {}
+        def generate_trt_nodes_num(attrs, dynamic_shape):  # both parameters are unused; the result depends only on the TRT version
+            ver = paddle_infer.get_trt_compile_version()
+            if ver[0] * 1000 + ver[1] * 100 + ver[2] * 10 < 7220:
+                return 0, 3  # presumably (TRT engine count, Paddle op count) when flip is not converted — confirm against the base class
+            return 1, 2
+        attrs = [
+            program_config.ops[i].attrs for i in range(len(program_config.ops))
+        ]
+        self.trt_param.max_batch_size = 9
+        self.trt_param.workspace_size = 1073741824
+        # for dynamic_shape
+        generate_dynamic_shape(attrs)
+        self.trt_param.precision = paddle_infer.PrecisionType.Float32
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, True
+        ), 1e-5
+        self.trt_param.precision = paddle_infer.PrecisionType.Half  # same shapes, looser tolerance for FP16
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, True
+        ), 1e-3
+    def test(self):
+        self.run_test()
+if __name__ == "__main__":
+    unittest.main()