From 9ae6c8540b4da6d52d93d1e611930dad003c1b3f Mon Sep 17 00:00:00 2001
From: xiaoxiaohehe001 <49090790+xiaoxiaohehe001@users.noreply.github.com>
Date: Tue, 29 Nov 2022 18:41:09 +0800
Subject: [PATCH] [Paddle Inference] Add take_along_axis trt converter (#48358)

---
 .../fluid/inference/api/analysis_predictor.cc |   1 +
 .../inference/tensorrt/convert/CMakeLists.txt |   1 +
 .../tensorrt/convert/take_along_axis_op.cc    |  62 ++++++
 paddle/fluid/inference/tensorrt/op_teller.cc  |  32 +++
 .../test_trt_convert_take_along_axis.py       | 187 ++++++++++++++++++
 5 files changed, 283 insertions(+)
 create mode 100644 paddle/fluid/inference/tensorrt/convert/take_along_axis_op.cc
 create mode 100644 python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_take_along_axis.py

diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
index 0a8b3d5eb2..0fa6f243ce 100755
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -2341,6 +2341,7 @@ USE_TRT_CONVERTER(tanh_shrink)
 USE_TRT_CONVERTER(logsigmoid)
 USE_TRT_CONVERTER(lookup_table)
 USE_TRT_CONVERTER(expand_v2)
+USE_TRT_CONVERTER(take_along_axis)
 #if PADDLE_WITH_CUSPARSELT && IS_TRT_VERSION_GE(8000)
 USE_TRT_CONVERTER(sparse_fc)
 USE_TRT_CONVERTER(sparse_multihead_matmul)
diff --git a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
index 070e7c2c0f..b796cf1c2a 100755
--- a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
+++ b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
@@ -82,6 +82,7 @@ list(
   celu_op.cc
   layernorm_shift_partition_op.cc
   tanhshrink_op.cc
+  take_along_axis_op.cc
   logsigmoid_op.cc
   preln_layernorm_shift_partition_op.cc
   merge_layernorm_op.cc
diff --git a/paddle/fluid/inference/tensorrt/convert/take_along_axis_op.cc b/paddle/fluid/inference/tensorrt/convert/take_along_axis_op.cc
new file mode 100644
index 0000000000..af43d859bb
--- /dev/null
+++ b/paddle/fluid/inference/tensorrt/convert/take_along_axis_op.cc
@@ -0,0 +1,62 @@
+/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
+
+namespace paddle {
+namespace inference {
+namespace tensorrt {
+
+/*
+ * Maps Paddle's take_along_axis op onto a TensorRT element-wise gather layer.
+ */
+class TakeAlongAxisOpConverter : public OpConverter {
+ public:
+  void operator()(const framework::proto::OpDesc& op,
+                  const framework::Scope& scope,
+                  bool test_mode) override {
+    // The kELEMENT gather used below (addGatherV2) needs TensorRT >= 8.2.
+#if IS_TRT_VERSION_GE(8200)
+    VLOG(3) << "convert take_along_axis op to tensorrt take_along_axis layer";
+    framework::OpDesc op_desc(op, nullptr);
+    auto* const data = engine_->GetITensor(op_desc.Input("Input")[0]);
+    auto* const indices = engine_->GetITensor(op_desc.Input("Index")[0]);
+    const auto result_name = op_desc.Output("Result")[0];
+
+    // "Axis" is optional; fall back to the first dimension when it is absent.
+    int gather_axis = 0;
+    if (op_desc.HasAttr("Axis")) {
+      gather_axis = PADDLE_GET_CONST(int, op_desc.GetAttr("Axis"));
+    }
+    // A negative axis counts from the back, so shift it by the input rank.
+    const auto data_dims = data->getDimensions();
+    const int rank = data_dims.nbDims;
+    if (gather_axis < 0) gather_axis += rank;
+
+    // Gather in kELEMENT mode along the normalized axis.
+    auto* gather_layer = TRT_ENGINE_ADD_LAYER(
+        engine_, GatherV2, *data, *indices, nvinfer1::GatherMode::kELEMENT);
+    gather_layer->setGatherAxis(gather_axis);
+
+    RreplenishLayerAndOutput(
+        gather_layer, "take_along_axis", {result_name}, test_mode);
+#endif
+  }
+};
+
+}  // namespace tensorrt
+}  // namespace inference
+}  // namespace paddle
+
+REGISTER_TRT_OP_CONVERTER(take_along_axis, TakeAlongAxisOpConverter);
diff --git a/paddle/fluid/inference/tensorrt/op_teller.cc b/paddle/fluid/inference/tensorrt/op_teller.cc
index 0e180cd7d6..58f99ff3d2 100755
--- a/paddle/fluid/inference/tensorrt/op_teller.cc
+++ b/paddle/fluid/inference/tensorrt/op_teller.cc
@@ -596,6 +596,36 @@ struct SimpleOpTypeSetTeller : public Teller {
 #endif
     }
 
+    if (op_type == "take_along_axis") {
+#if IS_TRT_VERSION_GE(8200)
+      // Needs dynamic shape, an int32 Index, and Index rank == Input rank.
+      if (!with_dynamic_shape) return false;
+      auto* block = desc.Block();
+      if (block == nullptr) return false;  // no block to look variables up in
+      auto* input_var_desc = block->FindVar(desc.Input("Input")[0]);
+      auto* index_var_desc = block->FindVar(desc.Input("Index")[0]);
+      if (input_var_desc == nullptr || index_var_desc == nullptr) return false;
+      // The index input must be int32 datatype.
+      if (index_var_desc->GetDataType() !=
+          paddle::framework::proto::VarType_Type::VarType_Type_INT32) {
+        VLOG(3) << "take_along_axis op Index input data type must be int32";
+        return false;
+      }
+
+      const auto input_shape = input_var_desc->GetShape();
+      const auto index_shape = index_var_desc->GetShape();
+      if (input_shape.size() != index_shape.size()) {
+        VLOG(3) << "take_along_axis op Index input dims size ["
+                << index_shape.size() << " ] not equal to input dims size ["
+                << input_shape.size() << "]";
+        return false;
+      }
+#else
+      VLOG(3) << "take_along_axis op is only supported by trt8.2 above ";
+      return false;
+#endif
+    }
+
     if (op_type == "anchor_generator") {
       if (!with_dynamic_shape) return false;
     }
@@ -2399,6 +2429,7 @@ struct SimpleOpTypeSetTeller : public Teller {
       "squeeze2",
       "unsqueeze2",
       "layernorm_shift_partition",
+      "take_along_axis",
       "tanh_shrink",
       "logsigmoid",
       "preln_layernorm_shift_partition",
@@ -2530,6 +2561,7 @@ struct SimpleOpTypeSetTeller : public Teller {
       "fused_token_prune",
       "layernorm_shift_partition",
       "tanh_shrink",
+      "take_along_axis",
       "logsigmoid",
       "preln_layernorm_shift_partition",
       "merge_layernorm",
diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_take_along_axis.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_take_along_axis.py
new file mode 100644
index 0000000000..63b340f880
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_take_along_axis.py
@@ -0,0 +1,187 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+from functools import partial
+from typing import List
+
+import numpy as np
+from program_config import ProgramConfig, TensorConfig
+from trt_layer_auto_scan_test import TrtLayerAutoScanTest
+
+import paddle.inference as paddle_infer
+
+
+class TrtConvertTakeAlongAxisTest(TrtLayerAutoScanTest):
+    def is_program_valid(self, program_config: ProgramConfig) -> bool:
+        inputs = program_config.inputs
+        attrs = [
+            program_config.ops[i].attrs for i in range(len(program_config.ops))
+        ]
+        if len(inputs['input_data'].shape) <= attrs[0]['Axis']:
+            return False  # axis must address an existing input dimension
+        if len(inputs['input_data'].shape) != len(inputs['index_data'].shape):
+            return False  # input and index must have the same rank
+
+        return True
+
+    def sample_program_configs(self):
+        def generate_input1(shape):
+            return np.random.random(shape).astype(np.float32)
+
+        def generate_input2(index):
+            return np.zeros(index).astype(np.int32)
+
+        def generate_input3(axis):
+            return np.array([axis]).astype(np.int32)
+
+        for shape in [[32], [3, 64], [1, 64, 16], [1, 64, 16, 32]]:
+            for index in [[1], [1, 1], [1, 1, 2], [1, 1, 1, 1]]:
+                for axis in [0, 1, 2, 3]:
+                    self.shape = shape  # read by generate_dynamic_shape
+                    self.axis = axis
+                    dics = [{"Axis": axis}]
+                    ops_config = [
+                        {
+                            "op_type": "take_along_axis",
+                            "op_inputs": {
+                                "Input": ["input_data"],
+                                "Index": ["index_data"],
+                            },
+                            "op_outputs": {"Result": ["output_data"]},
+                            "op_attrs": dics[0],
+                        }
+                    ]
+                    ops = self.generate_op_config(ops_config)
+
+                    program_config = ProgramConfig(
+                        ops=ops,
+                        weights={},
+                        inputs={
+                            "input_data": TensorConfig(
+                                data_gen=partial(generate_input1, shape)
+                            ),
+                            "index_data": TensorConfig(
+                                data_gen=partial(generate_input2, index)
+                            ),
+                        },
+                        outputs=["output_data"],
+                    )
+
+                    yield program_config
+
+    def sample_predictor_configs(
+        self, program_config
+    ) -> (paddle_infer.Config, List[int], float):
+        def generate_dynamic_shape(attrs):
+            if len(self.shape) == 1:
+                self.dynamic_shape.min_input_shape = {
+                    "input_data": [4],
+                    "index_data": [1],
+                }
+                self.dynamic_shape.max_input_shape = {
+                    "input_data": [128],
+                    "index_data": [4],
+                }
+                self.dynamic_shape.opt_input_shape = {
+                    "input_data": [16],
+                    "index_data": [2],
+                }
+            elif len(self.shape) == 2:
+                self.dynamic_shape.min_input_shape = {
+                    "input_data": [3, 64],
+                    "index_data": [1, 1],
+                }
+                self.dynamic_shape.max_input_shape = {
+                    "input_data": [3, 64],
+                    "index_data": [1, 1],
+                }
+                self.dynamic_shape.opt_input_shape = {
+                    "input_data": [3, 64],
+                    "index_data": [1, 1],
+                }
+            elif len(self.shape) == 3:
+                self.dynamic_shape.min_input_shape = {
+                    "input_data": [1, 64, 16],
+                    "index_data": [1, 1, 2],
+                }
+                self.dynamic_shape.max_input_shape = {
+                    "input_data": [1, 64, 16],
+                    "index_data": [1, 1, 2],
+                }
+                self.dynamic_shape.opt_input_shape = {
+                    "input_data": [1, 64, 16],
+                    "index_data": [1, 1, 2],
+                }
+            elif len(self.shape) == 4:
+                self.dynamic_shape.min_input_shape = {
+                    "input_data": [1, 64, 16, 32],
+                    "index_data": [1, 1, 1, 1],
+                }
+                self.dynamic_shape.max_input_shape = {
+                    "input_data": [1, 64, 16, 32],
+                    "index_data": [1, 1, 1, 1],
+                }
+                self.dynamic_shape.opt_input_shape = {
+                    "input_data": [1, 64, 16, 32],
+                    "index_data": [1, 1, 1, 1],
+                }
+
+        def clear_dynamic_shape():
+            self.dynamic_shape.max_input_shape = {}
+            self.dynamic_shape.min_input_shape = {}
+            self.dynamic_shape.opt_input_shape = {}
+
+        def generate_trt_nodes_num(dynamic_shape):
+            ver = paddle_infer.get_trt_compile_version()
+            if (
+                ver[0] * 1000 + ver[1] * 100 + ver[2] * 10 >= 8200
+                and dynamic_shape
+            ):
+                return 1, 3  # TRT >= 8.2 with dynamic shape
+            else:
+                return 0, 4  # older TRT or static shape
+
+        attrs = [
+            program_config.ops[i].attrs for i in range(len(program_config.ops))
+        ]
+
+        # static shape: expect the result of generate_trt_nodes_num(False)
+        clear_dynamic_shape()
+        self.trt_param.precision = paddle_infer.PrecisionType.Float32
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            False
+        ), 1e-5
+        self.trt_param.precision = paddle_infer.PrecisionType.Half
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            False
+        ), 1e-5
+
+        # dynamic shape: shape ranges are chosen from the rank of self.shape
+        generate_dynamic_shape(attrs)
+        self.trt_param.precision = paddle_infer.PrecisionType.Float32
+        yield self.create_inference_config(), generate_trt_nodes_num(True), 1e-5
+        self.trt_param.precision = paddle_infer.PrecisionType.Half
+        yield self.create_inference_config(), generate_trt_nodes_num(True), 1e-3
+
+    def add_skip_trt_case(self):
+        pass
+
+    def test(self):
+        self.add_skip_trt_case()
+        self.run_test()
+
+
+if __name__ == "__main__":
+    unittest.main()
-- 
GitLab