From 543da561bf12cd6c642bfd6ee9d450ec86762a72 Mon Sep 17 00:00:00 2001
From: xjmxyt <46990504+xjmxyt@users.noreply.github.com>
Date: Thu, 16 Mar 2023 10:32:11 +0800
Subject: [PATCH] add index select op (#51498)

* add index select op

* add to op teller

* add trt version control

* delete useless code
---
 .../fluid/inference/api/analysis_predictor.cc |   1 +
 .../inference/tensorrt/convert/CMakeLists.txt |   1 +
 .../tensorrt/convert/index_select_op.cc       |  75 +++++++
 paddle/fluid/inference/tensorrt/op_teller.cc  |  31 +++
 .../test_trt_convert_index_select.py          | 195 ++++++++++++++++++
 5 files changed, 303 insertions(+)
 create mode 100644 paddle/fluid/inference/tensorrt/convert/index_select_op.cc
 create mode 100644 python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_index_select.py

diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
index ccda587530b..8b294fa0b3a 100644
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -2546,6 +2546,7 @@ USE_TRT_CONVERTER(grid_sampler)
 #endif
 #if IS_TRT_VERSION_GE(8200)
 USE_TRT_CONVERTER(set_value)
+USE_TRT_CONVERTER(index_select)
 USE_TRT_CONVERTER(temporal_shift)
 #endif
 #if PADDLE_WITH_CUSPARSELT && IS_TRT_VERSION_GE(8000)
diff --git a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
index 487e8c9a78a..1c679e44876 100755
--- a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
+++ b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
@@ -46,6 +46,7 @@ list(
   hard_swish_op.cc
   clip_op.cc
   gather_op.cc
+  index_select_op.cc
   anchor_generator_op.cc
   yolo_box_op.cc
   yolo_box_head_op.cc
diff --git a/paddle/fluid/inference/tensorrt/convert/index_select_op.cc b/paddle/fluid/inference/tensorrt/convert/index_select_op.cc
new file mode 100644
index 00000000000..6f98371210c
--- /dev/null
+++ b/paddle/fluid/inference/tensorrt/convert/index_select_op.cc
@@ -0,0 +1,85 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
+
+namespace paddle {
+namespace framework {
+class Scope;
+
+namespace proto {
+class OpDesc;
+}  // namespace proto
+}  // namespace framework
+}  // namespace paddle
+
+namespace paddle {
+namespace inference {
+namespace tensorrt {
+
+/*
+ * IndexSelect Op
+ *
+ * Converts index_select into a TensorRT Gather layer: the Index input is
+ * reshaped to 1-D and used to gather slices of X along the `dim` attribute.
+ */
+class IndexSelectConverter : public OpConverter {
+ public:
+  void operator()(const framework::proto::OpDesc& op,
+                  const framework::Scope& scope,
+                  bool test_mode) override {
+    VLOG(3) << "convert a fluid index select op to tensorrt index select layer";
+
+    framework::OpDesc op_desc(op, nullptr);
+    std::string input_name = op_desc.Input("X").front();
+    std::string index_name = op_desc.Input("Index").front();
+    std::string output_name = op_desc.Output("Out").front();
+    const auto input_tensor = engine_->GetITensor(input_name);
+    const auto index_tensor = engine_->GetITensor(index_name);
+
+    // `dim` defaults to 0 when the attribute is absent.
+    int axis = 0;
+    if (op_desc.HasAttr("dim")) {
+      axis = PADDLE_GET_CONST(int, op_desc.GetAttr("dim"));
+    }
+    // A negative dim counts back from the last dimension; the Gather layer
+    // needs a non-negative axis, so wrap it by the rank of X.
+    if (axis < 0) {
+      axis += input_tensor->getDimensions().nbDims;
+    }
+
+    // Reshape Index to a 1-D tensor ([-1]) before gathering.
+    auto reshape_layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *index_tensor);
+
+    nvinfer1::Dims index_shape{};
+    index_shape.nbDims = 1;
+    index_shape.d[0] = -1;
+
+    reshape_layer->setReshapeDimensions(index_shape);
+    reshape_layer->setName(
+        ("Index select: Shuffle: (Output: " + output_name + ")").c_str());
+
+    auto layer = TRT_ENGINE_ADD_LAYER(
+        engine_, Gather, *input_tensor, *reshape_layer->getOutput(0), axis);
+    layer->setNbElementWiseDims(0);
+
+    RreplenishLayerAndOutput(layer, "index_select", {output_name}, test_mode);
+  }
+};
+
+}  // namespace tensorrt
+}  // namespace inference
+}  // namespace paddle
+
+REGISTER_TRT_OP_CONVERTER(index_select, IndexSelectConverter);
diff --git a/paddle/fluid/inference/tensorrt/op_teller.cc b/paddle/fluid/inference/tensorrt/op_teller.cc
index d2894bfbb2d..61916e0c73b 100644
--- a/paddle/fluid/inference/tensorrt/op_teller.cc
+++ b/paddle/fluid/inference/tensorrt/op_teller.cc
@@ -76,6 +76,8 @@ struct SimpleOpTypeSetTeller : public Teller {
     teller_set.insert("round");
     int8_teller_set.insert("round");
     teller_set.insert("set_value");
+    teller_set.insert("index_select");
+    int8_teller_set.insert("index_select");
 #endif
   }
 
@@ -650,7 +652,44 @@ struct SimpleOpTypeSetTeller : public Teller {
       }
 #endif
     }
+    // index_select is converted to a TensorRT Gather layer. It is only
+    // offloaded to TRT >= 8.2 in dynamic-shape mode, and only when the Index
+    // input is int32 or int64.
+    if (op_type == "index_select") {
+#if !IS_TRT_VERSION_GE(8200)
+      return false;
+#endif
+      if (!with_dynamic_shape) {
+        return false;
+      } else {
+        auto* block = desc.Block();
+        if (block == nullptr) {
+          VLOG(3) << "The block desc is nullptr, we can't continue to analyze. "
+                     "Developers need to check whether block_desc is passed in "
+                     "the pass.";
+          return false;
+        }
+
+        auto index_var_name = desc.Input("Index")[0];
+        auto* index_var_desc = block->FindVar(index_var_name);
 
+        // Guard against FindVar yielding nullptr before reading the data type.
+        if (index_var_desc == nullptr) {
+          VLOG(3) << "Index select op Index input variable is not found.";
+          return false;
+        }
+
+        // The index input must be int32 or int64 datatype.
+        if (index_var_desc->GetDataType() !=
+                paddle::framework::proto::VarType_Type::VarType_Type_INT32 &&
+            index_var_desc->GetDataType() !=
+                paddle::framework::proto::VarType_Type::VarType_Type_INT64) {
+          VLOG(3)
+              << "Index select op Index input data type must be int32 or int64";
+          return false;
+        }
+      }
+    }
     if (op_type == "take_along_axis") {
 #if IS_TRT_VERSION_GE(8200)
       if (!with_dynamic_shape) return false;
diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_index_select.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_index_select.py
new file mode 100644
index 00000000000..9d1d9fb26ba
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_index_select.py
@@ -0,0 +1,196 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+from functools import partial
+from typing import List
+
+import numpy as np
+from program_config import ProgramConfig, TensorConfig
+from trt_layer_auto_scan_test import TrtLayerAutoScanTest
+
+import paddle.inference as paddle_infer
+
+
+class TrtConvertIndexSelectTest(TrtLayerAutoScanTest):
+    """Auto-scan test for converting index_select to TensorRT."""
+
+    def is_program_valid(self, program_config: ProgramConfig) -> bool:
+        """Reject programs whose `dim` is not smaller than the input rank."""
+        inputs = program_config.inputs
+        attrs = [
+            program_config.ops[i].attrs for i in range(len(program_config.ops))
+        ]
+        if len(inputs['input_data'].shape) <= attrs[0]['dim']:
+            return False
+
+        return True
+
+    def sample_program_configs(self):
+        """Yield one index_select program per (index, dim, index dtype)."""
+
+        def generate_input1(shape):
+            return np.random.random(shape).astype(np.float32)
+
+        def generate_input2(index):
+            return np.array(index).astype(np.int32)
+
+        def generate_input4(index):
+            return np.array(index).astype(np.int64)
+
+        for shape in [[32, 64, 16, 32]]:
+            for index in [[1, 4], [4, 8]]:
+                for axis in [0, 1, 2, 3]:
+                    for op_inputs in [
+                        {"X": ["input_data"], "Index": ["index_data"]}
+                    ]:
+                        for index_type_int32 in [True, False]:
+                            self.shape = shape
+                            self.axis = axis
+                            self.input_num = len(op_inputs)
+                            self.index_type_int32 = index_type_int32
+                            dics = [{"dim": axis}]
+                            ops_config = [
+                                {
+                                    "op_type": "index_select",
+                                    "op_inputs": op_inputs,
+                                    "op_outputs": {"Out": ["output_data"]},
+                                    "op_attrs": dics[0],
+                                }
+                            ]
+                            ops = self.generate_op_config(ops_config)
+
+                            program_config = ProgramConfig(
+                                ops=ops,
+                                weights={},
+                                inputs={
+                                    "input_data": TensorConfig(
+                                        data_gen=partial(generate_input1, shape)
+                                    ),
+                                    "index_data": TensorConfig(
+                                        data_gen=partial(
+                                            generate_input2
+                                            if index_type_int32
+                                            else generate_input4,
+                                            index,
+                                        )
+                                    ),
+                                },
+                                outputs=["output_data"],
+                            )
+
+                            yield program_config
+
+    def sample_predictor_configs(
+        self, program_config
+    ) -> (paddle_infer.Config, List[int], float):
+        """Yield (config, expected node counts, tolerance) per run mode."""
+
+        def generate_dynamic_shape(attrs):
+            if len(self.shape) == 1:
+                self.dynamic_shape.min_input_shape = {
+                    "input_data": [4],
+                    "index_data": [1],
+                }
+                self.dynamic_shape.max_input_shape = {
+                    "input_data": [128],
+                    "index_data": [4],
+                }
+                self.dynamic_shape.opt_input_shape = {
+                    "input_data": [16],
+                    "index_data": [2],
+                }
+            elif len(self.shape) == 2:
+                self.dynamic_shape.min_input_shape = {
+                    "input_data": [2, 4],
+                    "index_data": [1],
+                }
+                self.dynamic_shape.max_input_shape = {
+                    "input_data": [256, 256],
+                    "index_data": [4],
+                }
+                self.dynamic_shape.opt_input_shape = {
+                    "input_data": [64, 32],
+                    "index_data": [2],
+                }
+            elif len(self.shape) == 3:
+                self.dynamic_shape.min_input_shape = {
+                    "input_data": [2, 4, 4],
+                    "index_data": [1],
+                }
+                self.dynamic_shape.max_input_shape = {
+                    "input_data": [128, 256, 256],
+                    "index_data": [4],
+                }
+                self.dynamic_shape.opt_input_shape = {
+                    "input_data": [16, 64, 32],
+                    "index_data": [2],
+                }
+            elif len(self.shape) == 4:
+                self.dynamic_shape.min_input_shape = {
+                    "input_data": [2, 4, 4, 2],
+                    "index_data": [1],
+                }
+                self.dynamic_shape.max_input_shape = {
+                    "input_data": [128, 256, 64, 128],
+                    "index_data": [4],
+                }
+                self.dynamic_shape.opt_input_shape = {
+                    "input_data": [16, 64, 16, 32],
+                    "index_data": [2],
+                }
+
+        def clear_dynamic_shape():
+            self.dynamic_shape.max_input_shape = {}
+            self.dynamic_shape.min_input_shape = {}
+            self.dynamic_shape.opt_input_shape = {}
+
+        def generate_trt_nodes_num(dynamic_shape):
+            if dynamic_shape:
+                ver = paddle_infer.get_trt_compile_version()
+                if ver[0] * 1000 + ver[1] * 100 + ver[2] * 10 < 8200:
+                    return 0, 4
+                return 1, 3
+            else:
+                return 0, 4
+
+        attrs = [
+            program_config.ops[i].attrs for i in range(len(program_config.ops))
+        ]
+
+        # for static_shape
+        clear_dynamic_shape()
+        self.trt_param.precision = paddle_infer.PrecisionType.Float32
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            False
+        ), 1e-5
+        self.trt_param.precision = paddle_infer.PrecisionType.Half
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            False
+        ), 1e-3
+
+        # for dynamic_shape
+        generate_dynamic_shape(attrs)
+        self.trt_param.precision = paddle_infer.PrecisionType.Float32
+        yield self.create_inference_config(), generate_trt_nodes_num(True), 1e-5
+        self.trt_param.precision = paddle_infer.PrecisionType.Half
+        yield self.create_inference_config(), generate_trt_nodes_num(True), 1e-3
+
+    def test(self):
+        self.trt_param.workspace_size = 1 << 60
+        self.run_test()
+
+
+if __name__ == "__main__":
+    unittest.main()
-- 
GitLab