[Paddle-TRT] gather converter (#31640)

* trt gather converter * add trt gather unit_test

[Paddle-TRT] gather converter (#31640)
* trt gather converter * add trt gather unit_test
fe241fd0 · zlsh80826 · GitHub · 4ea34278 · fe241fd0 · fe241fd0
5 changed files
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -1191,6 +1191,7 @@ USE_TRT_CONVERTER(slice);
 USE_TRT_CONVERTER(scale);
 USE_TRT_CONVERTER(stack);
 USE_TRT_CONVERTER(clip);
+USE_TRT_CONVERTER(gather);
 #endif
 namespace paddle_infer {

--- a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
+++ b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
@@ -5,6 +5,7 @@ nv_library(tensorrt_converter
                pad_op.cc split_op.cc prelu_op.cc leaky_relu_op.cc gelu_op.cc layer_norm_op.cc multihead_matmul_op.cc
                shuffle_channel_op.cc swish_op.cc instance_norm_op.cc stack_op.cc transpose_op.cc flatten_op.cc
                emb_eltwise_layernorm.cc skip_layernorm.cc scale_op.cc slice_op.cc hard_sigmoid_op.cc hard_swish_op.cc clip_op.cc
+                gather_op.cc
           DEPS tensorrt_engine tensorrt_plugin operator scope framework_proto op_registry)
 nv_test(test_op_converter SRCS test_op_converter.cc DEPS

--- a/paddle/fluid/inference/tensorrt/convert/gather_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/gather_op.cc
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
+namespace paddle {
+namespace framework {
+class Scope;
+namespace proto {
+class OpDesc;
+}  // namespace proto
+}  // namespace framework
+}  // namespace paddle
+namespace paddle {
+namespace inference {
+namespace tensorrt {
+/*
+ * GatherOpConverter
+ *
+ * Lowers a fluid `gather` op into a TensorRT Gather layer followed by a
+ * Shuffle (reshape) layer. The gather axis is hard-coded to 0; the reshape
+ * yields a tensor with one dimension fewer than the Gather output by
+ * skipping the dimension located right after the gather axis.
+ */
+class GatherOpConverter : public OpConverter {
+ public:
+  // op:        description of the gather op; inputs "X" and "Index" and
+  //            output "Out" are read from it (first entry of each).
+  // scope:     not referenced by this converter.
+  // test_mode: forwarded unchanged to RreplenishLayerAndOutput.
+  void operator()(const framework::proto::OpDesc& op,
+                  const framework::Scope& scope, bool test_mode) override {
+    VLOG(3) << "convert a fluid gather op to tensorrt gather layer";
+    framework::OpDesc desc(op, nullptr);
+    // Resolve the variable names first, then look up the engine tensors.
+    const std::string data_name = desc.Input("X").front();
+    const std::string indices_name = desc.Input("Index").front();
+    const std::string result_name = desc.Output("Out").front();
+    const auto data_tensor = engine_->GetITensor(data_name);
+    const auto indices_tensor = engine_->GetITensor(indices_name);
+    // Only gathering along the leading axis is implemented here.
+    const int gather_axis = 0;
+    auto gather_layer = TRT_ENGINE_ADD_LAYER(engine_, Gather, *data_tensor,
+                                             *indices_tensor, gather_axis);
+    auto gathered = gather_layer->getOutput(0);
+    auto gathered_dims = gathered->getDimensions();
+    auto shuffle_layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *gathered);
+    // Target shape: dims before the axis are copied, the axis slot is 0, and
+    // every later slot takes the Gather output dim one position further on.
+    nvinfer1::Dims reshaped{};
+    reshaped.nbDims = gathered_dims.nbDims - 1;
+    int slot = 0;
+    while (slot < gather_axis) {
+      reshaped.d[slot] = gathered_dims.d[slot];
+      ++slot;
+    }
+    // NOTE(review): per the TensorRT IShuffleLayer documentation a 0 in the
+    // reshape dimensions copies the matching input dimension - confirm for
+    // the TensorRT version in use.
+    reshaped.d[gather_axis] = 0;
+    slot = gather_axis + 1;
+    while (slot < reshaped.nbDims) {
+      reshaped.d[slot] = gathered_dims.d[slot + 1];
+      ++slot;
+    }
+    shuffle_layer->setReshapeDimensions(reshaped);
+    RreplenishLayerAndOutput(shuffle_layer, "gather", {result_name}, test_mode);
+  }
+};
+}  // namespace tensorrt
+}  // namespace inference
+}  // namespace paddle
+REGISTER_TRT_OP_CONVERTER(gather, GatherOpConverter);
--- a/paddle/fluid/inference/tensorrt/op_teller.cc
+++ b/paddle/fluid/inference/tensorrt/op_teller.cc
@@ -109,6 +109,7 @@ struct SimpleOpTypeSetTeller : public Teller {
      "transpose",
      "flatten2",
      "flatten",
+      "gather",
  };
 };
@@ -186,6 +187,10 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
        if (axis != 1) return false;
      }
    }
+    if (op_type == "gather") {
+      // Axis passed as an input is not supported yet (axis 0 is assumed); dynamic shape is required.
+      if (!with_dynamic_shape || desc.Input("Axis").size() > 0) return false;
+    }
    if ((*teller)(op_type, desc, use_no_calib_int8)) return true;
  }
  return false;

--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_gather_op.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_gather_op.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import print_function
+import unittest
+import numpy as np
+from inference_pass_test import InferencePassTest
+import paddle.fluid as fluid
+import paddle.fluid.core as core
+from paddle.fluid.core import PassVersionChecker
+from paddle.fluid.core import AnalysisConfig
+class TRTGatherTest(InferencePassTest):
+    def setUp(self):
+        self.set_params()
+        with fluid.program_guard(self.main_program, self.startup_program):
+            data = fluid.data(name='data', shape=[-1, 512], dtype='float32')
+            index = fluid.data(name='index', shape=[-1], dtype='int32')
+            scale_out = self.append_gather(data, index)
+            out = fluid.layers.batch_norm(scale_out, is_test=True)
+        index = np.arange(self.num_gather, dtype='int32')
+        np.random.shuffle(index)
+        self.feeds = {
+            "data": np.random.random([self.bs, 512]).astype("float32"),
+            "index": index,
+        }
+        self.enable_trt = True
+        self.trt_parameters = TRTGatherTest.TensorRTParam(
+            1 << 30, self.bs, 1, AnalysisConfig.Precision.Float32, False, False)
+        self.fetch_list = [out]
+    def set_params(self):
+        self.num_gather = 16
+        self.bs = 32
+    def append_gather(self, data, index):
+        return fluid.layers.gather(data, index=index)
+    def test_check_output(self):
+        if core.is_compiled_with_cuda():
+            use_gpu = True
+            self.check_output_with_option(use_gpu, flatten=True)
+            self.assertTrue(
+                PassVersionChecker.IsCompatible('tensorrt_subgraph_pass'))
+class TRTGatherTest1(TRTGatherTest):
+    def set_params(self):
+        self.num_gather = 32
+        self.bs = 32
+if __name__ == "__main__":
+    unittest.main()