[inference]add topk/topk_v2 trt convertor (#43368)

65e86580 · 津 · GitHub · 4af7ebf4 · 65e86580 · 65e86580
6 changed files
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -1960,6 +1960,8 @@ USE_TRT_CONVERTER(strided_slice)
 USE_TRT_CONVERTER(transformer_input_convert)
 USE_TRT_CONVERTER(recover_padding)
 USE_TRT_CONVERTER(remove_padding)
+USE_TRT_CONVERTER(top_k)
+USE_TRT_CONVERTER(top_k_v2)
 #if PADDLE_WITH_CUSPARSELT && IS_TRT_VERSION_GE(8000)
 USE_TRT_CONVERTER(sparse_fc)
 USE_TRT_CONVERTER(sparse_multihead_matmul)

--- a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
+++ b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
@@ -60,7 +60,8 @@ list(
  roll_op.cc
  transformer_input_convert_op.cc
  remove_padding_op.cc
-  recover_padding_op.cc)
+  recover_padding_op.cc
+  top_k_op.cc)

 if(CUSPARSELT_FOUND AND ${TENSORRT_MAJOR_VERSION} GREATER_EQUAL 8)
  list(APPEND CONVERT_FILES sparse_fc_op.cc sparse_multihead_matmul_op.cc)

--- a/paddle/fluid/inference/tensorrt/convert/top_k_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/top_k_op.cc
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <NvInfer.h>
+
+#include <string>
+#include <vector>
+
+#include "glog/logging.h"
+#include "paddle/fluid/framework/op_desc.h"
+#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
+#include "paddle/fluid/inference/tensorrt/engine.h"
+#include "paddle/fluid/inference/tensorrt/helper.h"
+#include "paddle/fluid/platform/enforce.h"
+
+// Forward declarations of the framework types that appear in the converter
+// call-operator signatures in this file (framework::Scope and
+// framework::proto::OpDesc).
+namespace paddle {
+namespace framework {
+class Scope;
+
+namespace proto {
+class OpDesc;
+}  // namespace proto
+}  // namespace framework
+}  // namespace paddle
+
+namespace paddle {
+namespace inference {
+namespace tensorrt {
+
+/*
+ * Converter for the Paddle `top_k` op.
+ *
+ * Adds a TensorRT TopK layer (kMAX) over the last dimension of input "X" and
+ * hands the layer to RreplenishLayerAndOutput together with the names of the
+ * op's "Out" and "Indices" outputs, in that order.
+ */
+class TopKOpConverter : public OpConverter {
+ public:
+  TopKOpConverter() {}
+  void operator()(const framework::proto::OpDesc& op,
+                  const framework::Scope& scope, bool test_mode) override {
+    // Wrap the proto message so that inputs, outputs and attributes can be
+    // read through the framework::OpDesc accessors.
+    framework::OpDesc op_desc(op, nullptr);
+
+    auto* input_tensor = engine_->GetITensor(op_desc.Input("X")[0]);
+
+    // Fall back to k = 1 when the attribute is absent. The fallback is an
+    // integer literal so that both branches of the conditional are ints and
+    // no float round-trip takes place.
+    const int k =
+        op_desc.HasAttr("k") ? BOOST_GET_CONST(int, op_desc.GetAttr("k")) : 1;
+
+    // The last argument of the TopK layer is a bit mask of axes; only the bit
+    // of the last input dimension is set.
+    const nvinfer1::Dims input_dims = input_tensor->getDimensions();
+    const int last_axis = input_dims.nbDims - 1;
+    nvinfer1::ITopKLayer* layer =
+        TRT_ENGINE_ADD_LAYER(engine_, TopK, *input_tensor,
+                             nvinfer1::TopKOperation::kMAX, k, 1 << last_axis);
+
+    std::vector<std::string> output_names;
+    output_names.push_back(op_desc.Output("Out").front());
+    output_names.push_back(op_desc.Output("Indices").front());
+
+    RreplenishLayerAndOutput(layer, "top_k", output_names, test_mode);
+  }
+};
+/*
+ * Converter for the Paddle `top_k_v2` op.
+ *
+ * Reads the "k", "axis" and "largest" attributes, adds a TensorRT TopK layer
+ * (kMAX when `largest` is true, kMIN otherwise) over the requested axis of
+ * input "X", and hands the layer to RreplenishLayerAndOutput together with
+ * the names of the op's "Out" and "Indices" outputs, in that order.
+ *
+ * Axis mapping:
+ *   - a negative axis counts from the end of the TensorRT tensor's dimensions
+ *     (so -1 is the last dimension in both shape modes);
+ *   - a non-negative axis is used as is with dynamic shape and shifted down
+ *     by one without it (presumably because the static-shape TensorRT tensor
+ *     carries no batch dimension -- this mirrors the original `axis - 1`).
+ * An axis that does not land inside the TensorRT tensor's dimensions raises
+ * InvalidArgument instead of producing an invalid shift.
+ */
+class TopKv2OpConverter : public OpConverter {
+ public:
+  TopKv2OpConverter() {}
+  void operator()(const framework::proto::OpDesc& op,
+                  const framework::Scope& scope, bool test_mode) override {
+    // Wrap the proto message so that inputs, outputs and attributes can be
+    // read through the framework::OpDesc accessors.
+    framework::OpDesc op_desc(op, nullptr);
+
+    auto* input_tensor = engine_->GetITensor(op_desc.Input("X")[0]);
+
+    // Attribute fallbacks use integer literals so the conditionals stay ints.
+    const int k =
+        op_desc.HasAttr("k") ? BOOST_GET_CONST(int, op_desc.GetAttr("k")) : 1;
+    // NOTE(review): the fallback axis of 1 is kept from the original code;
+    // confirm it against the default declared by the top_k_v2 op.
+    const int axis = op_desc.HasAttr("axis")
+                         ? BOOST_GET_CONST(int, op_desc.GetAttr("axis"))
+                         : 1;
+    const bool largest = op_desc.HasAttr("largest")
+                             ? BOOST_GET_CONST(bool, op_desc.GetAttr("largest"))
+                             : true;
+    const auto flag =
+        largest ? nvinfer1::TopKOperation::kMAX : nvinfer1::TopKOperation::kMIN;
+
+    // Translate the Paddle axis into an index into the TensorRT dimensions.
+    const nvinfer1::Dims input_dims = input_tensor->getDimensions();
+    const int rank = input_dims.nbDims;
+    int trt_axis = axis;
+    if (trt_axis < 0) {
+      trt_axis += rank;
+    } else if (!engine_->with_dynamic_shape()) {
+      trt_axis -= 1;
+    }
+    // Guard the shift below: a negative or too large shift count is invalid.
+    PADDLE_ENFORCE_EQ(
+        trt_axis >= 0 && trt_axis < rank, true,
+        platform::errors::InvalidArgument(
+            "top_k_v2 TensorRT converter got axis %d, which does not map to a "
+            "dimension of the rank-%d TensorRT input tensor.",
+            axis, rank));
+
+    // The last argument of the TopK layer is a bit mask of axes.
+    nvinfer1::ITopKLayer* layer = TRT_ENGINE_ADD_LAYER(
+        engine_, TopK, *input_tensor, flag, k, 1 << trt_axis);
+
+    std::vector<std::string> output_names;
+    output_names.push_back(op_desc.Output("Out").front());
+    output_names.push_back(op_desc.Output("Indices").front());
+
+    RreplenishLayerAndOutput(layer, "top_k_v2", output_names, test_mode);
+  }
+};
+}  // namespace tensorrt
+}  // namespace inference
+}  // namespace paddle
+
+// Register each converter class under the op type name it handles.
+REGISTER_TRT_OP_CONVERTER(top_k, TopKOpConverter);
+REGISTER_TRT_OP_CONVERTER(top_k_v2, TopKv2OpConverter);
--- a/paddle/fluid/inference/tensorrt/op_teller.cc
+++ b/paddle/fluid/inference/tensorrt/op_teller.cc
@@ -104,6 +104,8 @@ struct SimpleOpTypeSetTeller : public Teller {
      "stack",
      "transpose2",
      "transpose",
+      "top_k",
+      "top_k_v2",
      "flatten2",
      "flatten",
      "gather",
@@ -175,6 +177,8 @@ struct SimpleOpTypeSetTeller : public Teller {
      "stack",
      "transpose2",
      "transpose",
+      "top_k",
+      "top_k_v2",
      "flatten2",
      "flatten",
      "gather",
@@ -1759,6 +1763,47 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
      }
    }

+    // top_k / top_k_v2 are only handed to TensorRT when the input is not 1-D,
+    // "axis" (when present) is not 0, and "sorted" (when present) is true.
+    // Missing block or variable descriptions are rejected as well.
+    if (op_type == "top_k_v2" || op_type == "top_k") {
+      auto* block = desc.Block();
+      if (block == nullptr) {
+        VLOG(3) << "The block desc is nullptr, we can't continue to analyze "
+                   "top_k/top_k_v2.";
+        return false;
+      }
+      const auto x_var_name = desc.Input("X")[0];
+      auto* x_var_desc = block->FindVar(x_var_name);
+      if (x_var_desc == nullptr) {
+        VLOG(3) << "top_k/top_k_v2 cannot find input variable " << x_var_name
+                << " in the block desc";
+        return false;
+      }
+      const auto x_shape = x_var_desc->GetShape();
+      if (x_shape.size() == 1) {
+        VLOG(3) << "top_k/top_k_v2 does not support 1-dimensional input in "
+                   "tensorrt";
+        return false;
+      }
+      if (desc.HasAttr("axis")) {
+        int axis = BOOST_GET_CONST(int, desc.GetAttr("axis"));
+        if (axis == 0) {
+          VLOG(3) << "top_k_v2 does not support axis == 0 in "
+                     "tensorrt";
+          return false;
+        }
+      }
+      if (desc.HasAttr("sorted")) {
+        bool sorted = BOOST_GET_CONST(bool, desc.GetAttr("sorted"));
+        if (!sorted) {
+          VLOG(3) << "top_k_v2 does not support results not sorted in "
+                     "tensorrt";
+          return false;
+        }
+      }
+    }
+
 #if IS_TRT_VERSION_GE(8000)
    if (op_type == "sparse_fc" || op_type == "sparse_multihead_matmul") {
      if (!with_dynamic_shape) {

--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_top_k.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_top_k.py
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from trt_layer_auto_scan_test import TrtLayerAutoScanTest, SkipReasons
+from program_config import TensorConfig, ProgramConfig
+import unittest
+import numpy as np
+import paddle.inference as paddle_infer
+from functools import partial
+from typing import Optional, List, Callable, Dict, Any, Set
+
+
+class TrtConvertActivationTest(TrtLayerAutoScanTest):
+    """Auto-scan test for converting the ``top_k`` op to TensorRT.
+
+    Single-op ``top_k`` programs are sampled for input ranks 1 through 4 and
+    ``k`` in {1, 3}. Each program is paired with static-shape and
+    dynamic-shape predictor configs in Float32 and Half precision.
+    """
+
+    def is_program_valid(self, program_config: ProgramConfig) -> bool:
+        # No sampled program is filtered out.
+        return True
+
+    def sample_program_configs(self):
+        """Yield one ``top_k`` program per (dims, batch, k) combination.
+
+        ``self.dims`` is updated right before each yield and read back by
+        ``sample_predictor_configs``.
+        NOTE(review): this presumably relies on the harness consuming the
+        generator lazily, one program at a time -- confirm.
+        """
+        self.trt_param.workspace_size = 1073741824
+
+        def generate_input1(dims, batch, attrs: List[Dict[str, Any]]):
+            # Random float32 input; ``dims`` selects the rank. Any value other
+            # than 1, 2 or 3 produces the 4-D shape led by ``batch``.
+            if dims == 1:
+                return np.random.random([32]).astype(np.float32)
+            elif dims == 2:
+                return np.random.random([3, 32]).astype(np.float32)
+            elif dims == 3:
+                return np.random.random([3, 32, 32]).astype(np.float32)
+            else:
+                return np.random.random([batch, 3, 32, 32]).astype(np.float32)
+
+        # Ranks 1-4 are exactly the shapes the helpers in this class produce.
+        # A value of 5 would fall into the 4-D branch above and repeat rank 4,
+        # while rank 1 exercises the (0, 4) expectation below.
+        for dims in [1, 2, 3, 4]:
+            for batch in [1]:
+                for k in [1, 3]:
+                    self.dims = dims
+                    dics = [{"k": k}]
+                    ops_config = [{
+                        "op_type": "top_k",
+                        "op_inputs": {
+                            "X": ["input_data"]
+                        },
+                        "op_outputs": {
+                            "Out": ["output_data"],
+                            "Indices": ["indices_data"]
+                        },
+                        "op_attrs": dics[0]
+                    }]
+                    ops = self.generate_op_config(ops_config)
+
+                    program_config = ProgramConfig(
+                        ops=ops,
+                        weights={},
+                        inputs={
+                            "input_data":
+                            TensorConfig(data_gen=partial(
+                                generate_input1, dims, batch, dics))
+                        },
+                        outputs=["output_data", "indices_data"])
+
+                    yield program_config
+
+    def sample_predictor_configs(
+            self, program_config) -> (paddle_infer.Config, List[int], float):
+        """Yield (inference config, expected node counts, tolerance) tuples.
+
+        Four tuples are produced: static shape in Float32 and Half, then
+        dynamic shape in Float32 and Half, all with tolerance 1e-5.
+        """
+
+        def generate_dynamic_shape(attrs):
+            # Min / max / opt shapes for "input_data", chosen by the rank
+            # recorded in self.dims.
+            if self.dims == 1:
+                self.dynamic_shape.min_input_shape = {"input_data": [1]}
+                self.dynamic_shape.max_input_shape = {"input_data": [64]}
+                self.dynamic_shape.opt_input_shape = {"input_data": [32]}
+            elif self.dims == 2:
+                self.dynamic_shape.min_input_shape = {"input_data": [1, 16]}
+                self.dynamic_shape.max_input_shape = {"input_data": [4, 32]}
+                self.dynamic_shape.opt_input_shape = {"input_data": [3, 32]}
+            elif self.dims == 3:
+                self.dynamic_shape.min_input_shape = {"input_data": [1, 16, 16]}
+                self.dynamic_shape.max_input_shape = {"input_data": [4, 32, 32]}
+                self.dynamic_shape.opt_input_shape = {"input_data": [3, 32, 32]}
+            else:
+                self.dynamic_shape.min_input_shape = {
+                    "input_data": [1, 3, 16, 16]
+                }
+                self.dynamic_shape.max_input_shape = {
+                    "input_data": [4, 3, 32, 32]
+                }
+                self.dynamic_shape.opt_input_shape = {
+                    "input_data": [1, 3, 32, 32]
+                }
+
+        def clear_dynamic_shape():
+            # Empty shape dicts select the static-shape configuration.
+            self.dynamic_shape.min_input_shape = {}
+            self.dynamic_shape.max_input_shape = {}
+            self.dynamic_shape.opt_input_shape = {}
+
+        def generate_trt_nodes_num(attrs, dynamic_shape):
+            # Expected node counts passed to the harness; presumably
+            # (TensorRT engine ops, other ops) -- confirm against
+            # TrtLayerAutoScanTest. Rank-1 inputs use the (0, 4) expectation.
+            if self.dims == 1:
+                return 0, 4
+            return 1, 3
+
+        attrs = [
+            program_config.ops[i].attrs for i in range(len(program_config.ops))
+        ]
+
+        # for static_shape
+        clear_dynamic_shape()
+        self.trt_param.precision = paddle_infer.PrecisionType.Float32
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, False), 1e-5
+        self.trt_param.precision = paddle_infer.PrecisionType.Half
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, False), 1e-5
+
+        # for dynamic_shape
+        generate_dynamic_shape(attrs)
+        self.trt_param.precision = paddle_infer.PrecisionType.Float32
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, True), 1e-5
+        self.trt_param.precision = paddle_infer.PrecisionType.Half
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, True), 1e-5
+
+    def test(self):
+        # Entry point picked up by unittest; delegates to the harness.
+        self.run_test()
+
+
+# Run the test case through unittest when this file is executed as a script.
+if __name__ == "__main__":
+    unittest.main()
--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_top_k_v2.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_top_k_v2.py
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from trt_layer_auto_scan_test import TrtLayerAutoScanTest, SkipReasons
+from program_config import TensorConfig, ProgramConfig
+import unittest
+import numpy as np
+import paddle.inference as paddle_infer
+from functools import partial
+from typing import Optional, List, Callable, Dict, Any, Set
+
+
+class TrtConvertActivationTest(TrtLayerAutoScanTest):
+    """Auto-scan test for converting the ``top_k_v2`` op to TensorRT.
+
+    Single-op ``top_k_v2`` programs are sampled over input rank, batch size,
+    ``k``, ``axis``, ``largest`` and ``sorted``. Each valid program is paired
+    with static-shape and dynamic-shape predictor configs in Float32 and Half
+    precision.
+    """
+
+    def is_program_valid(self, program_config: ProgramConfig) -> bool:
+        """Accept a program only if ``axis`` is smaller than the input rank."""
+        input_rank = len(program_config.inputs['input_data'].shape)
+        requested_axis = program_config.ops[0].attrs['axis']
+        return input_rank > requested_axis
+
+    def sample_program_configs(self):
+        """Yield one ``top_k_v2`` program per attribute combination.
+
+        ``self.dims`` and ``self.sort`` are updated right before each yield
+        and read back by ``sample_predictor_configs``.
+        """
+        self.trt_param.workspace_size = 1073741824
+
+        def make_input(rank, batch_size):
+            # Random float32 input; ranks 1-3 use fixed shapes and every other
+            # rank uses the 4-D shape led by ``batch_size``.
+            fixed_shapes = {1: [3], 2: [3, 32], 3: [3, 32, 32]}
+            shape = fixed_shapes.get(rank, [batch_size, 32, 32, 32])
+            return np.random.random(shape).astype(np.float32)
+
+        # Flattened form of the six nested sweeps; the iteration order is the
+        # same as nesting the loops in the order written here.
+        sweep = ((dims, batch, k, axis, largest, sort)
+                 for dims in [1, 2, 3, 4]
+                 for batch in [1, 4]
+                 for k in [1, 3]
+                 for axis in [-1, 1, 2, 3]
+                 for largest in [True, False]
+                 for sort in [True, False])
+
+        for dims, batch, k, axis, largest, sort in sweep:
+            self.dims = dims
+            self.sort = sort
+            op_attrs = {
+                "k": k,
+                "axis": axis,
+                "largest": largest,
+                "sorted": sort
+            }
+            ops = self.generate_op_config([{
+                "op_type": "top_k_v2",
+                "op_inputs": {
+                    "X": ["input_data"]
+                },
+                "op_outputs": {
+                    "Out": ["output_data"],
+                    "Indices": ["indices_data"]
+                },
+                "op_attrs": op_attrs
+            }])
+
+            yield ProgramConfig(
+                ops=ops,
+                weights={},
+                inputs={
+                    "input_data":
+                    TensorConfig(data_gen=partial(make_input, dims, batch))
+                },
+                outputs=["output_data", "indices_data"])
+
+    def sample_predictor_configs(
+            self, program_config) -> (paddle_infer.Config, List[int], float):
+        """Yield (inference config, expected node counts, tolerance) tuples.
+
+        Four tuples are produced: static shape in Float32 and Half, then
+        dynamic shape in Float32 and Half, all with tolerance 1e-5.
+        """
+
+        def generate_dynamic_shape(attrs):
+            # (min, max, opt) shapes for "input_data" keyed by rank; ranks not
+            # listed use the 4-D entry.
+            by_rank = {
+                1: ([1], [64], [32]),
+                2: ([1, 1], [4, 64], [3, 10]),
+                3: ([1, 1, 1], [4, 64, 64], [3, 10, 10]),
+            }
+            four_d = ([1, 3, 16, 16], [4, 32, 32, 32], [1, 3, 32, 32])
+            smallest, biggest, typical = by_rank.get(self.dims, four_d)
+            self.dynamic_shape.min_input_shape = {"input_data": smallest}
+            self.dynamic_shape.max_input_shape = {"input_data": biggest}
+            self.dynamic_shape.opt_input_shape = {"input_data": typical}
+
+        def clear_dynamic_shape():
+            # Empty shape dicts select the static-shape configuration.
+            self.dynamic_shape.min_input_shape = {}
+            self.dynamic_shape.max_input_shape = {}
+            self.dynamic_shape.opt_input_shape = {}
+
+        def generate_trt_nodes_num(attrs, dynamic_shape):
+            # Rank-1 inputs and unsorted results use the (0, 4) expectation;
+            # everything else expects (1, 3).
+            if self.dims == 1 or not self.sort:
+                return 0, 4
+            return 1, 3
+
+        attrs = [op.attrs for op in program_config.ops]
+        precisions = (paddle_infer.PrecisionType.Float32,
+                      paddle_infer.PrecisionType.Half)
+
+        # Static shape first, then dynamic shape, each in both precisions.
+        clear_dynamic_shape()
+        for precision in precisions:
+            self.trt_param.precision = precision
+            yield self.create_inference_config(), generate_trt_nodes_num(
+                attrs, False), 1e-5
+
+        generate_dynamic_shape(attrs)
+        for precision in precisions:
+            self.trt_param.precision = precision
+            yield self.create_inference_config(), generate_trt_nodes_num(
+                attrs, True), 1e-5
+
+    def test(self):
+        # Entry point picked up by unittest; delegates to the harness.
+        self.run_test()
+
+
+# Run the test case through unittest when this file is executed as a script.
+if __name__ == "__main__":
+    unittest.main()