diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
index 0645af611b9d252e0cec691cc06e29800e110845..c41b667e18a833111fdfc70c37b267430753dffb 100644
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -1960,6 +1960,8 @@ USE_TRT_CONVERTER(strided_slice)
 USE_TRT_CONVERTER(transformer_input_convert)
 USE_TRT_CONVERTER(recover_padding)
 USE_TRT_CONVERTER(remove_padding)
+USE_TRT_CONVERTER(top_k)
+USE_TRT_CONVERTER(top_k_v2)
 #if PADDLE_WITH_CUSPARSELT && IS_TRT_VERSION_GE(8000)
 USE_TRT_CONVERTER(sparse_fc)
 USE_TRT_CONVERTER(sparse_multihead_matmul)
diff --git a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
index 2c9ba4282153561cb14f3169aa09cfa84e7bc241..52a3c1df9a92550dd1edd8c4da41d84cbddf8b6e 100644
--- a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
+++ b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
@@ -60,7 +60,8 @@ list(
   roll_op.cc
   transformer_input_convert_op.cc
   remove_padding_op.cc
-  recover_padding_op.cc)
+  recover_padding_op.cc
+  top_k_op.cc)
 
 if(CUSPARSELT_FOUND AND ${TENSORRT_MAJOR_VERSION} GREATER_EQUAL 8)
   list(APPEND CONVERT_FILES sparse_fc_op.cc sparse_multihead_matmul_op.cc)
diff --git a/paddle/fluid/inference/tensorrt/convert/top_k_op.cc b/paddle/fluid/inference/tensorrt/convert/top_k_op.cc
new file mode 100644
index 0000000000000000000000000000000000000000..1d7f1ca243b2ae2e04661e058227b2e02c0acc4f
--- /dev/null
+++ b/paddle/fluid/inference/tensorrt/convert/top_k_op.cc
@@ -0,0 +1,116 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <string>
+
+#include <vector>
+
+#include "glog/logging.h"
+#include "paddle/fluid/framework/op_desc.h"
+#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
+#include "paddle/fluid/inference/tensorrt/engine.h"
+#include "paddle/fluid/inference/tensorrt/helper.h"
+#include "paddle/fluid/platform/enforce.h"
+
+namespace paddle {
+namespace framework {
+class Scope;
+
+namespace proto {
+class OpDesc;
+}  // namespace proto
+}  // namespace framework
+}  // namespace paddle
+
+namespace paddle {
+namespace inference {
+namespace tensorrt {
+
+class TopKOpConverter : public OpConverter {
+ public:
+  TopKOpConverter() {}
+  void operator()(const framework::proto::OpDesc& op,
+                  const framework::Scope& scope, bool test_mode) override {
+    // The nullptr here looks strange; that's because the
+    // framework::OpDesc constructor is strange.
+    framework::OpDesc op_desc(op, nullptr);
+
+    auto* input_tensor = engine_->GetITensor(op_desc.Input("X")[0]);
+
+    const int k = op_desc.HasAttr("k")
+                      ? BOOST_GET_CONST(int, op_desc.GetAttr("k"))
+                      : 1;
+
+    nvinfer1::Dims input_dims = input_tensor->getDimensions();
+    int axis = input_dims.nbDims;
+    nvinfer1::ITopKLayer* layer =
+        TRT_ENGINE_ADD_LAYER(engine_, TopK, *input_tensor,
+                             nvinfer1::TopKOperation::kMAX, k,
+                             1 << (axis - 1));
+
+    std::vector<std::string> output_names;
+    output_names.push_back(op_desc.Output("Out").front());
+    output_names.push_back(op_desc.Output("Indices").front());
+
+    RreplenishLayerAndOutput(layer, "top_k", output_names, test_mode);
+  }
+};
+class TopKv2OpConverter : public OpConverter {
+ public:
+  TopKv2OpConverter() {}
+  void operator()(const framework::proto::OpDesc& op,
+                  const framework::Scope& scope, bool test_mode) override {
+    // The nullptr here looks strange; that's because the
+    // framework::OpDesc constructor is strange.
+    framework::OpDesc op_desc(op, nullptr);
+
+    auto* input_tensor = engine_->GetITensor(op_desc.Input("X")[0]);
+
+    const int k = op_desc.HasAttr("k")
+                      ? BOOST_GET_CONST(int, op_desc.GetAttr("k"))
+                      : 1;
+    const int axis = op_desc.HasAttr("axis")
+                         ? BOOST_GET_CONST(int, op_desc.GetAttr("axis"))
+                         : 1;
+    const bool largest =
+        op_desc.HasAttr("largest")
+            ? BOOST_GET_CONST(bool, op_desc.GetAttr("largest"))
+            : true;
+    auto flag = largest ? nvinfer1::TopKOperation::kMAX
+                        : nvinfer1::TopKOperation::kMIN;
+    nvinfer1::ITopKLayer* layer = nullptr;
+    if (axis == -1) {
+      nvinfer1::Dims input_dims = input_tensor->getDimensions();
+      layer = TRT_ENGINE_ADD_LAYER(engine_, TopK, *input_tensor, flag, k,
+                                   1 << (input_dims.nbDims - 1));
+    } else {
+      if (engine_->with_dynamic_shape()) {
+        layer = TRT_ENGINE_ADD_LAYER(engine_, TopK, *input_tensor, flag, k,
+                                     1 << axis);
+      } else {
+        layer = TRT_ENGINE_ADD_LAYER(engine_, TopK, *input_tensor, flag, k,
+                                     1 << (axis - 1));
+      }
+    }
+    std::vector<std::string> output_names;
+    output_names.push_back(op_desc.Output("Out").front());
+    output_names.push_back(op_desc.Output("Indices").front());
+
+    RreplenishLayerAndOutput(layer, "top_k_v2", output_names, test_mode);
+  }
+};
+}  // namespace tensorrt
+}  // namespace inference
+}  // namespace paddle
+
+REGISTER_TRT_OP_CONVERTER(top_k, TopKOpConverter);
+REGISTER_TRT_OP_CONVERTER(top_k_v2, TopKv2OpConverter);
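A note on the converter above: TensorRT's ITopKLayer takes a `reduceAxes` bitmask rather than an axis index, and the bit chosen differs between the implicit-batch (static shape) and explicit-batch (dynamic shape) engines. The following minimal Python sketch of that mapping is illustrative only and not part of the patch; the helper name and example shapes are made up.

```python
# Illustrative sketch: how a Paddle `axis` attribute maps to the single-bit
# reduceAxes mask passed to TensorRT's ITopKLayer, mirroring the branches
# in TopKv2OpConverter above.
def reduce_axes_bitmask(axis: int, nb_dims: int, with_dynamic_shape: bool) -> int:
    if axis == -1:
        # Resolved against the dims of the ITensor the converter sees,
        # i.e. the last dimension of that shape.
        return 1 << (nb_dims - 1)
    if with_dynamic_shape:
        # Explicit-batch mode: TRT dims line up with Paddle dims.
        return 1 << axis
    # Implicit-batch mode: dim 0 (batch) is stripped from the ITensor,
    # so Paddle axis `a` corresponds to TRT dim `a - 1`.
    return 1 << (axis - 1)


# A 4-D NCHW input, top-k over the channel axis (Paddle axis 1):
assert reduce_axes_bitmask(1, nb_dims=4, with_dynamic_shape=True) == 0b0010
assert reduce_axes_bitmask(1, nb_dims=3, with_dynamic_shape=False) == 0b0001
```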
diff --git a/paddle/fluid/inference/tensorrt/op_teller.cc b/paddle/fluid/inference/tensorrt/op_teller.cc
index 57ac400dadab35deea270abfb03e027c66a43839..5d4a8ee1f0c84796fb3055c8e2956e1a6889932a 100644
--- a/paddle/fluid/inference/tensorrt/op_teller.cc
+++ b/paddle/fluid/inference/tensorrt/op_teller.cc
@@ -104,6 +104,8 @@ struct SimpleOpTypeSetTeller : public Teller {
       "stack",
       "transpose2",
       "transpose",
+      "top_k",
+      "top_k_v2",
       "flatten2",
       "flatten",
       "gather",
@@ -175,6 +177,8 @@ struct SimpleOpTypeSetTeller : public Teller {
       "stack",
       "transpose2",
       "transpose",
+      "top_k",
+      "top_k_v2",
       "flatten2",
       "flatten",
       "gather",
@@ -1759,6 +1763,34 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
     }
   }
 
+  if (op_type == "top_k_v2" || op_type == "top_k") {
+    auto* block = desc.Block();
+    auto x_var_name = desc.Input("X")[0];
+    auto* x_var_desc = block->FindVar(x_var_name);
+    const auto x_shape = x_var_desc->GetShape();
+    if (x_shape.size() == 1) {
+      VLOG(3) << "top_k/top_k_v2 does not support 1-dimensional input in "
+                 "tensorrt";
+      return false;
+    }
+    if (desc.HasAttr("axis")) {
+      int axis = BOOST_GET_CONST(int, desc.GetAttr("axis"));
+      if (axis == 0) {
+        VLOG(3) << "top_k_v2 does not support axis == 0 in "
+                   "tensorrt";
+        return false;
+      }
+    }
+    if (desc.HasAttr("sorted")) {
+      bool sorted = BOOST_GET_CONST(bool, desc.GetAttr("sorted"));
+      if (!sorted) {
+        VLOG(3) << "top_k_v2 does not support results not sorted in "
+                   "tensorrt";
+        return false;
+      }
+    }
+  }
+
 #if IS_TRT_VERSION_GE(8000)
   if (op_type == "sparse_fc" || op_type == "sparse_multihead_matmul") {
     if (!with_dynamic_shape) {
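The teller entries above only admit cases ITopKLayer can express. A hedged Python sketch of how that plays out at the `paddle.topk` level (`top_k_v2` is the op `paddle.topk` lowers to); the tensors are placeholders and the comments simply restate the conditions in the teller:

```python
# Illustrative only: which paddle.topk calls the teller above would keep in a
# TensorRT subgraph versus hand back to native Paddle execution.
import paddle

x2d = paddle.rand([3, 32])
x1d = paddle.rand([32])

paddle.topk(x2d, k=3)                          # eligible: >1-D input, sorted, axis != 0
paddle.topk(x2d, k=3, axis=-1, largest=False)  # eligible: largest=False maps to kMIN
paddle.topk(x1d, k=3)                          # rejected: 1-dimensional input
paddle.topk(x2d, k=3, axis=0)                  # rejected: axis == 0
paddle.topk(x2d, k=3, sorted=False)            # rejected: unsorted results
```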
diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_top_k.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_top_k.py
new file mode 100644
index 0000000000000000000000000000000000000000..28509d42ee30b1de06b3c903f8587e75f6b514c1
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_top_k.py
@@ -0,0 +1,136 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from trt_layer_auto_scan_test import TrtLayerAutoScanTest, SkipReasons
+from program_config import TensorConfig, ProgramConfig
+import unittest
+import numpy as np
+import paddle.inference as paddle_infer
+from functools import partial
+from typing import Optional, List, Callable, Dict, Any, Set
+
+
+class TrtConvertTopKTest(TrtLayerAutoScanTest):
+
+    def is_program_valid(self, program_config: ProgramConfig) -> bool:
+        return True
+
+    def sample_program_configs(self):
+        self.trt_param.workspace_size = 1073741824
+
+        def generate_input1(dims, batch, attrs: List[Dict[str, Any]]):
+            if dims == 1:
+                return np.random.random([32]).astype(np.float32)
+            elif dims == 2:
+                return np.random.random([3, 32]).astype(np.float32)
+            elif dims == 3:
+                return np.random.random([3, 32, 32]).astype(np.float32)
+            else:
+                return np.random.random([batch, 3, 32, 32]).astype(np.float32)
+
+        for dims in [2, 3, 4, 5]:
+            for batch in [1]:
+                for k in [1, 3]:
+                    self.dims = dims
+                    dics = [{"k": k}]
+                    ops_config = [{
+                        "op_type": "top_k",
+                        "op_inputs": {
+                            "X": ["input_data"]
+                        },
+                        "op_outputs": {
+                            "Out": ["output_data"],
+                            "Indices": ["indices_data"]
+                        },
+                        "op_attrs": dics[0]
+                    }]
+                    ops = self.generate_op_config(ops_config)
+
+                    program_config = ProgramConfig(
+                        ops=ops,
+                        weights={},
+                        inputs={
+                            "input_data":
+                            TensorConfig(data_gen=partial(
+                                generate_input1, dims, batch, dics))
+                        },
+                        outputs=["output_data", "indices_data"])
+
+                    yield program_config
+
+    def sample_predictor_configs(
+            self, program_config) -> (paddle_infer.Config, List[int], float):
+
+        def generate_dynamic_shape(attrs):
+            if self.dims == 1:
+                self.dynamic_shape.min_input_shape = {"input_data": [1]}
+                self.dynamic_shape.max_input_shape = {"input_data": [64]}
+                self.dynamic_shape.opt_input_shape = {"input_data": [32]}
+            elif self.dims == 2:
+                self.dynamic_shape.min_input_shape = {"input_data": [1, 16]}
+                self.dynamic_shape.max_input_shape = {"input_data": [4, 32]}
+                self.dynamic_shape.opt_input_shape = {"input_data": [3, 32]}
+            elif self.dims == 3:
+                self.dynamic_shape.min_input_shape = {"input_data": [1, 16, 16]}
+                self.dynamic_shape.max_input_shape = {"input_data": [4, 32, 32]}
+                self.dynamic_shape.opt_input_shape = {"input_data": [3, 32, 32]}
+            else:
+                self.dynamic_shape.min_input_shape = {
+                    "input_data": [1, 3, 16, 16]
+                }
+                self.dynamic_shape.max_input_shape = {
+                    "input_data": [4, 3, 32, 32]
+                }
+                self.dynamic_shape.opt_input_shape = {
+                    "input_data": [1, 3, 32, 32]
+                }
+
+        def clear_dynamic_shape():
+            self.dynamic_shape.min_input_shape = {}
+            self.dynamic_shape.max_input_shape = {}
+            self.dynamic_shape.opt_input_shape = {}
+
+        def generate_trt_nodes_num(attrs, dynamic_shape):
+            if self.dims == 1:
+                return 0, 4
+            return 1, 3
+
+        attrs = [
+            program_config.ops[i].attrs for i in range(len(program_config.ops))
+        ]
+
+        # for static_shape
+        clear_dynamic_shape()
+        self.trt_param.precision = paddle_infer.PrecisionType.Float32
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, False), 1e-5
+        self.trt_param.precision = paddle_infer.PrecisionType.Half
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, False), 1e-5
+
+        # for dynamic_shape
+        generate_dynamic_shape(attrs)
+        self.trt_param.precision = paddle_infer.PrecisionType.Float32
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, True), 1e-5
+        self.trt_param.precision = paddle_infer.PrecisionType.Half
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, True), 1e-5
+
+    def test(self):
+        self.run_test()
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_top_k_v2.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_top_k_v2.py
new file mode 100644
index 0000000000000000000000000000000000000000..651cc00d2cd7a65eef61afb317a1d81d74848327
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_top_k_v2.py
@@ -0,0 +1,153 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from trt_layer_auto_scan_test import TrtLayerAutoScanTest, SkipReasons
+from program_config import TensorConfig, ProgramConfig
+import unittest
+import numpy as np
+import paddle.inference as paddle_infer
+from functools import partial
+from typing import Optional, List, Callable, Dict, Any, Set
+
+
+class TrtConvertTopKV2Test(TrtLayerAutoScanTest):
+
+    def is_program_valid(self, program_config: ProgramConfig) -> bool:
+        inputs = program_config.inputs
+        attrs = [
+            program_config.ops[i].attrs for i in range(len(program_config.ops))
+        ]
+        if len(inputs['input_data'].shape) <= attrs[0]['axis']:
+            return False
+        return True
+
+    def sample_program_configs(self):
+        self.trt_param.workspace_size = 1073741824
+
+        def generate_input1(dims, batch, attrs: List[Dict[str, Any]]):
+            if dims == 1:
+                return np.random.random([3]).astype(np.float32)
+            elif dims == 2:
+                return np.random.random([3, 32]).astype(np.float32)
+            elif dims == 3:
+                return np.random.random([3, 32, 32]).astype(np.float32)
+            else:
+                return np.random.random([batch, 32, 32, 32]).astype(np.float32)
+
+        for dims in [1, 2, 3, 4]:
+            for batch in [1, 4]:
+                for k in [1, 3]:
+                    for axis in [-1, 1, 2, 3]:
+                        for largest in [True, False]:
+                            for sort in [True, False]:
+                                self.dims = dims
+                                self.sort = sort
+                                dics = [{
+                                    "k": k,
+                                    "axis": axis,
+                                    "largest": largest,
+                                    "sorted": sort
+                                }]
+                                ops_config = [{
+                                    "op_type": "top_k_v2",
+                                    "op_inputs": {
+                                        "X": ["input_data"]
+                                    },
+                                    "op_outputs": {
+                                        "Out": ["output_data"],
+                                        "Indices": ["indices_data"]
+                                    },
+                                    "op_attrs": dics[0]
+                                }]
+                                ops = self.generate_op_config(ops_config)
+
+                                program_config = ProgramConfig(
+                                    ops=ops,
+                                    weights={},
+                                    inputs={
+                                        "input_data":
+                                        TensorConfig(data_gen=partial(
+                                            generate_input1, dims, batch, dics))
+                                    },
+                                    outputs=["output_data", "indices_data"])
+
+                                yield program_config
+
+    def sample_predictor_configs(
+            self, program_config) -> (paddle_infer.Config, List[int], float):
+
+        def generate_dynamic_shape(attrs):
+            if self.dims == 1:
+                self.dynamic_shape.min_input_shape = {"input_data": [1]}
+                self.dynamic_shape.max_input_shape = {"input_data": [64]}
+                self.dynamic_shape.opt_input_shape = {"input_data": [32]}
+            elif self.dims == 2:
+                self.dynamic_shape.min_input_shape = {"input_data": [1, 1]}
+                self.dynamic_shape.max_input_shape = {"input_data": [4, 64]}
+                self.dynamic_shape.opt_input_shape = {"input_data": [3, 10]}
+            elif self.dims == 3:
+                self.dynamic_shape.min_input_shape = {"input_data": [1, 1, 1]}
+                self.dynamic_shape.max_input_shape = {"input_data": [4, 64, 64]}
+                self.dynamic_shape.opt_input_shape = {"input_data": [3, 10, 10]}
+            else:
+                self.dynamic_shape.min_input_shape = {
+                    "input_data": [1, 3, 16, 16]
+                }
+                self.dynamic_shape.max_input_shape = {
+                    "input_data": [4, 32, 32, 32]
+                }
+                self.dynamic_shape.opt_input_shape = {
+                    "input_data": [1, 3, 32, 32]
+                }
+
+        def clear_dynamic_shape():
+            self.dynamic_shape.min_input_shape = {}
+            self.dynamic_shape.max_input_shape = {}
+            self.dynamic_shape.opt_input_shape = {}
+
+        def generate_trt_nodes_num(attrs, dynamic_shape):
+            if self.dims == 1:
+                return 0, 4
+            if not self.sort:
+                return 0, 4
+            return 1, 3
+
+        attrs = [
+            program_config.ops[i].attrs for i in range(len(program_config.ops))
+        ]
+
+        # for static_shape
+        clear_dynamic_shape()
+        self.trt_param.precision = paddle_infer.PrecisionType.Float32
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, False), 1e-5
+        self.trt_param.precision = paddle_infer.PrecisionType.Half
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, False), 1e-5
+
+        # for dynamic_shape
+        generate_dynamic_shape(attrs)
+        self.trt_param.precision = paddle_infer.PrecisionType.Float32
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, True), 1e-5
+        self.trt_param.precision = paddle_infer.PrecisionType.Half
+        yield self.create_inference_config(), generate_trt_nodes_num(
+            attrs, True), 1e-5
+
+    def test(self):
+        self.run_test()
+
+
+if __name__ == "__main__":
+    unittest.main()
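For completeness, a sketch of how a deployment might exercise the new converters end to end through the Python inference API. The model path, input name, and shapes below are placeholders and not part of this patch; the Config and TensorRT calls are the standard paddle.inference ones.

```python
# Hypothetical usage sketch: run a saved model containing top_k/top_k_v2
# through Paddle Inference with the TensorRT backend enabled.
import numpy as np
import paddle.inference as paddle_infer

# "model_with_topk/..." and "input_data" are placeholders.
config = paddle_infer.Config("model_with_topk/inference.pdmodel",
                             "model_with_topk/inference.pdiparams")
config.enable_use_gpu(256, 0)
config.enable_tensorrt_engine(workspace_size=1 << 30,
                              max_batch_size=4,
                              min_subgraph_size=1,
                              precision_mode=paddle_infer.PrecisionType.Float32,
                              use_static=False,
                              use_calib_mode=False)
# A dynamic-shape profile so top_k_v2 with axis >= 1 takes the
# explicit-batch (1 << axis) path in the converter.
config.set_trt_dynamic_shape_info({"input_data": [1, 3, 16, 16]},
                                  {"input_data": [4, 3, 32, 32]},
                                  {"input_data": [1, 3, 32, 32]})

predictor = paddle_infer.create_predictor(config)
input_handle = predictor.get_input_handle(predictor.get_input_names()[0])
input_handle.copy_from_cpu(np.random.rand(1, 3, 32, 32).astype("float32"))
predictor.run()
out = predictor.get_output_handle(predictor.get_output_names()[0]).copy_to_cpu()
```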