diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index d6080bd69284efbb7d444e97c4ab0a331db5054b..fc436311f0796c2211f447822741f33c4ed4549c 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -1191,6 +1191,7 @@ USE_TRT_CONVERTER(slice); USE_TRT_CONVERTER(scale); USE_TRT_CONVERTER(stack); USE_TRT_CONVERTER(clip); +USE_TRT_CONVERTER(gather); #endif namespace paddle_infer { diff --git a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt index f9586ca1701f74f140e6a78b8758a76c1739a54a..59205529ef4c029ce7d08e382a02c868d7e94db1 100644 --- a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt +++ b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt @@ -5,6 +5,7 @@ nv_library(tensorrt_converter pad_op.cc split_op.cc prelu_op.cc leaky_relu_op.cc gelu_op.cc layer_norm_op.cc multihead_matmul_op.cc shuffle_channel_op.cc swish_op.cc instance_norm_op.cc stack_op.cc transpose_op.cc flatten_op.cc emb_eltwise_layernorm.cc skip_layernorm.cc scale_op.cc slice_op.cc hard_sigmoid_op.cc hard_swish_op.cc clip_op.cc + gather_op.cc DEPS tensorrt_engine tensorrt_plugin operator scope framework_proto op_registry) nv_test(test_op_converter SRCS test_op_converter.cc DEPS diff --git a/paddle/fluid/inference/tensorrt/convert/gather_op.cc b/paddle/fluid/inference/tensorrt/convert/gather_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..346a8bffa00e383781a2e0a26afaa97437598b8d --- /dev/null +++ b/paddle/fluid/inference/tensorrt/convert/gather_op.cc @@ -0,0 +1,78 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/inference/tensorrt/convert/op_converter.h" + +namespace paddle { +namespace framework { +class Scope; + +namespace proto { +class OpDesc; +} // namespace proto +} // namespace framework +} // namespace paddle + +namespace paddle { +namespace inference { +namespace tensorrt { + +/* + * Gather Op + */ +class GatherOpConverter : public OpConverter { + public: + void operator()(const framework::proto::OpDesc& op, + const framework::Scope& scope, bool test_mode) override { + VLOG(3) << "convert a fluid gather op to tensorrt gather layer"; + + framework::OpDesc op_desc(op, nullptr); + std::string input_name = op_desc.Input("X").front(); + std::string index_name = op_desc.Input("Index").front(); + std::string output_name = op_desc.Output("Out").front(); + + const auto input_tensor = engine_->GetITensor(input_name); + const auto index_tensor = engine_->GetITensor(index_name); + + const int axis = 0; + + auto layer = TRT_ENGINE_ADD_LAYER(engine_, Gather, *input_tensor, + *index_tensor, axis); + + auto odim = layer->getOutput(0)->getDimensions(); + + auto reshape_layer = + TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *layer->getOutput(0)); + + nvinfer1::Dims target_shape{}; + target_shape.nbDims = odim.nbDims - 1; + for (int i = 0; i < axis; ++i) { + target_shape.d[i] = odim.d[i]; + } + target_shape.d[axis] = 0; + for (int i = axis + 1; i < target_shape.nbDims; ++i) { + target_shape.d[i] = odim.d[i + 1]; + } + + reshape_layer->setReshapeDimensions(target_shape); + + RreplenishLayerAndOutput(reshape_layer, "gather", {output_name}, test_mode); + } +}; + +} // namespace tensorrt +} // namespace inference +} // namespace paddle + +REGISTER_TRT_OP_CONVERTER(gather, GatherOpConverter); diff --git a/paddle/fluid/inference/tensorrt/op_teller.cc b/paddle/fluid/inference/tensorrt/op_teller.cc index 72338bcef1197a0051052c653f726e15d576cbd0..44939606b49c3578d5bb50c5e3c0f658d09b6eb8 100644 --- a/paddle/fluid/inference/tensorrt/op_teller.cc +++ b/paddle/fluid/inference/tensorrt/op_teller.cc @@ -109,6 +109,7 @@ struct SimpleOpTypeSetTeller : public Teller { "transpose", "flatten2", "flatten", + "gather", }; }; @@ -186,6 +187,10 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8, if (axis != 1) return false; } } + if (op_type == "gather") { + // current not support axis from input, use default 0 + if (!with_dynamic_shape || desc.Input("Axis").size() > 0) return false; + } if ((*teller)(op_type, desc, use_no_calib_int8)) return true; } return false; diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_gather_op.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_gather_op.py new file mode 100644 index 0000000000000000000000000000000000000000..fec15ea7295a0ff46fe5a4cce0012d3cf8dc21f3 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_gather_op.py @@ -0,0 +1,70 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import unittest +import numpy as np +from inference_pass_test import InferencePassTest +import paddle.fluid as fluid +import paddle.fluid.core as core +from paddle.fluid.core import PassVersionChecker +from paddle.fluid.core import AnalysisConfig + + +class TRTGatherTest(InferencePassTest): + def setUp(self): + self.set_params() + with fluid.program_guard(self.main_program, self.startup_program): + data = fluid.data(name='data', shape=[-1, 512], dtype='float32') + index = fluid.data(name='index', shape=[-1], dtype='int32') + scale_out = self.append_gather(data, index) + out = fluid.layers.batch_norm(scale_out, is_test=True) + + index = np.arange(self.num_gather, dtype='int32') + np.random.shuffle(index) + + self.feeds = { + "data": np.random.random([self.bs, 512]).astype("float32"), + "index": index, + } + + self.enable_trt = True + self.trt_parameters = TRTGatherTest.TensorRTParam( + 1 << 30, self.bs, 1, AnalysisConfig.Precision.Float32, False, False) + self.fetch_list = [out] + + def set_params(self): + self.num_gather = 16 + self.bs = 32 + + def append_gather(self, data, index): + return fluid.layers.gather(data, index=index) + + def test_check_output(self): + if core.is_compiled_with_cuda(): + use_gpu = True + self.check_output_with_option(use_gpu, flatten=True) + self.assertTrue( + PassVersionChecker.IsCompatible('tensorrt_subgraph_pass')) + + +class TRTGatherTest1(TRTGatherTest): + def set_params(self): + self.num_gather = 32 + self.bs = 32 + + +if __name__ == "__main__": + unittest.main()