diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
index d32ec581ce94b46d152de2084f9af8180ad0bdc9..b31b5f906b9b9bd8d08bb6953965d9793d62caa3 100644
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -1256,6 +1256,7 @@ USE_TRT_CONVERTER(reshape);
 USE_TRT_CONVERTER(reduce_sum);
 USE_TRT_CONVERTER(gather_nd);
 USE_TRT_CONVERTER(reduce_mean);
+USE_TRT_CONVERTER(tile);
 #endif
 
 namespace paddle_infer {
diff --git a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
index 2e4a175566a7a100749d14c712e8ef9a89eb6019..63d9114e1acda05ce5becbe4c10e1032d58c8efb 100644
--- a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
+++ b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
@@ -15,6 +15,7 @@ nv_library(tensorrt_converter
   reshape_op.cc
   reduce_op.cc
   gather_nd_op.cc
+  tile_op.cc
   DEPS tensorrt_engine tensorrt_plugin operator scope framework_proto op_registry)
 
 nv_test(test_op_converter SRCS test_op_converter.cc DEPS
diff --git a/paddle/fluid/inference/tensorrt/convert/tile_op.cc b/paddle/fluid/inference/tensorrt/convert/tile_op.cc
new file mode 100644
index 0000000000000000000000000000000000000000..860d04f03ecf3fe5e07f6d7e2226bd0582135bf5
--- /dev/null
+++ b/paddle/fluid/inference/tensorrt/convert/tile_op.cc
@@ -0,0 +1,92 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
+
+namespace paddle {
+namespace framework {
+class Scope;
+namespace proto {
+class OpDesc;
+}  // namespace proto
+}  // namespace framework
+}  // namespace paddle
+
+namespace paddle {
+namespace inference {
+namespace tensorrt {
+
+/*
+ * TileOp: converts a fluid `tile` op into a TensorRT Slice layer running in
+ * kWRAP mode. Only the static `repeat_times` attribute is read here, and the
+ * implicit batch dimension (repeat_times[0]) must stay 1.
+ */
+class TileOpConverter : public OpConverter {
+ public:
+  void operator()(const framework::proto::OpDesc& op,
+                  const framework::Scope& scope, bool test_mode) override {
+#if IS_TRT_VERSION_GE(7000)
+    VLOG(4) << "convert a fluid tile op to tensorrt tile layer";
+
+    framework::OpDesc op_desc(op, nullptr);
+    // Declare inputs
+    auto* input = engine_->GetITensor(op_desc.Input("X")[0]);
+    const nvinfer1::Dims input_shape = input->getDimensions();
+    std::vector<int> repeat_times =
+        BOOST_GET_CONST(std::vector<int>, op_desc.GetAttr("repeat_times"));
+
+    // input_shape carries no batch dimension while repeat_times does, so the
+    // full input rank is nbDims + 1. Compare as signed ints so the subtraction
+    // below cannot wrap around in unsigned arithmetic.
+    const int full_rank = input_shape.nbDims + 1;
+    const int repeat_rank = static_cast<int>(repeat_times.size());
+
+    // If full_rank < repeat_rank we would have to prepend dimensions in front
+    // of the batch dimension. trt doesn't support this behavior.
+    PADDLE_ENFORCE_GE(full_rank, repeat_rank,
+                      platform::errors::InvalidArgument(
+                          "Can't change batchsize, please check repeat_times"));
+    // Left-pad repeat_times with 1s so it has one entry per dimension.
+    const int diff = full_rank - repeat_rank;
+    if (diff > 0) repeat_times.insert(repeat_times.begin(), diff, 1);
+
+    // Can't expand on batchsize
+    PADDLE_ENFORCE_EQ(
+        repeat_times[0], 1,
+        platform::errors::InvalidArgument(
+            "Can't expand on batchsize, please check repeat_times"));
+
+    nvinfer1::Dims output_dim = input_shape;
+    nvinfer1::Dims output_stride;
+    output_stride.nbDims = input_shape.nbDims;
+    for (int i = 0; i < input_shape.nbDims; i++) {
+      // repeat_times[0] belongs to the batch dimension, hence the +1 offset.
+      output_dim.d[i] = output_dim.d[i] * repeat_times[i + 1];
+      output_stride.d[i] = 1;
+    }
+
+    // NOTE(review): the slice start is input_shape rather than all zeros;
+    // presumably kWRAP wraps it back to offset 0 -- confirm against the
+    // TensorRT ISliceLayer documentation.
+    auto* layer = TRT_ENGINE_ADD_LAYER(engine_, Slice, *input, input_shape,
+                                       output_dim, output_stride);
+    layer->setMode(nvinfer1::SliceMode::kWRAP);
+    auto output_name = op_desc.Output("Out")[0];
+    RreplenishLayerAndOutput(layer, "tile", {output_name}, test_mode);
+#endif
+  }
+};
+
+}  // namespace tensorrt
+}  // namespace inference
+}  // namespace paddle
+
+REGISTER_TRT_OP_CONVERTER(tile, TileOpConverter);
diff --git a/paddle/fluid/inference/tensorrt/op_teller.cc b/paddle/fluid/inference/tensorrt/op_teller.cc
index 6c6006065435f41f7c12377ece959ec9633fd134..2829a740236d271ccf3af511e2afd731f3ab7cf5 100644
--- a/paddle/fluid/inference/tensorrt/op_teller.cc
+++ b/paddle/fluid/inference/tensorrt/op_teller.cc
@@ -51,6 +51,9 @@ struct SimpleOpTypeSetTeller : public Teller {
 #if IS_TRT_VERSION_GE(7130)
     teller_set.insert("group_norm");
 #endif
+#if IS_TRT_VERSION_GE(7000)
+    teller_set.insert("tile");
+#endif
 #if CUDA_VERSION >= 10020
     teller_set.insert("reshape");
     teller_set.insert("reshape2");
@@ -716,12 +719,12 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
         VLOG(3) << "the " << op_type
                 << " does not have attr (keep_dim or dim or "
                    "reduce_all)";
         return false;
       }
 
       // The batch size dimension cannot be reduced if it's not dynamic shape.
       if (!with_dynamic_shape) {
-        if (desc.HasAttr("reduce_all")) return false;
+        if (BOOST_GET_CONST(bool, desc.GetAttr("reduce_all"))) return false;
         std::vector<int32_t> dim =
             BOOST_GET_CONST(std::vector<int32_t>, desc.GetAttr("dim"));
         for (auto x : dim) {
@@ -729,6 +732,23 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
         }
       }
     }
+#if IS_TRT_VERSION_GE(7000)
+    if (op_type == "tile") {
+      // repeat_times supplied through an input tensor is not supported; only
+      // the static repeat_times attribute can be converted.
+      auto inputs = desc.InputArgumentNames();
+      for (const auto& input : inputs) {
+        if (input == "repeat_times_tensor" &&
+            desc.Input("repeat_times_tensor").size() > 0)
+          return false;
+        if (input == "RepeatTimes" && desc.Input("RepeatTimes").size() > 0)
+          return false;
+      }
+      // Dynamic shape is rejected for tile.
+      if (with_dynamic_shape) return false;
+      if (!desc.HasAttr("repeat_times")) return false;
+    }
+#endif
 
     if ((*teller)(op_type, desc, use_no_calib_int8)) return true;
   }
diff --git a/python/paddle/fluid/tests/unittests/ir/inference/CMakeLists.txt b/python/paddle/fluid/tests/unittests/ir/inference/CMakeLists.txt
index 281bbb078b74b6debefda75944fbb014a9128fa0..45e392cd66e90a14a1176f0bd076391a811de53c 100644
--- a/python/paddle/fluid/tests/unittests/ir/inference/CMakeLists.txt
+++ b/python/paddle/fluid/tests/unittests/ir/inference/CMakeLists.txt
@@ -37,4 +37,5 @@ set_tests_properties(test_trt_conv_pass PROPERTIES TIMEOUT 120)
 set_tests_properties(test_trt_dynamic_shape PROPERTIES TIMEOUT 120)
 set_tests_properties(test_trt_pool_op PROPERTIES ENVIRONMENT FLAGS_fraction_of_gpu_memory_to_use=0.1 TIMEOUT 45)
 set_tests_properties(test_trt_reduce_mean_op PROPERTIES TIMEOUT 60)
+set_tests_properties(test_trt_tile_op PROPERTIES TIMEOUT 60)
 endif()
diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_tile_op.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_tile_op.py
new file mode 100644
index 0000000000000000000000000000000000000000..cfdc9480aab48b3c9210b334bd000f0e18642a44
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_tile_op.py
@@ -0,0 +1,125 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import unittest
+import numpy as np
+from inference_pass_test import InferencePassTest
+import paddle
+import paddle.fluid as fluid
+import paddle.fluid.core as core
+from paddle.fluid.core import PassVersionChecker
+from paddle.fluid.core import AnalysisConfig
+
+
+class TRTTileTest(InferencePassTest):
+    """tile with repeat_times of all ones, run with Float32 precision."""
+
+    def setUp(self):
+        with fluid.program_guard(self.main_program, self.startup_program):
+            data = fluid.data(
+                name="data", shape=[4, 3, 224, 256], dtype="float32")
+            tile_out = paddle.tile(x=data, repeat_times=[1, 1, 1, 1])
+            out = fluid.layers.batch_norm(tile_out, is_test=True)
+
+        self.feeds = {
+            "data": np.random.random([4, 3, 224, 256]).astype("float32"),
+        }
+        self.enable_trt = True
+        self.trt_parameters = TRTTileTest.TensorRTParam(
+            1 << 30, 16, 1, AnalysisConfig.Precision.Float32, False, False)
+        self.fetch_list = [out]
+
+    def test_check_output(self):
+        if core.is_compiled_with_cuda():
+            use_gpu = True
+            self.check_output_with_option(use_gpu, flatten=True)
+            self.assertTrue(
+                PassVersionChecker.IsCompatible('tensorrt_subgraph_pass'))
+
+
+class TRTTileExpandTest(InferencePassTest):
+    """tile a [1, 1, 1, 1] input by [1, 4, 1080, 1920], Float32 precision."""
+
+    def setUp(self):
+        with fluid.program_guard(self.main_program, self.startup_program):
+            data = fluid.data(name="data", shape=[1, 1, 1, 1], dtype="float32")
+            tile_out = paddle.tile(x=data, repeat_times=[1, 4, 1080, 1920])
+            out = fluid.layers.batch_norm(tile_out, is_test=True)
+
+        self.feeds = {
+            "data": np.random.random([1, 1, 1, 1]).astype("float32"),
+        }
+        self.enable_trt = True
+        self.trt_parameters = TRTTileExpandTest.TensorRTParam(
+            1 << 30, 1, 1, AnalysisConfig.Precision.Float32, False, False)
+        self.fetch_list = [out]
+
+    def test_check_output(self):
+        if core.is_compiled_with_cuda():
+            use_gpu = True
+            self.check_output_with_option(use_gpu, flatten=True)
+            self.assertTrue(
+                PassVersionChecker.IsCompatible('tensorrt_subgraph_pass'))
+
+
+class TRTTileExpandStaticTest(InferencePassTest):
+    """Expand case, 5th TensorRTParam field (presumably use_static) is True."""
+
+    def setUp(self):
+        with fluid.program_guard(self.main_program, self.startup_program):
+            data = fluid.data(name="data", shape=[1, 1, 1, 1], dtype="float32")
+            tile_out = paddle.tile(x=data, repeat_times=[1, 4, 1080, 1920])
+            out = fluid.layers.batch_norm(tile_out, is_test=True)
+
+        self.feeds = {
+            "data": np.random.random([1, 1, 1, 1]).astype("float32"),
+        }
+        self.enable_trt = True
+        self.trt_parameters = TRTTileExpandStaticTest.TensorRTParam(
+            1 << 30, 1, 1, AnalysisConfig.Precision.Float32, True, False)
+        self.fetch_list = [out]
+
+    def test_check_output(self):
+        if core.is_compiled_with_cuda():
+            use_gpu = True
+            self.check_output_with_option(use_gpu, flatten=True)
+            self.assertTrue(
+                PassVersionChecker.IsCompatible('tensorrt_subgraph_pass'))
+
+
+class TRTTileExpandHalfTest(InferencePassTest):
+    """Expand case run with Precision.Half instead of Float32."""
+
+    def setUp(self):
+        with fluid.program_guard(self.main_program, self.startup_program):
+            data = fluid.data(name="data", shape=[1, 1, 1, 1], dtype="float32")
+            tile_out = paddle.tile(x=data, repeat_times=[1, 4, 1080, 1920])
+            out = fluid.layers.batch_norm(tile_out, is_test=True)
+
+        self.feeds = {
+            "data": np.random.random([1, 1, 1, 1]).astype("float32"),
+        }
+        self.enable_trt = True
+        self.trt_parameters = TRTTileExpandHalfTest.TensorRTParam(
+            1 << 30, 1, 1, AnalysisConfig.Precision.Half, False, False)
+        self.fetch_list = [out]
+
+    def test_check_output(self):
+        if core.is_compiled_with_cuda():
+            use_gpu = True
+            self.check_output_with_option(use_gpu, flatten=True)
+            self.assertTrue(
+                PassVersionChecker.IsCompatible('tensorrt_subgraph_pass'))
+
+
+if __name__ == "__main__":
+    unittest.main()