diff --git a/paddle/fluid/inference/tensorrt/convert/flatten_op.cc b/paddle/fluid/inference/tensorrt/convert/flatten_op.cc index 03a1c1672469eca959dc08800b248f96ef165b13..322b42667fa30ff31428f70c101ff81460a17df1 100644 --- a/paddle/fluid/inference/tensorrt/convert/flatten_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/flatten_op.cc @@ -25,7 +25,7 @@ namespace inference { namespace tensorrt { /* - * FlattenOp, only support static shape mode currently. + * FlattenOp trt converter */ class FlattenOpConverter : public OpConverter { public: @@ -35,21 +35,48 @@ class FlattenOpConverter : public OpConverter { // Declare inputs auto* input = engine_->GetITensor(op_desc.Input("X")[0]); int dims = input->getDimensions().nbDims; + nvinfer1::IShuffleLayer* layer = nullptr; + if (!engine_->with_dynamic_shape()) { + int dim_prod = 1; + for (int i = 0; i < dims; i++) { + int dim_i = input->getDimensions().d[i]; + PADDLE_ENFORCE_GT( + dim_i, 0, + platform::errors::InvalidArgument( + "flatten input dim should be > 0, but got %d.", dim_i)); + dim_prod *= dim_i; + } + nvinfer1::Dims flatten_dim; + flatten_dim.nbDims = 1; + flatten_dim.d[0] = dim_prod; + layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *input); + layer->setReshapeDimensions(flatten_dim); + } else { + auto* shape_layer = TRT_ENGINE_ADD_LAYER(engine_, Shape, *input); + uint32_t reduce_dim = 1; - int dim_prod = 1; - for (int i = 0; i < dims; i++) { - int dim_i = input->getDimensions().d[i]; - PADDLE_ENFORCE_GT( - dim_i, 0, platform::errors::InvalidArgument( - "flatten input dim should be > 0, but got %d.", dim_i)); - dim_prod *= dim_i; + auto* reduce_prod_layer = TRT_ENGINE_ADD_LAYER( + engine_, Reduce, *(shape_layer->getOutput(0)), + nvinfer1::ReduceOperation::kPROD, reduce_dim, true); + int32_t* constant_weight_data = new int32_t[1]; + constant_weight_data[0] = -1; + TensorRTEngine::Weight constant_weight{ + nvinfer1::DataType::kINT32, static_cast<void*>(constant_weight_data), + 1}; + nvinfer1::Dims constant_dims; + 
constant_dims.nbDims = 1; + constant_dims.d[0] = 1; + auto* constant_layer = TRT_ENGINE_ADD_LAYER( + engine_, Constant, constant_dims, constant_weight.get()); + std::vector<nvinfer1::ITensor*> itensors; + itensors.push_back(constant_layer->getOutput(0)); + itensors.push_back(reduce_prod_layer->getOutput(0)); + auto* concat_layer = + TRT_ENGINE_ADD_LAYER(engine_, Concatenation, itensors.data(), 2); + concat_layer->setAxis(0); + layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *input); + layer->setInput(1, *(concat_layer->getOutput(0))); } - nvinfer1::Dims flatten_dim; - flatten_dim.nbDims = 1; - flatten_dim.d[0] = dim_prod; - auto* layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *input); - layer->setReshapeDimensions(flatten_dim); - auto output_name = op_desc.Output("Out")[0]; RreplenishLayerAndOutput(layer, "flatten", {output_name}, test_mode); } diff --git a/paddle/fluid/inference/tensorrt/op_teller.cc b/paddle/fluid/inference/tensorrt/op_teller.cc index 0dc08a482733a388f8324444fb5a8d33e4bfb372..1bbfba7e419fb6631918dca971e21de5ee02fcda 100644 --- a/paddle/fluid/inference/tensorrt/op_teller.cc +++ b/paddle/fluid/inference/tensorrt/op_teller.cc @@ -300,23 +300,14 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8, if (axis.size() >= nvinfer1::Dims::MAX_DIMS) return false; } } - if (op_type == "flatten2") { - // flatten doesn't support dynamic shape currently - if (!desc.HasAttr("axis")) { - return false; - } else { - if (with_dynamic_shape) return false; - int axis = BOOST_GET_CONST(int, desc.GetAttr("axis")); - if (axis != 1) return false; - } - } - - if (op_type == "flatten") { - // flatten doesn't support dynamic shape currently + if (op_type == "flatten2" || op_type == "flatten") { if (!desc.HasAttr("axis")) { return false; } else { +#if IS_TRT_VERSION_GE(7130) +#else if (with_dynamic_shape) return false; +#endif int axis = BOOST_GET_CONST(int, desc.GetAttr("axis")); if (axis != 1) return false; } diff --git 
a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_flatten_op.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_flatten_op.py new file mode 100644 index 0000000000000000000000000000000000000000..1f8f829d27c2a7a8bf7c96229cb82e2446254261 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_flatten_op.py @@ -0,0 +1,87 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import unittest +import numpy as np +from inference_pass_test import InferencePassTest +import paddle.fluid as fluid +import paddle.fluid.core as core +from paddle.fluid.core import PassVersionChecker +from paddle.fluid.core import AnalysisConfig + + +class TRTFlattenTest(InferencePassTest): + def setUp(self): + with fluid.program_guard(self.main_program, self.startup_program): + data = fluid.data( + name="data", shape=[-1, 6, 64, 64], dtype="float32") + flatten_out = self.append_flatten(data) + out = fluid.layers.batch_norm(flatten_out, is_test=True) + self.feeds = { + "data": np.random.random([1, 6, 64, 64]).astype("float32"), + } + self.enable_trt = True + self.trt_parameters = TRTFlattenTest.TensorRTParam( + 1 << 30, 32, 0, AnalysisConfig.Precision.Float32, False, False) + self.fetch_list = [out] + + def append_flatten(self, data): + return fluid.layers.flatten(data, axis=1) + + def test_check_output(self): + if core.is_compiled_with_cuda(): + use_gpu 
= True + self.check_output_with_option(use_gpu) + self.assertTrue( + PassVersionChecker.IsCompatible('tensorrt_subgraph_pass')) + + +class TRTFlattenDynamicTest(InferencePassTest): + def setUp(self): + with fluid.program_guard(self.main_program, self.startup_program): + data = fluid.data( + name="data", shape=[-1, 6, 64, 64], dtype="float32") + flatten_out = self.append_flatten(data) + out = fluid.layers.batch_norm(flatten_out, is_test=True) + self.feeds = { + "data": np.random.random([2, 6, 64, 64]).astype("float32"), + } + self.enable_trt = True + self.trt_parameters = TRTFlattenDynamicTest.TensorRTParam( + 1 << 30, 32, 0, AnalysisConfig.Precision.Float32, False, False) + self.dynamic_shape_params = TRTFlattenDynamicTest.DynamicShapeParam({ + 'data': [1, 6, 8, 8], + 'flatten_0.tmp_0': [1, 6 * 8 * 8] + }, {'data': [3, 6, 128, 128], + 'flatten_0.tmp_0': [3, 6 * 128 * 128]}, { + 'data': [2, 6, 64, 64], + 'flatten_0.tmp_0': [2, 6 * 64 * 64] + }, False) + self.fetch_list = [out] + + def append_flatten(self, data): + return fluid.layers.flatten(data, axis=1) + + def test_check_output(self): + if core.is_compiled_with_cuda(): + use_gpu = True + self.check_output_with_option(use_gpu) + self.assertTrue( + PassVersionChecker.IsCompatible('tensorrt_subgraph_pass')) + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_subgraph_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_subgraph_pass.py index 0406e03f54bd4cb70f99d21dcb94b8d380da8954..d85f705c881354ef41a9cad05dc9fccf318e89c1 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_subgraph_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_subgraph_pass.py @@ -312,33 +312,6 @@ class TensorRTSubgraphPassTransposeTest(InferencePassTest): PassVersionChecker.IsCompatible('tensorrt_subgraph_pass')) -class TensorRTSubgraphPassFlattenTest(InferencePassTest): - def setUp(self): - with 
fluid.program_guard(self.main_program, self.startup_program): - data = fluid.data( - name="data", shape=[-1, 6, 64, 64], dtype="float32") - flatten_out = self.append_flatten(data) - reshape_out = fluid.layers.reshape(flatten_out, [-1, 0, 1, 1]) - out = fluid.layers.batch_norm(reshape_out, is_test=True) - self.feeds = { - "data": np.random.random([1, 6, 64, 64]).astype("float32"), - } - self.enable_trt = True - self.trt_parameters = TensorRTSubgraphPassFlattenTest.TensorRTParam( - 1 << 30, 32, 0, AnalysisConfig.Precision.Float32, False, False) - self.fetch_list = [out] - - def append_flatten(self, data): - return fluid.layers.flatten(data, axis=1) - - def test_check_output(self): - if core.is_compiled_with_cuda(): - use_gpu = True - self.check_output_with_option(use_gpu) - self.assertTrue( - PassVersionChecker.IsCompatible('tensorrt_subgraph_pass')) - - class TensorRTSubgraphPassLayerNormTest(InferencePassTest): def setUp(self): self.set_params()