未验证 提交 24bde98f 编写于 作者: P Pei Yang 提交者: GitHub

[Paddle-TRT] add support for trt dynamic shape flatten op (#33394)

* add support for trt dynamic shape flatten op

* add version restriction

* add ut input dynamic shape
上级 fcd93b32
...@@ -25,7 +25,7 @@ namespace inference { ...@@ -25,7 +25,7 @@ namespace inference {
namespace tensorrt { namespace tensorrt {
/* /*
* FlattenOp, only support static shape mode currently. * FlattenOp trt converter
*/ */
class FlattenOpConverter : public OpConverter { class FlattenOpConverter : public OpConverter {
public: public:
...@@ -35,21 +35,48 @@ class FlattenOpConverter : public OpConverter { ...@@ -35,21 +35,48 @@ class FlattenOpConverter : public OpConverter {
// Declare inputs // Declare inputs
auto* input = engine_->GetITensor(op_desc.Input("X")[0]); auto* input = engine_->GetITensor(op_desc.Input("X")[0]);
int dims = input->getDimensions().nbDims; int dims = input->getDimensions().nbDims;
nvinfer1::IShuffleLayer* layer = nullptr;
if (!engine_->with_dynamic_shape()) {
int dim_prod = 1; int dim_prod = 1;
for (int i = 0; i < dims; i++) { for (int i = 0; i < dims; i++) {
int dim_i = input->getDimensions().d[i]; int dim_i = input->getDimensions().d[i];
PADDLE_ENFORCE_GT( PADDLE_ENFORCE_GT(
dim_i, 0, platform::errors::InvalidArgument( dim_i, 0,
platform::errors::InvalidArgument(
"flatten input dim should be > 0, but got %d.", dim_i)); "flatten input dim should be > 0, but got %d.", dim_i));
dim_prod *= dim_i; dim_prod *= dim_i;
} }
nvinfer1::Dims flatten_dim; nvinfer1::Dims flatten_dim;
flatten_dim.nbDims = 1; flatten_dim.nbDims = 1;
flatten_dim.d[0] = dim_prod; flatten_dim.d[0] = dim_prod;
auto* layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *input); layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *input);
layer->setReshapeDimensions(flatten_dim); layer->setReshapeDimensions(flatten_dim);
} else {
auto* shape_layer = TRT_ENGINE_ADD_LAYER(engine_, Shape, *input);
uint32_t reduce_dim = 1;
auto* reduce_prod_layer = TRT_ENGINE_ADD_LAYER(
engine_, Reduce, *(shape_layer->getOutput(0)),
nvinfer1::ReduceOperation::kPROD, reduce_dim, true);
int32_t* constant_weight_data = new int32_t[1];
constant_weight_data[0] = -1;
TensorRTEngine::Weight constant_weight{
nvinfer1::DataType::kINT32, static_cast<void*>(constant_weight_data),
1};
nvinfer1::Dims constant_dims;
constant_dims.nbDims = 1;
constant_dims.d[0] = 1;
auto* constant_layer = TRT_ENGINE_ADD_LAYER(
engine_, Constant, constant_dims, constant_weight.get());
std::vector<nvinfer1::ITensor*> itensors;
itensors.push_back(constant_layer->getOutput(0));
itensors.push_back(reduce_prod_layer->getOutput(0));
auto* concat_layer =
TRT_ENGINE_ADD_LAYER(engine_, Concatenation, itensors.data(), 2);
concat_layer->setAxis(0);
layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *input);
layer->setInput(1, *(concat_layer->getOutput(0)));
}
auto output_name = op_desc.Output("Out")[0]; auto output_name = op_desc.Output("Out")[0];
RreplenishLayerAndOutput(layer, "flatten", {output_name}, test_mode); RreplenishLayerAndOutput(layer, "flatten", {output_name}, test_mode);
} }
......
...@@ -300,23 +300,14 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8, ...@@ -300,23 +300,14 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
if (axis.size() >= nvinfer1::Dims::MAX_DIMS) return false; if (axis.size() >= nvinfer1::Dims::MAX_DIMS) return false;
} }
} }
if (op_type == "flatten2") { if (op_type == "flatten2" || op_type == "flatten") {
// flatten doesn't support dynamic shape currently
if (!desc.HasAttr("axis")) {
return false;
} else {
if (with_dynamic_shape) return false;
int axis = BOOST_GET_CONST(int, desc.GetAttr("axis"));
if (axis != 1) return false;
}
}
if (op_type == "flatten") {
// flatten doesn't support dynamic shape currently
if (!desc.HasAttr("axis")) { if (!desc.HasAttr("axis")) {
return false; return false;
} else { } else {
#if IS_TRT_VERSION_GE(7130)
#else
if (with_dynamic_shape) return false; if (with_dynamic_shape) return false;
#endif
int axis = BOOST_GET_CONST(int, desc.GetAttr("axis")); int axis = BOOST_GET_CONST(int, desc.GetAttr("axis"));
if (axis != 1) return false; if (axis != 1) return false;
} }
......
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import unittest
import numpy as np
from inference_pass_test import InferencePassTest
import paddle.fluid as fluid
import paddle.fluid.core as core
from paddle.fluid.core import PassVersionChecker
from paddle.fluid.core import AnalysisConfig
class TRTFlattenTest(InferencePassTest):
    """Runs a flatten -> batch_norm network through the TensorRT subgraph pass.

    No ``dynamic_shape_params`` are set by this case; only the TensorRT
    parameters and a single random input are configured.
    """

    def setUp(self):
        """Build the program and configure feeds, TRT parameters and fetches."""
        input_shape = [-1, 6, 64, 64]
        with fluid.program_guard(self.main_program, self.startup_program):
            net_input = fluid.data(
                name="data", shape=input_shape, dtype="float32")
            flattened = self.append_flatten(net_input)
            bn_out = fluid.layers.batch_norm(flattened, is_test=True)

        sample = np.random.random([1, 6, 64, 64]).astype("float32")
        self.feeds = {"data": sample}
        self.enable_trt = True
        # NOTE(review): positional arguments are presumably (workspace size,
        # max batch size, min subgraph size, precision, use_static,
        # use_calib_mode) -- confirm against InferencePassTest.TensorRTParam.
        self.trt_parameters = TRTFlattenTest.TensorRTParam(
            1 << 30, 32, 0, AnalysisConfig.Precision.Float32, False, False)
        self.fetch_list = [bn_out]

    def append_flatten(self, data):
        """Append a flatten layer with ``axis=1`` to ``data`` and return its output."""
        return fluid.layers.flatten(data, axis=1)

    def test_check_output(self):
        """Check outputs on GPU and the pass-version compatibility flag.

        Nothing is checked when Paddle was not compiled with CUDA.
        """
        if not core.is_compiled_with_cuda():
            return
        self.check_output_with_option(True)
        pass_is_compatible = PassVersionChecker.IsCompatible(
            'tensorrt_subgraph_pass')
        self.assertTrue(pass_is_compatible)
class TRTFlattenDynamicTest(InferencePassTest):
    """Runs a flatten -> batch_norm network through TensorRT with
    ``dynamic_shape_params`` configured for the input and the flatten output.
    """

    def setUp(self):
        """Build the program and configure feeds, TRT and dynamic-shape params."""
        with fluid.program_guard(self.main_program, self.startup_program):
            net_input = fluid.data(
                name="data", shape=[-1, 6, 64, 64], dtype="float32")
            flattened = self.append_flatten(net_input)
            bn_out = fluid.layers.batch_norm(flattened, is_test=True)

        sample = np.random.random([2, 6, 64, 64]).astype("float32")
        self.feeds = {"data": sample}
        self.enable_trt = True
        # NOTE(review): positional arguments are presumably (workspace size,
        # max batch size, min subgraph size, precision, use_static,
        # use_calib_mode) -- confirm against InferencePassTest.TensorRTParam.
        self.trt_parameters = TRTFlattenDynamicTest.TensorRTParam(
            1 << 30, 32, 0, AnalysisConfig.Precision.Float32, False, False)

        # Shape maps keyed by variable name; 'flatten_0.tmp_0' is the name
        # this test expects for the flatten output.
        # NOTE(review): the three maps look like (min, max, optimal) shapes
        # and the trailing flag like a plugin-fp16 switch -- confirm against
        # InferencePassTest.DynamicShapeParam.
        first_shapes = {
            'data': [1, 6, 8, 8],
            'flatten_0.tmp_0': [1, 6 * 8 * 8],
        }
        second_shapes = {
            'data': [3, 6, 128, 128],
            'flatten_0.tmp_0': [3, 6 * 128 * 128],
        }
        third_shapes = {
            'data': [2, 6, 64, 64],
            'flatten_0.tmp_0': [2, 6 * 64 * 64],
        }
        self.dynamic_shape_params = TRTFlattenDynamicTest.DynamicShapeParam(
            first_shapes, second_shapes, third_shapes, False)
        self.fetch_list = [bn_out]

    def append_flatten(self, data):
        """Append a flatten layer with ``axis=1`` to ``data`` and return its output."""
        return fluid.layers.flatten(data, axis=1)

    def test_check_output(self):
        """Check outputs on GPU and the pass-version compatibility flag.

        Nothing is checked when Paddle was not compiled with CUDA.
        """
        if not core.is_compiled_with_cuda():
            return
        self.check_output_with_option(True)
        pass_is_compatible = PassVersionChecker.IsCompatible(
            'tensorrt_subgraph_pass')
        self.assertTrue(pass_is_compatible)
# Run every test case in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()
...@@ -312,33 +312,6 @@ class TensorRTSubgraphPassTransposeTest(InferencePassTest): ...@@ -312,33 +312,6 @@ class TensorRTSubgraphPassTransposeTest(InferencePassTest):
PassVersionChecker.IsCompatible('tensorrt_subgraph_pass')) PassVersionChecker.IsCompatible('tensorrt_subgraph_pass'))
class TensorRTSubgraphPassFlattenTest(InferencePassTest):
    """Runs a flatten -> reshape -> batch_norm network through the TensorRT
    subgraph pass.
    """

    def setUp(self):
        """Build the program and configure feeds, TRT parameters and fetches."""
        with fluid.program_guard(self.main_program, self.startup_program):
            net_input = fluid.data(
                name="data", shape=[-1, 6, 64, 64], dtype="float32")
            flattened = self.append_flatten(net_input)
            # Reshape the flattened tensor to 4-D before batch_norm.
            reshaped = fluid.layers.reshape(flattened, [-1, 0, 1, 1])
            bn_out = fluid.layers.batch_norm(reshaped, is_test=True)

        sample = np.random.random([1, 6, 64, 64]).astype("float32")
        self.feeds = {"data": sample}
        self.enable_trt = True
        # NOTE(review): positional arguments are presumably (workspace size,
        # max batch size, min subgraph size, precision, use_static,
        # use_calib_mode) -- confirm against InferencePassTest.TensorRTParam.
        self.trt_parameters = TensorRTSubgraphPassFlattenTest.TensorRTParam(
            1 << 30, 32, 0, AnalysisConfig.Precision.Float32, False, False)
        self.fetch_list = [bn_out]

    def append_flatten(self, data):
        """Append a flatten layer with ``axis=1`` to ``data`` and return its output."""
        return fluid.layers.flatten(data, axis=1)

    def test_check_output(self):
        """Check outputs on GPU and the pass-version compatibility flag.

        Nothing is checked when Paddle was not compiled with CUDA.
        """
        if not core.is_compiled_with_cuda():
            return
        self.check_output_with_option(True)
        pass_is_compatible = PassVersionChecker.IsCompatible(
            'tensorrt_subgraph_pass')
        self.assertTrue(pass_is_compatible)
class TensorRTSubgraphPassLayerNormTest(InferencePassTest): class TensorRTSubgraphPassLayerNormTest(InferencePassTest):
def setUp(self): def setUp(self):
self.set_params() self.set_params()
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册