Unverified · Commit 2e9e3fad authored by Pei Yang, committed by GitHub

add n-d input support for trt scale converter (#31316)

* add n-d input support for trt scale converter

* add flatten for ut

* fix dims
Parent 6404c438
@@ -58,6 +58,8 @@ class ScaleOpConverter : public OpConverter {
       return tmp_data;
     };
+    int dynamic_shape_offset = engine_->with_dynamic_shape() ? 1 : 0;
     float* bias_ptr = create_weights(bias, "bias");
     float* scale_ptr = create_weights(scale, "scale");
@@ -70,19 +72,22 @@ class ScaleOpConverter : public OpConverter {
     nvinfer1::ILayer* layer = nullptr;
     auto input_dim = input->getDimensions();
-    PADDLE_ENFORCE_GE(input_dim.nbDims, 3,
-                      platform::errors::Fatal(
-                          "Paddle-TRT scale mode only support dimension >= 3"));
     nvinfer1::IShuffleLayer* expand_layer = nullptr;
     nvinfer1::IShuffleLayer* squeeze_layer = nullptr;
-    if (input_dim.nbDims == 3) {
+    // TensorRT scale layer is not supporting input dims < 4 when using
+    // explicit batch
+    if (input_dim.nbDims < 3 + dynamic_shape_offset) {
+      nvinfer1::Dims expand_shape;
+      expand_shape.nbDims = 3 + dynamic_shape_offset;
+      for (int i = 0; i < 3 + dynamic_shape_offset; i++) {
+        if (i < input_dim.nbDims) {
+          expand_shape.d[i] = input_dim.d[i] < 0 ? 0 : input_dim.d[i];
+        } else {
+          expand_shape.d[i] = 1;
+        }
+      }
       expand_layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *input);
-      nvinfer1::Dims4 target_shape(0, 0, 0, 1);  // expand 1 dims
-      expand_layer->setReshapeDimensions(target_shape);
+      expand_layer->setReshapeDimensions(expand_shape);
       input = expand_layer->getOutput(0);
     }
@@ -104,13 +109,15 @@ class ScaleOpConverter : public OpConverter {
     PADDLE_ENFORCE_EQ(layer != nullptr, true,
                       platform::errors::Fatal("Create scale layer failed."));
-    if (input_dim.nbDims == 3) {
+    // TensorRT scale layer is not supporting input dims < 4 when using
+    // explicit batch
+    if (input_dim.nbDims < 3 + dynamic_shape_offset) {
+      nvinfer1::Dims squeeze_shape;
+      squeeze_shape.nbDims = input_dim.nbDims;
+      for (int i = 0; i < squeeze_shape.nbDims; i++) {
+        squeeze_shape.d[i] = input_dim.d[i] < 0 ? 0 : input_dim.d[i];
+      }
       squeeze_layer =
           TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *(layer->getOutput(0)));
-      nvinfer1::Dims3 target_shape(0, 0, 0);  // expand 1 dims
-      squeeze_layer->setReshapeDimensions(target_shape);
+      squeeze_layer->setReshapeDimensions(squeeze_shape);
       layer = static_cast<nvinfer1::ILayer*>(squeeze_layer);
     }
     RreplenishLayerAndOutput(layer, "scale", {out_name}, test_mode);
......
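In short, the converter no longer rejects inputs with fewer than 3 dimensions (4 under dynamic shape): it reshapes such inputs up to the minimum rank the TensorRT scale layer accepts, applies the scale, and reshapes the result back. Below is a minimal numpy sketch of that expand-scale-squeeze pattern, illustrative only; the function name scale_with_rank_padding and the min_rank parameter are made up for this example and are not Paddle or TensorRT API.

import numpy as np

def scale_with_rank_padding(x, scale=2.0, bias=-1.0, min_rank=4):
    # Mirror of the converter's semantics: bias_after_scale=False means
    # y = scale * (x + bias).
    orig_shape = x.shape
    if x.ndim < min_rank:
        # like expand_shape above: keep existing dims, append trailing 1s
        x = x.reshape(orig_shape + (1,) * (min_rank - x.ndim))
    y = scale * (x + bias)
    # like squeeze_shape above: restore the original dims
    return y.reshape(orig_shape)

print(scale_with_rank_padding(np.ones([1, 512], dtype="float32")).shape)  # (1, 512)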
@@ -153,6 +153,7 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
       }
     }
     if (op_type == "group_norm") {
+      if (!with_dynamic_shape) return false;
       bool has_attrs = (desc.HasAttr("epsilon") && desc.HasAttr("groups"));
       if (has_attrs == false) return false;
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import unittest
import numpy as np
from inference_pass_test import InferencePassTest
import paddle.fluid as fluid
import paddle.fluid.core as core
from paddle.fluid.core import PassVersionChecker
from paddle.fluid.core import AnalysisConfig
class TRTScaleTest(InferencePassTest):
    def setUp(self):
        with fluid.program_guard(self.main_program, self.startup_program):
            data = fluid.data(name="data", shape=[-1, 512], dtype="float32")
            scale_out = self.append_scale(data)
            out = fluid.layers.batch_norm(scale_out, is_test=True)

        self.feeds = {"data": np.random.random([1, 512]).astype("float32"), }
        self.enable_trt = True
        self.trt_parameters = TRTScaleTest.TensorRTParam(
            1 << 30, 32, 1, AnalysisConfig.Precision.Float32, False, False)
        self.fetch_list = [out]

    def append_scale(self, data):
        return fluid.layers.scale(
            x=data, scale=2.0, bias=-1.0, bias_after_scale=False)

    def test_check_output(self):
        if core.is_compiled_with_cuda():
            use_gpu = True
            self.check_output_with_option(use_gpu, flatten=True)
            self.assertTrue(
                PassVersionChecker.IsCompatible('tensorrt_subgraph_pass'))


if __name__ == "__main__":
    unittest.main()
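Per the commit title, the converter change is meant to be exercised with an N-dimensional input as well. A sketch of an additional case this test file could carry for the N-D path follows; the class name TRTScaleShape2Test and the 3-D shape are illustrative, not copied from the diff above.

class TRTScaleShape2Test(InferencePassTest):
    def setUp(self):
        with fluid.program_guard(self.main_program, self.startup_program):
            # a 3-D input hits the new expand/squeeze branch in the converter
            data = fluid.data(
                name="data", shape=[-1, 512, 12], dtype="float32")
            scale_out = self.append_scale(data)
            out = fluid.layers.batch_norm(scale_out, is_test=True)

        self.feeds = {
            "data": np.random.random([1, 512, 12]).astype("float32"),
        }
        self.enable_trt = True
        self.trt_parameters = TRTScaleShape2Test.TensorRTParam(
            1 << 30, 32, 1, AnalysisConfig.Precision.Float32, False, False)
        self.fetch_list = [out]

    def append_scale(self, data):
        return fluid.layers.scale(
            x=data, scale=2.0, bias=-1.0, bias_after_scale=False)

    def test_check_output(self):
        if core.is_compiled_with_cuda():
            use_gpu = True
            self.check_output_with_option(use_gpu, flatten=True)
            self.assertTrue(
                PassVersionChecker.IsCompatible('tensorrt_subgraph_pass'))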