Unverified commit f272e59a, authored by Shang Zhizhou, committed by GitHub

fix tc trt shape (#32458)

* fix tc trt shape

* fix fc dynamic shape

* add fc shape assert

* update
Parent 06276f46
paddle/fluid/inference/tensorrt/convert/fc_op.cc
@@ -160,66 +160,61 @@ class FcOpConverter : public OpConverter {
   if (engine_->with_dynamic_shape()) {
     // not NCHW layout, but NLP layout with added 'x 1 x 1'
     auto x_dim = X->getDimensions();
-    if (x_dim.nbDims == 3 || x_dim.nbDims == 2) {
-      auto output_name = op_desc.Output("Out").front();
-      // add shuffle before fc
-      nvinfer1::Dims reshape_before_fc_dim;
-      reshape_before_fc_dim.nbDims = x_dim.nbDims + 2;
-      for (int i = 0; i < x_dim.nbDims; i++) {
-        reshape_before_fc_dim.d[i] = 0;
-      }
-      reshape_before_fc_dim.d[x_dim.nbDims] = 1;
-      reshape_before_fc_dim.d[x_dim.nbDims + 1] = 1;
-      auto* reshape_before_fc_layer =
-          TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *X);
-      reshape_before_fc_layer->setReshapeDimensions(reshape_before_fc_dim);
-      reshape_before_fc_layer->setName(
-          ("shuffle_before_fc(Output: " + output_name + ")").c_str());
-
-      // add fc layer
-      auto* fc_layer = TRT_ENGINE_ADD_LAYER(
-          engine_, FullyConnected, *reshape_before_fc_layer->getOutput(0),
-          n_output, weight.get(), bias.get());
-      fc_layer->setName(("fc_layer(Output: " + output_name + ")").c_str());
-      // add shuffle after fc
-      nvinfer1::Dims reshape_after_fc_dim;
-      if (x_dim.nbDims == 3) {
-        if (x_num_col_dims == 2) {
-          reshape_after_fc_dim.nbDims = 3;
-          reshape_after_fc_dim.d[0] = 0;
-          reshape_after_fc_dim.d[1] = 0;
-          reshape_after_fc_dim.d[2] = 0;
-        } else {
-          reshape_after_fc_dim.nbDims = 2;
-          reshape_after_fc_dim.d[0] = 0;
-          auto dim = fc_layer->getOutput(0)->getDimensions();
-          reshape_after_fc_dim.d[1] = dim.d[1] * dim.d[2];
-        }
-        // x_dim.nbDims == 2
-      } else {
-        reshape_after_fc_dim.nbDims = 2;
-        reshape_after_fc_dim.d[0] = 0;
-        reshape_after_fc_dim.d[1] = 0;
-      }
-      auto* reshape_after_fc_layer =
-          TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *fc_layer->getOutput(0));
-      reshape_after_fc_layer->setReshapeDimensions(reshape_after_fc_dim);
-      if (activation_type == "relu") {
-        reshape_after_fc_layer->setName(
-            ("shuffle_after_fc(Output: " + output_name + ")").c_str());
-        nvinfer1::IActivationLayer* relu_layer = TRT_ENGINE_ADD_LAYER(
-            engine_, Activation, *(reshape_after_fc_layer->getOutput(0)),
-            nvinfer1::ActivationType::kRELU);
-        RreplenishLayerAndOutput(relu_layer, "relu_after_fc_shuffle",
-                                 {output_name}, test_mode);
-      } else {
-        RreplenishLayerAndOutput(reshape_after_fc_layer, "shuffle_after_fc",
-                                 {output_name}, test_mode);
-      }
-    } else {
-      regist_fc(X, n_output, weight, bias);
-    }
+    PADDLE_ENFORCE_LE(
+        x_dim.nbDims - x_num_col_dims, 3,
+        platform::errors::InvalidArgument(
+            "Params and input dims mismatch. Paddle-TRT FC "
+            "converter expects x_dim.nbDims - x_num_col_dims <= 3, but "
+            "x_dim.nbDims = %d, x_num_col_dims = %d.",
+            x_dim.nbDims, x_num_col_dims));
+    auto output_name = op_desc.Output("Out").front();
+    // add shuffle before fc
+    nvinfer1::Dims reshape_before_fc_dim;
+    // padding shape "x 1 x 1"
+    int padding_length = 3 - (x_dim.nbDims - x_num_col_dims);
+    reshape_before_fc_dim.nbDims = x_dim.nbDims + padding_length;
+    int cur_dim_index = reshape_before_fc_dim.nbDims - 1;
+    while (padding_length-- > 0) {
+      reshape_before_fc_dim.d[cur_dim_index--] = 1;
+    }
+    while (cur_dim_index >= 0) {
+      reshape_before_fc_dim.d[cur_dim_index--] = 0;
+    }
+
+    auto* reshape_before_fc_layer =
+        TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *X);
+    reshape_before_fc_layer->setReshapeDimensions(reshape_before_fc_dim);
+    reshape_before_fc_layer->setName(
+        ("shuffle_before_fc(Output: " + output_name + ")").c_str());
+
+    // add fc layer
+    auto* fc_layer = TRT_ENGINE_ADD_LAYER(
+        engine_, FullyConnected, *reshape_before_fc_layer->getOutput(0),
+        n_output, weight.get(), bias.get());
+    fc_layer->setName(("fc_layer(Output: " + output_name + ")").c_str());
+
+    // add shuffle after fc
+    nvinfer1::Dims reshape_after_fc_dim;
+    reshape_after_fc_dim.nbDims = x_num_col_dims + 1;
+    for (int i = 0; i < reshape_after_fc_dim.nbDims; i++) {
+      reshape_after_fc_dim.d[i] = 0;
+    }
+
+    auto* reshape_after_fc_layer =
+        TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *fc_layer->getOutput(0));
+    reshape_after_fc_layer->setReshapeDimensions(reshape_after_fc_dim);
+
+    if (activation_type == "relu") {
+      reshape_after_fc_layer->setName(
+          ("shuffle_after_fc(Output: " + output_name + ")").c_str());
+      nvinfer1::IActivationLayer* relu_layer = TRT_ENGINE_ADD_LAYER(
+          engine_, Activation, *(reshape_after_fc_layer->getOutput(0)),
+          nvinfer1::ActivationType::kRELU);
+      RreplenishLayerAndOutput(relu_layer, "relu_after_fc_shuffle",
+                               {output_name}, test_mode);
+    } else {
+      RreplenishLayerAndOutput(reshape_after_fc_layer, "shuffle_after_fc",
+                               {output_name}, test_mode);
+    }
     return;
   }
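For intuition, here is a minimal sketch (not part of the patch; the function name and example values are illustrative) of the dimension bookkeeping the new converter code performs. In a TensorRT reshape, a dimension value of 0 means "copy this dimension from the input tensor":

def fc_reshape_dims(x_dim, x_num_col_dims):
    # Mirrors the converter: pad the input with trailing 1s so the tensor
    # fed to FullyConnected always has rank x_num_col_dims + 3.
    assert len(x_dim) - x_num_col_dims <= 3  # mirrors the PADDLE_ENFORCE_LE
    padding_length = 3 - (len(x_dim) - x_num_col_dims)
    reshape_before_fc = [0] * len(x_dim) + [1] * padding_length
    # After FC, keep the first x_num_col_dims dims plus the channel dim;
    # all zeros, so every kept dim is copied from the FC output.
    reshape_after_fc = [0] * (x_num_col_dims + 1)
    return reshape_before_fc, reshape_after_fc

# e.g. a rank-3 NLP input flattened from its 3rd dim (x_num_col_dims=2):
print(fc_reshape_dims([32, 128, 32], 2))  # ([0, 0, 0, 1, 1], [0, 0, 0])

Because the padded FC input always has rank x_num_col_dims + 3, the converter no longer needs the old nbDims == 2 / nbDims == 3 special cases.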
...
paddle/fluid/inference/tensorrt/op_teller.cc
@@ -343,30 +343,6 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
       if (registry == nullptr) return false;
     }
-    if (op_type == "mul") {
-      const int x_num_col_dims =
-          desc.HasAttr("x_num_col_dims")
-              ? BOOST_GET_CONST(int, desc.GetAttr("x_num_col_dims"))
-              : (desc.HasAttr("in_num_col_dims")
-                     ? BOOST_GET_CONST(int, desc.GetAttr("in_num_col_dims"))
-                     : 1);
-      if (x_num_col_dims != 1 && x_num_col_dims != 2) {
-        return false;
-      }
-    }
-    if (op_type == "fc") {
-      const int x_num_col_dims =
-          desc.HasAttr("x_num_col_dims")
-              ? BOOST_GET_CONST(int, desc.GetAttr("x_num_col_dims"))
-              : (desc.HasAttr("in_num_col_dims")
-                     ? BOOST_GET_CONST(int, desc.GetAttr("in_num_col_dims"))
-                     : 1);
-      if (x_num_col_dims != 1 && x_num_col_dims != 2) {
-        return false;
-      }
-    }
     if (op_type == "nearest_interp") {
       std::vector<std::string> attrs{"data_layout", "interp_method",
                                      "align_corners", "scale",
...
python/paddle/fluid/tests/unittests/CMakeLists.txt
@@ -819,7 +819,7 @@ set_tests_properties(test_imperative_optimizer PROPERTIES TIMEOUT 120)
 set_tests_properties(test_pool2d_op PROPERTIES TIMEOUT 120)
 set_tests_properties(test_transpose_op PROPERTIES TIMEOUT 120)
 set_tests_properties(test_eager_deletion_gru_net PROPERTIES TIMEOUT 120)
-set_tests_properties(test_activation_op PROPERTIES TIMEOUT 180)
+set_tests_properties(test_activation_op PROPERTIES TIMEOUT 270)
 set_tests_properties(test_normal PROPERTIES TIMEOUT 120)
 set_tests_properties(test_lstmp_op PROPERTIES TIMEOUT 120)
 set_tests_properties(test_bilinear_interp_op PROPERTIES TIMEOUT 120)
...
python/paddle/fluid/tests/unittests/ir/inference/test_trt_fc_fuse_pass.py
@@ -55,5 +55,182 @@ class FCFusePassTRTTest(InferencePassTest):
             self.check_output_with_option(use_gpu[i])
 
 
+class FCFusePassTRTDynamicDims2Test(InferencePassTest):
+    def setUp(self):
+        with fluid.program_guard(self.main_program, self.startup_program):
+            data = fluid.data(name="data", shape=[32, 128], dtype="float32")
+            fc_out1 = fluid.layers.fc(input=data,
+                                      size=64,
+                                      num_flatten_dims=1,
+                                      act="relu")
+            out = fluid.layers.softmax(input=fc_out1)
+
+        self.feeds = {"data": np.random.random((32, 128)).astype("float32")}
+        self.enable_trt = True
+        self.trt_parameters = FCFusePassTRTDynamicDims2Test.TensorRTParam(
+            1 << 30, 32, 2, AnalysisConfig.Precision.Float32, False, False)
+        self.dynamic_shape_params = FCFusePassTRTDynamicDims2Test.DynamicShapeParam(
+            {
+                'data': [1, 128]
+            }, {'data': [64, 128]}, {'data': [32, 128]}, False)
+        self.fetch_list = [out]
+
+    def test_check_output(self):
+        use_gpu = [False]
+        if core.is_compiled_with_cuda():
+            use_gpu.append(True)
+        for i in range(len(use_gpu)):
+            self.check_output_with_option(use_gpu[i])
+
+
+class FCFusePassTRTDynamicDims3Cols1Test(InferencePassTest):
+    def setUp(self):
+        with fluid.program_guard(self.main_program, self.startup_program):
+            data = fluid.data(name="data", shape=[32, 128, 32], dtype="float32")
+            fc_out1 = fluid.layers.fc(input=data,
+                                      size=64,
+                                      num_flatten_dims=1,
+                                      act="relu")
+            out = fluid.layers.softmax(input=fc_out1)
+
+        self.feeds = {"data": np.random.random((32, 128, 32)).astype("float32")}
+        self.enable_trt = True
+        self.trt_parameters = FCFusePassTRTDynamicDims3Cols1Test.TensorRTParam(
+            1 << 30, 32, 2, AnalysisConfig.Precision.Float32, False, False)
+        self.dynamic_shape_params = FCFusePassTRTDynamicDims3Cols1Test.DynamicShapeParam(
+            {
+                'data': [1, 128, 32]
+            }, {'data': [64, 128, 32]}, {'data': [32, 128, 32]}, False)
+        self.fetch_list = [out]
+
+    def test_check_output(self):
+        use_gpu = [False]
+        if core.is_compiled_with_cuda():
+            use_gpu.append(True)
+        for i in range(len(use_gpu)):
+            self.check_output_with_option(use_gpu[i])
+
+
+class FCFusePassTRTDynamicDims3Cols2Test(InferencePassTest):
+    def setUp(self):
+        with fluid.program_guard(self.main_program, self.startup_program):
+            data = fluid.data(name="data", shape=[32, 128, 32], dtype="float32")
+            fc_out1 = fluid.layers.fc(input=data,
+                                      size=64,
+                                      num_flatten_dims=2,
+                                      act="relu")
+            out = fluid.layers.softmax(input=fc_out1)
+
+        self.feeds = {"data": np.random.random((32, 128, 32)).astype("float32")}
+        self.enable_trt = True
+        self.trt_parameters = FCFusePassTRTDynamicDims3Cols2Test.TensorRTParam(
+            1 << 30, 32, 2, AnalysisConfig.Precision.Float32, False, False)
+        self.dynamic_shape_params = FCFusePassTRTDynamicDims3Cols2Test.DynamicShapeParam(
+            {
+                'data': [1, 32, 32]
+            }, {'data': [64, 256, 32]}, {'data': [32, 128, 32]}, False)
+        self.fetch_list = [out]
+
+    def test_check_output(self):
+        use_gpu = [False]
+        if core.is_compiled_with_cuda():
+            use_gpu.append(True)
+        for i in range(len(use_gpu)):
+            self.check_output_with_option(use_gpu[i])
+
+
+class FCFusePassTRTDynamicDims4Cols1Test(InferencePassTest):
+    def setUp(self):
+        with fluid.program_guard(self.main_program, self.startup_program):
+            data = fluid.data(
+                name="data", shape=[32, 12, 4, 6], dtype="float32")
+            fc_out1 = fluid.layers.fc(input=data,
+                                      size=64,
+                                      num_flatten_dims=1,
+                                      act="relu")
+            out = fluid.layers.softmax(input=fc_out1)
+
+        self.feeds = {
+            "data": np.random.random((32, 12, 4, 6)).astype("float32")
+        }
+        self.enable_trt = True
+        self.trt_parameters = FCFusePassTRTDynamicDims4Cols1Test.TensorRTParam(
+            1 << 30, 32, 2, AnalysisConfig.Precision.Float32, False, False)
+        self.dynamic_shape_params = FCFusePassTRTDynamicDims4Cols1Test.DynamicShapeParam(
+            {
+                'data': [1, 12, 4, 6]
+            }, {'data': [64, 12, 4, 6]}, {'data': [32, 12, 4, 6]}, False)
+        self.fetch_list = [out]
+
+    def test_check_output(self):
+        use_gpu = [False]
+        if core.is_compiled_with_cuda():
+            use_gpu.append(True)
+        for i in range(len(use_gpu)):
+            self.check_output_with_option(use_gpu[i])
+
+
+class FCFusePassTRTDynamicDims4Cols2Test(InferencePassTest):
+    def setUp(self):
+        with fluid.program_guard(self.main_program, self.startup_program):
+            data = fluid.data(
+                name="data", shape=[32, 128, 32, 32], dtype="float32")
+            fc_out1 = fluid.layers.fc(input=data,
+                                      size=64,
+                                      num_flatten_dims=2,
+                                      act="relu")
+            out = fluid.layers.softmax(input=fc_out1)
+
+        self.feeds = {
+            "data": np.random.random((32, 128, 32, 32)).astype("float32")
+        }
+        self.enable_trt = True
+        self.trt_parameters = FCFusePassTRTDynamicDims4Cols2Test.TensorRTParam(
+            1 << 30, 32, 2, AnalysisConfig.Precision.Float32, False, False)
+        self.dynamic_shape_params = FCFusePassTRTDynamicDims4Cols2Test.DynamicShapeParam(
+            {
+                'data': [1, 64, 32, 32]
+            }, {'data': [64, 256, 32, 32]}, {'data': [32, 128, 32, 32]},
+            False)
+        self.fetch_list = [out]
+
+    def test_check_output(self):
+        use_gpu = [False]
+        if core.is_compiled_with_cuda():
+            use_gpu.append(True)
+        for i in range(len(use_gpu)):
+            self.check_output_with_option(use_gpu[i])
+
+
+class FCFusePassTRTDynamicDims4Cols3Test(InferencePassTest):
+    def setUp(self):
+        with fluid.program_guard(self.main_program, self.startup_program):
+            data = fluid.data(
+                name="data", shape=[32, 128, 32, 32], dtype="float32")
+            fc_out1 = fluid.layers.fc(input=data,
+                                      size=64,
+                                      num_flatten_dims=3,
+                                      act="relu")
+            out = fluid.layers.softmax(input=fc_out1)
+
+        self.feeds = {
+            "data": np.random.random((32, 128, 32, 32)).astype("float32")
+        }
+        self.enable_trt = True
+        self.trt_parameters = FCFusePassTRTDynamicDims4Cols3Test.TensorRTParam(
+            1 << 30, 32, 2, AnalysisConfig.Precision.Float32, False, False)
+        self.dynamic_shape_params = FCFusePassTRTDynamicDims4Cols3Test.DynamicShapeParam(
+            {
+                'data': [1, 128, 32, 32]
+            }, {'data': [64, 128, 32, 32]}, {'data': [32, 128, 32, 32]},
+            False)
+        self.fetch_list = [out]
+
+    def test_check_output(self):
+        use_gpu = [False]
+        if core.is_compiled_with_cuda():
+            use_gpu.append(True)
+        for i in range(len(use_gpu)):
+            self.check_output_with_option(use_gpu[i])
+
+
 if __name__ == "__main__":
     unittest.main()