Unverified commit f272e59a authored by Shang Zhizhou, committed by GitHub

fix tc trt shape (#32458)

* fix tc trt shape

* fix fc dynamic shape

* add fc shape assert

* update
Parent 06276f46
@@ -160,16 +160,27 @@ class FcOpConverter : public OpConverter {
if (engine_->with_dynamic_shape()) {
// not NCHW layout, but NLP layout with added 'x 1 x 1'
auto x_dim = X->getDimensions();
if (x_dim.nbDims == 3 || x_dim.nbDims == 2) {
PADDLE_ENFORCE_LE(
x_dim.nbDims - x_num_col_dims, 3,
platform::errors::InvalidArgument(
"Params and input dims mismatch. Paddle-TRT FC "
"converter expects x_dim.nbDims - x_num_col_dims <= 3, but "
"x_dim.nbDims = %d, x_num_col_dims = %d.",
x_dim.nbDims, x_num_col_dims));
auto output_name = op_desc.Output("Out").front();
// add shuffle before fc
nvinfer1::Dims reshape_before_fc_dim;
reshape_before_fc_dim.nbDims = x_dim.nbDims + 2;
for (int i = 0; i < x_dim.nbDims; i++) {
reshape_before_fc_dim.d[i] = 0;
// padding shape "x 1 x 1"
int padding_length = 3 - (x_dim.nbDims - x_num_col_dims);
reshape_before_fc_dim.nbDims = x_dim.nbDims + padding_length;
int cur_dim_index = reshape_before_fc_dim.nbDims - 1;
while (padding_length-- > 0) {
reshape_before_fc_dim.d[cur_dim_index--] = 1;
}
while (cur_dim_index >= 0) {
reshape_before_fc_dim.d[cur_dim_index--] = 0;
}
reshape_before_fc_dim.d[x_dim.nbDims] = 1;
reshape_before_fc_dim.d[x_dim.nbDims + 1] = 1;
auto* reshape_before_fc_layer =
TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *X);
reshape_before_fc_layer->setReshapeDimensions(reshape_before_fc_dim);
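The padding arithmetic above is easiest to see with concrete numbers. A minimal Python sketch (illustration only, not part of the diff), using TensorRT's convention that a 0 entry in a reshape dimension copies the corresponding input dimension:

def reshape_before_fc(x_nb_dims, x_num_col_dims):
    # pad 1s on the right until exactly 3 dims follow the first
    # x_num_col_dims dims, mirroring the two while loops above
    padding_length = 3 - (x_nb_dims - x_num_col_dims)
    return [0] * x_nb_dims + [1] * padding_length

print(reshape_before_fc(2, 1))  # [0, 0, 1, 1]: (N, C) -> (N, C, 1, 1)
print(reshape_before_fc(3, 2))  # [0, 0, 0, 1, 1]: (N, S, C) -> (N, S, C, 1, 1)
print(reshape_before_fc(4, 1))  # [0, 0, 0, 0]: 3 dims already follow, no padding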
@@ -184,24 +195,11 @@ class FcOpConverter : public OpConverter {
// add shuffle after fc
nvinfer1::Dims reshape_after_fc_dim;
if (x_dim.nbDims == 3) {
if (x_num_col_dims == 2) {
reshape_after_fc_dim.nbDims = 3;
reshape_after_fc_dim.d[0] = 0;
reshape_after_fc_dim.d[1] = 0;
reshape_after_fc_dim.d[2] = 0;
} else {
reshape_after_fc_dim.nbDims = 2;
reshape_after_fc_dim.d[0] = 0;
auto dim = fc_layer->getOutput(0)->getDimensions();
reshape_after_fc_dim.d[1] = dim.d[1] * dim.d[2];
}
// x_dim.nbDims == 2
} else {
reshape_after_fc_dim.nbDims = 2;
reshape_after_fc_dim.d[0] = 0;
reshape_after_fc_dim.d[1] = 0;
reshape_after_fc_dim.nbDims = x_num_col_dims + 1;
for (int i = 0; i < reshape_after_fc_dim.nbDims; i++) {
reshape_after_fc_dim.d[i] = 0;
}
auto* reshape_after_fc_layer =
TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *fc_layer->getOutput(0));
reshape_after_fc_layer->setReshapeDimensions(reshape_after_fc_dim);
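As a companion sketch (again not part of the diff): instead of special-casing x_dim.nbDims == 2 and == 3, the shuffle after fc now always keeps the first x_num_col_dims dimensions plus one dimension for the fc output channels:

def reshape_after_fc(x_num_col_dims):
    # one 0 (copy) per kept batch dim, plus one for the output channels
    return [0] * (x_num_col_dims + 1)

print(reshape_after_fc(1))  # [0, 0]    -> (N, K)
print(reshape_after_fc(2))  # [0, 0, 0] -> (N, S, K)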
@@ -218,9 +216,6 @@ class FcOpConverter : public OpConverter {
RreplenishLayerAndOutput(reshape_after_fc_layer, "shuffle_after_fc",
{output_name}, test_mode);
}
} else {
regist_fc(X, n_output, weight, bias);
}
return;
}
// in order to handle situations in NLP models(input dims < 3,
......
@@ -343,30 +343,6 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
if (registry == nullptr) return false;
}
if (op_type == "mul") {
const int x_num_col_dims =
desc.HasAttr("x_num_col_dims")
? BOOST_GET_CONST(int, desc.GetAttr("x_num_col_dims"))
: (desc.HasAttr("in_num_col_dims")
? BOOST_GET_CONST(int, desc.GetAttr("in_num_col_dims"))
: 1);
if (x_num_col_dims != 1 && x_num_col_dims != 2) {
return false;
}
}
if (op_type == "fc") {
const int x_num_col_dims =
desc.HasAttr("x_num_col_dims")
? BOOST_GET_CONST(int, desc.GetAttr("x_num_col_dims"))
: (desc.HasAttr("in_num_col_dims")
? BOOST_GET_CONST(int, desc.GetAttr("in_num_col_dims"))
: 1);
if (x_num_col_dims != 1 && x_num_col_dims != 2) {
return false;
}
}
if (op_type == "nearest_interp") {
std::vector<std::string> attrs{"data_layout", "interp_method",
"align_corners", "scale",
......
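For reference, the attribute lookup used by the deleted teller checks can be rendered in Python as follows (hypothetical helper, not a Paddle API). With the checks gone, fc and mul ops with any x_num_col_dims reach the converter, which now rejects unsupported ranks through the PADDLE_ENFORCE_LE added above:

def get_x_num_col_dims(attrs):
    # same lookup order as the deleted C++: x_num_col_dims first,
    # then in_num_col_dims, then a default of 1
    return attrs.get("x_num_col_dims", attrs.get("in_num_col_dims", 1))

print(get_x_num_col_dims({"x_num_col_dims": 3}))  # 3, no longer filtered here
print(get_x_num_col_dims({}))                     # 1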
@@ -819,7 +819,7 @@ set_tests_properties(test_imperative_optimizer PROPERTIES TIMEOUT 120)
set_tests_properties(test_pool2d_op PROPERTIES TIMEOUT 120)
set_tests_properties(test_transpose_op PROPERTIES TIMEOUT 120)
set_tests_properties(test_eager_deletion_gru_net PROPERTIES TIMEOUT 120)
set_tests_properties(test_activation_op PROPERTIES TIMEOUT 180)
set_tests_properties(test_activation_op PROPERTIES TIMEOUT 270)
set_tests_properties(test_normal PROPERTIES TIMEOUT 120)
set_tests_properties(test_lstmp_op PROPERTIES TIMEOUT 120)
set_tests_properties(test_bilinear_interp_op PROPERTIES TIMEOUT 120)
......
@@ -55,5 +55,182 @@ class FCFusePassTRTTest(InferencePassTest):
            self.check_output_with_option(use_gpu[i])


class FCFusePassTRTDynamicDims2Test(InferencePassTest):
    def setUp(self):
        with fluid.program_guard(self.main_program, self.startup_program):
            data = fluid.data(name="data", shape=[32, 128], dtype="float32")
            fc_out1 = fluid.layers.fc(input=data,
                                      size=64,
                                      num_flatten_dims=1,
                                      act="relu")
            out = fluid.layers.softmax(input=fc_out1)

        self.feeds = {"data": np.random.random((32, 128)).astype("float32")}
        self.enable_trt = True
        self.trt_parameters = FCFusePassTRTDynamicDims2Test.TensorRTParam(
            1 << 30, 32, 2, AnalysisConfig.Precision.Float32, False, False)
        self.dynamic_shape_params = FCFusePassTRTDynamicDims2Test.DynamicShapeParam(
            {
                'data': [1, 128]
            }, {'data': [64, 128]}, {'data': [32, 128]}, False)
        self.fetch_list = [out]

    def test_check_output(self):
        use_gpu = [False]
        if core.is_compiled_with_cuda():
            use_gpu.append(True)
        for i in range(len(use_gpu)):
            self.check_output_with_option(use_gpu[i])


class FCFusePassTRTDynamicDims3Cols1Test(InferencePassTest):
    def setUp(self):
        with fluid.program_guard(self.main_program, self.startup_program):
            data = fluid.data(name="data", shape=[32, 128, 32], dtype="float32")
            fc_out1 = fluid.layers.fc(input=data,
                                      size=64,
                                      num_flatten_dims=1,
                                      act="relu")
            out = fluid.layers.softmax(input=fc_out1)

        self.feeds = {"data": np.random.random((32, 128, 32)).astype("float32")}
        self.enable_trt = True
        self.trt_parameters = FCFusePassTRTDynamicDims3Cols1Test.TensorRTParam(
            1 << 30, 32, 2, AnalysisConfig.Precision.Float32, False, False)
        self.dynamic_shape_params = FCFusePassTRTDynamicDims3Cols1Test.DynamicShapeParam(
            {
                'data': [1, 128, 32]
            }, {'data': [64, 128, 32]}, {'data': [32, 128, 32]}, False)
        self.fetch_list = [out]

    def test_check_output(self):
        use_gpu = [False]
        if core.is_compiled_with_cuda():
            use_gpu.append(True)
        for i in range(len(use_gpu)):
            self.check_output_with_option(use_gpu[i])


class FCFusePassTRTDynamicDims3Cols2Test(InferencePassTest):
    def setUp(self):
        with fluid.program_guard(self.main_program, self.startup_program):
            data = fluid.data(name="data", shape=[32, 128, 32], dtype="float32")
            fc_out1 = fluid.layers.fc(input=data,
                                      size=64,
                                      num_flatten_dims=2,
                                      act="relu")
            out = fluid.layers.softmax(input=fc_out1)

        self.feeds = {"data": np.random.random((32, 128, 32)).astype("float32")}
        self.enable_trt = True
        self.trt_parameters = FCFusePassTRTDynamicDims3Cols2Test.TensorRTParam(
            1 << 30, 32, 2, AnalysisConfig.Precision.Float32, False, False)
        self.dynamic_shape_params = FCFusePassTRTDynamicDims3Cols2Test.DynamicShapeParam(
            {
                'data': [1, 32, 32]
            }, {'data': [64, 256, 32]}, {'data': [32, 128, 32]}, False)
        self.fetch_list = [out]

    def test_check_output(self):
        use_gpu = [False]
        if core.is_compiled_with_cuda():
            use_gpu.append(True)
        for i in range(len(use_gpu)):
            self.check_output_with_option(use_gpu[i])


class FCFusePassTRTDynamicDims4Cols1Test(InferencePassTest):
    def setUp(self):
        with fluid.program_guard(self.main_program, self.startup_program):
            data = fluid.data(
                name="data", shape=[32, 12, 4, 6], dtype="float32")
            fc_out1 = fluid.layers.fc(input=data,
                                      size=64,
                                      num_flatten_dims=1,
                                      act="relu")
            out = fluid.layers.softmax(input=fc_out1)

        self.feeds = {
            "data": np.random.random((32, 12, 4, 6)).astype("float32")
        }
        self.enable_trt = True
        self.trt_parameters = FCFusePassTRTDynamicDims4Cols1Test.TensorRTParam(
            1 << 30, 32, 2, AnalysisConfig.Precision.Float32, False, False)
        self.dynamic_shape_params = FCFusePassTRTDynamicDims4Cols1Test.DynamicShapeParam(
            {
                'data': [1, 12, 4, 6]
            }, {'data': [64, 12, 4, 6]}, {'data': [32, 12, 4, 6]}, False)
        self.fetch_list = [out]

    def test_check_output(self):
        use_gpu = [False]
        if core.is_compiled_with_cuda():
            use_gpu.append(True)
        for i in range(len(use_gpu)):
            self.check_output_with_option(use_gpu[i])


class FCFusePassTRTDynamicDims4Cols2Test(InferencePassTest):
    def setUp(self):
        with fluid.program_guard(self.main_program, self.startup_program):
            data = fluid.data(
                name="data", shape=[32, 128, 32, 32], dtype="float32")
            fc_out1 = fluid.layers.fc(input=data,
                                      size=64,
                                      num_flatten_dims=2,
                                      act="relu")
            out = fluid.layers.softmax(input=fc_out1)

        self.feeds = {
            "data": np.random.random((32, 128, 32, 32)).astype("float32")
        }
        self.enable_trt = True
        self.trt_parameters = FCFusePassTRTDynamicDims4Cols2Test.TensorRTParam(
            1 << 30, 32, 2, AnalysisConfig.Precision.Float32, False, False)
        self.dynamic_shape_params = FCFusePassTRTDynamicDims4Cols2Test.DynamicShapeParam(
            {
                'data': [1, 64, 32, 32]
            }, {'data': [64, 256, 32, 32]}, {'data': [32, 128, 32, 32]}, False)
        self.fetch_list = [out]

    def test_check_output(self):
        use_gpu = [False]
        if core.is_compiled_with_cuda():
            use_gpu.append(True)
        for i in range(len(use_gpu)):
            self.check_output_with_option(use_gpu[i])


class FCFusePassTRTDynamicDims4Cols3Test(InferencePassTest):
    def setUp(self):
        with fluid.program_guard(self.main_program, self.startup_program):
            data = fluid.data(
                name="data", shape=[32, 128, 32, 32], dtype="float32")
            fc_out1 = fluid.layers.fc(input=data,
                                      size=64,
                                      num_flatten_dims=3,
                                      act="relu")
            out = fluid.layers.softmax(input=fc_out1)

        self.feeds = {
            "data": np.random.random((32, 128, 32, 32)).astype("float32")
        }
        self.enable_trt = True
        self.trt_parameters = FCFusePassTRTDynamicDims4Cols3Test.TensorRTParam(
            1 << 30, 32, 2, AnalysisConfig.Precision.Float32, False, False)
        self.dynamic_shape_params = FCFusePassTRTDynamicDims4Cols3Test.DynamicShapeParam(
            {
                'data': [1, 128, 32, 32]
            }, {'data': [64, 128, 32, 32]}, {'data': [32, 128, 32, 32]}, False)
        self.fetch_list = [out]

    def test_check_output(self):
        use_gpu = [False]
        if core.is_compiled_with_cuda():
            use_gpu.append(True)
        for i in range(len(use_gpu)):
            self.check_output_with_option(use_gpu[i])


if __name__ == "__main__":
    unittest.main()
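As a sanity check on the shapes these tests expect, the FCFusePassTRTDynamicDims4Cols2Test case can be emulated with plain numpy (illustration only; the flatten-then-matmul below is an assumption about fc semantics with num_flatten_dims, not Paddle code):

import numpy as np

x = np.random.random((32, 128, 32, 32)).astype("float32")
num_flatten_dims, size = 2, 64
# flatten everything from num_flatten_dims onward into one dim: (4096, 1024)
flat = x.reshape(int(np.prod(x.shape[:num_flatten_dims])), -1)
w = np.random.random((flat.shape[1], size)).astype("float32")
out = (flat @ w).reshape(*x.shape[:num_flatten_dims], size)
print(out.shape)  # (32, 128, 64): first num_flatten_dims dims kept, channels last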