diff --git a/paddle/fluid/inference/tensorrt/convert/fc_op.cc b/paddle/fluid/inference/tensorrt/convert/fc_op.cc index 194d76c737c7f9f477ec0354bc41c9ff8dbb42b6..aebdb8f884c2c6df8fa8ef0275daab50b8e2c8e8 100644 --- a/paddle/fluid/inference/tensorrt/convert/fc_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/fc_op.cc @@ -160,66 +160,61 @@ class FcOpConverter : public OpConverter { if (engine_->with_dynamic_shape()) { // not NCHW layout, but NLP layout with added 'x 1 x 1' auto x_dim = X->getDimensions(); - if (x_dim.nbDims == 3 || x_dim.nbDims == 2) { - auto output_name = op_desc.Output("Out").front(); - // add shuffle before fc - nvinfer1::Dims reshape_before_fc_dim; - reshape_before_fc_dim.nbDims = x_dim.nbDims + 2; - for (int i = 0; i < x_dim.nbDims; i++) { - reshape_before_fc_dim.d[i] = 0; - } - reshape_before_fc_dim.d[x_dim.nbDims] = 1; - reshape_before_fc_dim.d[x_dim.nbDims + 1] = 1; - auto* reshape_before_fc_layer = - TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *X); - reshape_before_fc_layer->setReshapeDimensions(reshape_before_fc_dim); - reshape_before_fc_layer->setName( - ("shuffle_before_fc(Output: " + output_name + ")").c_str()); + PADDLE_ENFORCE_LE( + x_dim.nbDims - x_num_col_dims, 3, + platform::errors::InvalidArgument( + "Params and input dims mismatch. Paddle-TRT FC " + "converter expects x_dim.nbDims - x_num_col_dims <= 3, but " + "x_dim.nbDims = %d, x_num_col_dims = %d.", + x_dim.nbDims, x_num_col_dims)); + auto output_name = op_desc.Output("Out").front(); + // add shuffle before fc + nvinfer1::Dims reshape_before_fc_dim; + // padding shape "x 1 x 1" + int padding_length = 3 - (x_dim.nbDims - x_num_col_dims); + reshape_before_fc_dim.nbDims = x_dim.nbDims + padding_length; + int cur_dim_index = reshape_before_fc_dim.nbDims - 1; + while (padding_length-- > 0) { + reshape_before_fc_dim.d[cur_dim_index--] = 1; + } + while (cur_dim_index >= 0) { + reshape_before_fc_dim.d[cur_dim_index--] = 0; + } - // add fc layer - auto* fc_layer = TRT_ENGINE_ADD_LAYER( - engine_, FullyConnected, *reshape_before_fc_layer->getOutput(0), - n_output, weight.get(), bias.get()); - fc_layer->setName(("fc_layer(Output: " + output_name + ")").c_str()); + auto* reshape_before_fc_layer = + TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *X); + reshape_before_fc_layer->setReshapeDimensions(reshape_before_fc_dim); + reshape_before_fc_layer->setName( + ("shuffle_before_fc(Output: " + output_name + ")").c_str()); - // add shuffle after fc - nvinfer1::Dims reshape_after_fc_dim; - if (x_dim.nbDims == 3) { - if (x_num_col_dims == 2) { - reshape_after_fc_dim.nbDims = 3; - reshape_after_fc_dim.d[0] = 0; - reshape_after_fc_dim.d[1] = 0; - reshape_after_fc_dim.d[2] = 0; - } else { - reshape_after_fc_dim.nbDims = 2; - reshape_after_fc_dim.d[0] = 0; - auto dim = fc_layer->getOutput(0)->getDimensions(); - reshape_after_fc_dim.d[1] = dim.d[1] * dim.d[2]; - } - // x_dim.nbDims == 2 - } else { - reshape_after_fc_dim.nbDims = 2; - reshape_after_fc_dim.d[0] = 0; - reshape_after_fc_dim.d[1] = 0; - } - auto* reshape_after_fc_layer = - TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *fc_layer->getOutput(0)); - reshape_after_fc_layer->setReshapeDimensions(reshape_after_fc_dim); + // add fc layer + auto* fc_layer = TRT_ENGINE_ADD_LAYER( + engine_, FullyConnected, *reshape_before_fc_layer->getOutput(0), + n_output, weight.get(), bias.get()); + fc_layer->setName(("fc_layer(Output: " + output_name + ")").c_str()); - if (activation_type == "relu") { - reshape_after_fc_layer->setName( - ("shuffle_after_fc(Output: " + output_name + ")").c_str()); - nvinfer1::IActivationLayer* relu_layer = TRT_ENGINE_ADD_LAYER( - engine_, Activation, *(reshape_after_fc_layer->getOutput(0)), - nvinfer1::ActivationType::kRELU); - RreplenishLayerAndOutput(relu_layer, "relu_after_fc_shuffle", - {output_name}, test_mode); - } else { - RreplenishLayerAndOutput(reshape_after_fc_layer, "shuffle_after_fc", - {output_name}, test_mode); - } + // add shuffle after fc + nvinfer1::Dims reshape_after_fc_dim; + reshape_after_fc_dim.nbDims = x_num_col_dims + 1; + for (int i = 0; i < reshape_after_fc_dim.nbDims; i++) { + reshape_after_fc_dim.d[i] = 0; + } + + auto* reshape_after_fc_layer = + TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *fc_layer->getOutput(0)); + reshape_after_fc_layer->setReshapeDimensions(reshape_after_fc_dim); + + if (activation_type == "relu") { + reshape_after_fc_layer->setName( + ("shuffle_after_fc(Output: " + output_name + ")").c_str()); + nvinfer1::IActivationLayer* relu_layer = TRT_ENGINE_ADD_LAYER( + engine_, Activation, *(reshape_after_fc_layer->getOutput(0)), + nvinfer1::ActivationType::kRELU); + RreplenishLayerAndOutput(relu_layer, "relu_after_fc_shuffle", + {output_name}, test_mode); } else { - regist_fc(X, n_output, weight, bias); + RreplenishLayerAndOutput(reshape_after_fc_layer, "shuffle_after_fc", + {output_name}, test_mode); } return; } diff --git a/paddle/fluid/inference/tensorrt/op_teller.cc b/paddle/fluid/inference/tensorrt/op_teller.cc index 379237a184ea6463b91e956c55e175d418589432..c8dfc169535da01ea7b2afb97f51a8d67b2dfa43 100644 --- a/paddle/fluid/inference/tensorrt/op_teller.cc +++ b/paddle/fluid/inference/tensorrt/op_teller.cc @@ -343,30 +343,6 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8, if (registry == nullptr) return false; } - if (op_type == "mul") { - const int x_num_col_dims = - desc.HasAttr("x_num_col_dims") - ? BOOST_GET_CONST(int, desc.GetAttr("x_num_col_dims")) - : (desc.HasAttr("in_num_col_dims") - ? BOOST_GET_CONST(int, desc.GetAttr("in_num_col_dims")) - : 1); - if (x_num_col_dims != 1 && x_num_col_dims != 2) { - return false; - } - } - - if (op_type == "fc") { - const int x_num_col_dims = - desc.HasAttr("x_num_col_dims") - ? BOOST_GET_CONST(int, desc.GetAttr("x_num_col_dims")) - : (desc.HasAttr("in_num_col_dims") - ? BOOST_GET_CONST(int, desc.GetAttr("in_num_col_dims")) - : 1); - if (x_num_col_dims != 1 && x_num_col_dims != 2) { - return false; - } - } - if (op_type == "nearest_interp") { std::vector attrs{"data_layout", "interp_method", "align_corners", "scale", diff --git a/python/paddle/fluid/tests/unittests/CMakeLists.txt b/python/paddle/fluid/tests/unittests/CMakeLists.txt index 6763b702cabe5ded0cd58e7b1fcad67717000fc0..c4d1c6fb3552f3a920822c4c8443f292b2ca7a31 100644 --- a/python/paddle/fluid/tests/unittests/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/CMakeLists.txt @@ -819,7 +819,7 @@ set_tests_properties(test_imperative_optimizer PROPERTIES TIMEOUT 120) set_tests_properties(test_pool2d_op PROPERTIES TIMEOUT 120) set_tests_properties(test_transpose_op PROPERTIES TIMEOUT 120) set_tests_properties(test_eager_deletion_gru_net PROPERTIES TIMEOUT 120) -set_tests_properties(test_activation_op PROPERTIES TIMEOUT 180) +set_tests_properties(test_activation_op PROPERTIES TIMEOUT 270) set_tests_properties(test_normal PROPERTIES TIMEOUT 120) set_tests_properties(test_lstmp_op PROPERTIES TIMEOUT 120) set_tests_properties(test_bilinear_interp_op PROPERTIES TIMEOUT 120) diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_fc_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_fc_fuse_pass.py index 48706bf5ad1fd985dfc3191286370983d0820730..3daa50020bab2e37b02627468bad383ef25ed5ac 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_fc_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_fc_fuse_pass.py @@ -55,5 +55,182 @@ class FCFusePassTRTTest(InferencePassTest): self.check_output_with_option(use_gpu[i]) +class FCFusePassTRTDynamicDims2Test(InferencePassTest): + def setUp(self): + with fluid.program_guard(self.main_program, self.startup_program): + data = fluid.data(name="data", shape=[32, 128], dtype="float32") + fc_out1 = fluid.layers.fc(input=data, + size=64, + num_flatten_dims=1, + act="relu") + out = fluid.layers.softmax(input=fc_out1) + + self.feeds = {"data": np.random.random((32, 128)).astype("float32")} + self.enable_trt = True + self.trt_parameters = FCFusePassTRTDynamicDims2Test.TensorRTParam( + 1 << 30, 32, 2, AnalysisConfig.Precision.Float32, False, False) + self.dynamic_shape_params = FCFusePassTRTDynamicDims2Test.DynamicShapeParam( + { + 'data': [1, 128] + }, {'data': [64, 128]}, {'data': [32, 128]}, False) + self.fetch_list = [out] + + def test_check_output(self): + use_gpu = [False] + if core.is_compiled_with_cuda(): + use_gpu.append(True) + for i in range(len(use_gpu)): + self.check_output_with_option(use_gpu[i]) + + +class FCFusePassTRTDynamicDims3Cols1Test(InferencePassTest): + def setUp(self): + with fluid.program_guard(self.main_program, self.startup_program): + data = fluid.data(name="data", shape=[32, 128, 32], dtype="float32") + fc_out1 = fluid.layers.fc(input=data, + size=64, + num_flatten_dims=1, + act="relu") + out = fluid.layers.softmax(input=fc_out1) + + self.feeds = {"data": np.random.random((32, 128, 32)).astype("float32")} + self.enable_trt = True + self.trt_parameters = FCFusePassTRTDynamicDims3Cols1Test.TensorRTParam( + 1 << 30, 32, 2, AnalysisConfig.Precision.Float32, False, False) + self.dynamic_shape_params = FCFusePassTRTDynamicDims3Cols1Test.DynamicShapeParam( + { + 'data': [1, 128, 32] + }, {'data': [64, 128, 32]}, {'data': [32, 128, 32]}, False) + self.fetch_list = [out] + + def test_check_output(self): + use_gpu = [False] + if core.is_compiled_with_cuda(): + use_gpu.append(True) + for i in range(len(use_gpu)): + self.check_output_with_option(use_gpu[i]) + + +class FCFusePassTRTDynamicDims3Cols2Test(InferencePassTest): + def setUp(self): + with fluid.program_guard(self.main_program, self.startup_program): + data = fluid.data(name="data", shape=[32, 128, 32], dtype="float32") + fc_out1 = fluid.layers.fc(input=data, + size=64, + num_flatten_dims=2, + act="relu") + out = fluid.layers.softmax(input=fc_out1) + + self.feeds = {"data": np.random.random((32, 128, 32)).astype("float32")} + self.enable_trt = True + self.trt_parameters = FCFusePassTRTDynamicDims3Cols2Test.TensorRTParam( + 1 << 30, 32, 2, AnalysisConfig.Precision.Float32, False, False) + self.dynamic_shape_params = FCFusePassTRTDynamicDims3Cols2Test.DynamicShapeParam( + { + 'data': [1, 32, 32] + }, {'data': [64, 256, 32]}, {'data': [32, 128, 32]}, False) + self.fetch_list = [out] + + def test_check_output(self): + use_gpu = [False] + if core.is_compiled_with_cuda(): + use_gpu.append(True) + for i in range(len(use_gpu)): + self.check_output_with_option(use_gpu[i]) + + +class FCFusePassTRTDynamicDims4Cols1Test(InferencePassTest): + def setUp(self): + with fluid.program_guard(self.main_program, self.startup_program): + data = fluid.data( + name="data", shape=[32, 12, 4, 6], dtype="float32") + fc_out1 = fluid.layers.fc(input=data, + size=64, + num_flatten_dims=1, + act="relu") + out = fluid.layers.softmax(input=fc_out1) + + self.feeds = { + "data": np.random.random((32, 12, 4, 6)).astype("float32") + } + self.enable_trt = True + self.trt_parameters = FCFusePassTRTDynamicDims4Cols1Test.TensorRTParam( + 1 << 30, 32, 2, AnalysisConfig.Precision.Float32, False, False) + self.dynamic_shape_params = FCFusePassTRTDynamicDims4Cols1Test.DynamicShapeParam( + { + 'data': [1, 12, 4, 6] + }, {'data': [64, 12, 4, 6]}, {'data': [32, 12, 4, 6]}, False) + self.fetch_list = [out] + + def test_check_output(self): + use_gpu = [False] + if core.is_compiled_with_cuda(): + use_gpu.append(True) + for i in range(len(use_gpu)): + self.check_output_with_option(use_gpu[i]) + + +class FCFusePassTRTDynamicDims4Cols2Test(InferencePassTest): + def setUp(self): + with fluid.program_guard(self.main_program, self.startup_program): + data = fluid.data( + name="data", shape=[32, 128, 32, 32], dtype="float32") + fc_out1 = fluid.layers.fc(input=data, + size=64, + num_flatten_dims=2, + act="relu") + out = fluid.layers.softmax(input=fc_out1) + + self.feeds = { + "data": np.random.random((32, 128, 32, 32)).astype("float32") + } + self.enable_trt = True + self.trt_parameters = FCFusePassTRTDynamicDims4Cols2Test.TensorRTParam( + 1 << 30, 32, 2, AnalysisConfig.Precision.Float32, False, False) + self.dynamic_shape_params = FCFusePassTRTDynamicDims4Cols2Test.DynamicShapeParam( + { + 'data': [1, 64, 32, 32] + }, {'data': [64, 256, 32, 32]}, {'data': [32, 128, 32, 32]}, False) + self.fetch_list = [out] + + def test_check_output(self): + use_gpu = [False] + if core.is_compiled_with_cuda(): + use_gpu.append(True) + for i in range(len(use_gpu)): + self.check_output_with_option(use_gpu[i]) + + +class FCFusePassTRTDynamicDims4Cols3Test(InferencePassTest): + def setUp(self): + with fluid.program_guard(self.main_program, self.startup_program): + data = fluid.data( + name="data", shape=[32, 128, 32, 32], dtype="float32") + fc_out1 = fluid.layers.fc(input=data, + size=64, + num_flatten_dims=3, + act="relu") + out = fluid.layers.softmax(input=fc_out1) + + self.feeds = { + "data": np.random.random((32, 128, 32, 32)).astype("float32") + } + self.enable_trt = True + self.trt_parameters = FCFusePassTRTDynamicDims4Cols3Test.TensorRTParam( + 1 << 30, 32, 2, AnalysisConfig.Precision.Float32, False, False) + self.dynamic_shape_params = FCFusePassTRTDynamicDims4Cols3Test.DynamicShapeParam( + { + 'data': [1, 128, 32, 32] + }, {'data': [64, 128, 32, 32]}, {'data': [32, 128, 32, 32]}, False) + self.fetch_list = [out] + + def test_check_output(self): + use_gpu = [False] + if core.is_compiled_with_cuda(): + use_gpu.append(True) + for i in range(len(use_gpu)): + self.check_output_with_option(use_gpu[i]) + + if __name__ == "__main__": unittest.main()