fix tc trt shape (#32458)

* fix tc trt shape
* fix fc dynamic shape
* add fc shape assert
* update

fix tc trt shape (#32458)
* fix tc trt shape
* fix fc dynamic shape
* add fc shape assert
* update
f272e59a · Shang Zhizhou · GitHub · 06276f46 · f272e59a · f272e59a
4 changed files
--- a/paddle/fluid/inference/tensorrt/convert/fc_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/fc_op.cc
@@ -160,66 +160,61 @@ class FcOpConverter : public OpConverter {
    if (engine_->with_dynamic_shape()) {
      // not NCHW layout, but NLP layout with added 'x 1 x 1'
      auto x_dim = X->getDimensions();
-      if (x_dim.nbDims == 3 || x_dim.nbDims == 2) {
-        auto output_name = op_desc.Output("Out").front();
-        // add shuffle before fc
-        nvinfer1::Dims reshape_before_fc_dim;
-        reshape_before_fc_dim.nbDims = x_dim.nbDims + 2;
-        for (int i = 0; i < x_dim.nbDims; i++) {
-          reshape_before_fc_dim.d[i] = 0;
-        }
-        reshape_before_fc_dim.d[x_dim.nbDims] = 1;
-        reshape_before_fc_dim.d[x_dim.nbDims + 1] = 1;
-        auto* reshape_before_fc_layer =
-            TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *X);
-        reshape_before_fc_layer->setReshapeDimensions(reshape_before_fc_dim);
-        reshape_before_fc_layer->setName(
-            ("shuffle_before_fc(Output: " + output_name + ")").c_str());
+      PADDLE_ENFORCE_LE(
+          x_dim.nbDims - x_num_col_dims, 3,
+          platform::errors::InvalidArgument(
+              "Params and input dims mismatch. Paddle-TRT FC "
+              "converter expects x_dim.nbDims - x_num_col_dims <= 3, but "
+              "x_dim.nbDims = %d, x_num_col_dims = %d.",
+              x_dim.nbDims, x_num_col_dims));
+      auto output_name = op_desc.Output("Out").front();
+      // add shuffle before fc
+      nvinfer1::Dims reshape_before_fc_dim;
+      // padding shape "x 1 x 1"
+      int padding_length = 3 - (x_dim.nbDims - x_num_col_dims);
+      reshape_before_fc_dim.nbDims = x_dim.nbDims + padding_length;
+      int cur_dim_index = reshape_before_fc_dim.nbDims - 1;
+      while (padding_length-- > 0) {
+        reshape_before_fc_dim.d[cur_dim_index--] = 1;
+      }
+      while (cur_dim_index >= 0) {
+        reshape_before_fc_dim.d[cur_dim_index--] = 0;
+      }

-        // add fc layer
-        auto* fc_layer = TRT_ENGINE_ADD_LAYER(
-            engine_, FullyConnected, *reshape_before_fc_layer->getOutput(0),
-            n_output, weight.get(), bias.get());
-        fc_layer->setName(("fc_layer(Output: " + output_name + ")").c_str());
+      auto* reshape_before_fc_layer =
+          TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *X);
+      reshape_before_fc_layer->setReshapeDimensions(reshape_before_fc_dim);
+      reshape_before_fc_layer->setName(
+          ("shuffle_before_fc(Output: " + output_name + ")").c_str());

-        // add shuffle after fc
-        nvinfer1::Dims reshape_after_fc_dim;
-        if (x_dim.nbDims == 3) {
-          if (x_num_col_dims == 2) {
-            reshape_after_fc_dim.nbDims = 3;
-            reshape_after_fc_dim.d[0] = 0;
-            reshape_after_fc_dim.d[1] = 0;
-            reshape_after_fc_dim.d[2] = 0;
-          } else {
-            reshape_after_fc_dim.nbDims = 2;
-            reshape_after_fc_dim.d[0] = 0;
-            auto dim = fc_layer->getOutput(0)->getDimensions();
-            reshape_after_fc_dim.d[1] = dim.d[1] * dim.d[2];
-          }
-          // x_dim.nbDims == 2
-        } else {
-          reshape_after_fc_dim.nbDims = 2;
-          reshape_after_fc_dim.d[0] = 0;
-          reshape_after_fc_dim.d[1] = 0;
-        }
-        auto* reshape_after_fc_layer =
-            TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *fc_layer->getOutput(0));
-        reshape_after_fc_layer->setReshapeDimensions(reshape_after_fc_dim);
+      // add fc layer
+      auto* fc_layer = TRT_ENGINE_ADD_LAYER(
+          engine_, FullyConnected, *reshape_before_fc_layer->getOutput(0),
+          n_output, weight.get(), bias.get());
+      fc_layer->setName(("fc_layer(Output: " + output_name + ")").c_str());

-        if (activation_type == "relu") {
-          reshape_after_fc_layer->setName(
-              ("shuffle_after_fc(Output: " + output_name + ")").c_str());
-          nvinfer1::IActivationLayer* relu_layer = TRT_ENGINE_ADD_LAYER(
-              engine_, Activation, *(reshape_after_fc_layer->getOutput(0)),
-              nvinfer1::ActivationType::kRELU);
-          RreplenishLayerAndOutput(relu_layer, "relu_after_fc_shuffle",
-                                   {output_name}, test_mode);
-        } else {
-          RreplenishLayerAndOutput(reshape_after_fc_layer, "shuffle_after_fc",
-                                   {output_name}, test_mode);
-        }
+      // add shuffle after fc
+      nvinfer1::Dims reshape_after_fc_dim;
+      reshape_after_fc_dim.nbDims = x_num_col_dims + 1;
+      for (int i = 0; i < reshape_after_fc_dim.nbDims; i++) {
+        reshape_after_fc_dim.d[i] = 0;
+      }
+
+      auto* reshape_after_fc_layer =
+          TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *fc_layer->getOutput(0));
+      reshape_after_fc_layer->setReshapeDimensions(reshape_after_fc_dim);
+
+      if (activation_type == "relu") {
+        reshape_after_fc_layer->setName(
+            ("shuffle_after_fc(Output: " + output_name + ")").c_str());
+        nvinfer1::IActivationLayer* relu_layer = TRT_ENGINE_ADD_LAYER(
+            engine_, Activation, *(reshape_after_fc_layer->getOutput(0)),
+            nvinfer1::ActivationType::kRELU);
+        RreplenishLayerAndOutput(relu_layer, "relu_after_fc_shuffle",
+                                 {output_name}, test_mode);
      } else {
-        regist_fc(X, n_output, weight, bias);
+        RreplenishLayerAndOutput(reshape_after_fc_layer, "shuffle_after_fc",
+                                 {output_name}, test_mode);
      }
      return;
    }

--- a/paddle/fluid/inference/tensorrt/op_teller.cc
+++ b/paddle/fluid/inference/tensorrt/op_teller.cc
@@ -343,30 +343,6 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
      if (registry == nullptr) return false;
    }

-    if (op_type == "mul") {
-      const int x_num_col_dims =
-          desc.HasAttr("x_num_col_dims")
-              ? BOOST_GET_CONST(int, desc.GetAttr("x_num_col_dims"))
-              : (desc.HasAttr("in_num_col_dims")
-                     ? BOOST_GET_CONST(int, desc.GetAttr("in_num_col_dims"))
-                     : 1);
-      if (x_num_col_dims != 1 && x_num_col_dims != 2) {
-        return false;
-      }
-    }
-
-    if (op_type == "fc") {
-      const int x_num_col_dims =
-          desc.HasAttr("x_num_col_dims")
-              ? BOOST_GET_CONST(int, desc.GetAttr("x_num_col_dims"))
-              : (desc.HasAttr("in_num_col_dims")
-                     ? BOOST_GET_CONST(int, desc.GetAttr("in_num_col_dims"))
-                     : 1);
-      if (x_num_col_dims != 1 && x_num_col_dims != 2) {
-        return false;
-      }
-    }
-
    if (op_type == "nearest_interp") {
      std::vector<std::string> attrs{"data_layout",   "interp_method",
                                     "align_corners", "scale",

--- a/python/paddle/fluid/tests/unittests/CMakeLists.txt
+++ b/python/paddle/fluid/tests/unittests/CMakeLists.txt
@@ -819,7 +819,7 @@ set_tests_properties(test_imperative_optimizer PROPERTIES TIMEOUT 120)
 set_tests_properties(test_pool2d_op PROPERTIES TIMEOUT 120)
 set_tests_properties(test_transpose_op PROPERTIES TIMEOUT 120)
 set_tests_properties(test_eager_deletion_gru_net PROPERTIES TIMEOUT 120)
-set_tests_properties(test_activation_op PROPERTIES TIMEOUT 180)
+set_tests_properties(test_activation_op PROPERTIES TIMEOUT 270)
 set_tests_properties(test_normal PROPERTIES TIMEOUT 120)
 set_tests_properties(test_lstmp_op PROPERTIES TIMEOUT 120)
 set_tests_properties(test_bilinear_interp_op PROPERTIES TIMEOUT 120)

--- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_fc_fuse_pass.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_fc_fuse_pass.py
@@ -55,5 +55,182 @@ class FCFusePassTRTTest(InferencePassTest):
            self.check_output_with_option(use_gpu[i])


+class FCFusePassTRTDynamicDims2Test(InferencePassTest):
+    def setUp(self):
+        with fluid.program_guard(self.main_program, self.startup_program):
+            data = fluid.data(name="data", shape=[32, 128], dtype="float32")
+            fc_out1 = fluid.layers.fc(input=data,
+                                      size=64,
+                                      num_flatten_dims=1,
+                                      act="relu")
+            out = fluid.layers.softmax(input=fc_out1)
+
+        self.feeds = {"data": np.random.random((32, 128)).astype("float32")}
+        self.enable_trt = True
+        self.trt_parameters = FCFusePassTRTDynamicDims2Test.TensorRTParam(
+            1 << 30, 32, 2, AnalysisConfig.Precision.Float32, False, False)
+        self.dynamic_shape_params = FCFusePassTRTDynamicDims2Test.DynamicShapeParam(
+            {
+                'data': [1, 128]
+            }, {'data': [64, 128]}, {'data': [32, 128]}, False)
+        self.fetch_list = [out]
+
+    def test_check_output(self):
+        use_gpu = [False]
+        if core.is_compiled_with_cuda():
+            use_gpu.append(True)
+        for i in range(len(use_gpu)):
+            self.check_output_with_option(use_gpu[i])
+
+
+class FCFusePassTRTDynamicDims3Cols1Test(InferencePassTest):
+    def setUp(self):
+        with fluid.program_guard(self.main_program, self.startup_program):
+            data = fluid.data(name="data", shape=[32, 128, 32], dtype="float32")
+            fc_out1 = fluid.layers.fc(input=data,
+                                      size=64,
+                                      num_flatten_dims=1,
+                                      act="relu")
+            out = fluid.layers.softmax(input=fc_out1)
+
+        self.feeds = {"data": np.random.random((32, 128, 32)).astype("float32")}
+        self.enable_trt = True
+        self.trt_parameters = FCFusePassTRTDynamicDims3Cols1Test.TensorRTParam(
+            1 << 30, 32, 2, AnalysisConfig.Precision.Float32, False, False)
+        self.dynamic_shape_params = FCFusePassTRTDynamicDims3Cols1Test.DynamicShapeParam(
+            {
+                'data': [1, 128, 32]
+            }, {'data': [64, 128, 32]}, {'data': [32, 128, 32]}, False)
+        self.fetch_list = [out]
+
+    def test_check_output(self):
+        use_gpu = [False]
+        if core.is_compiled_with_cuda():
+            use_gpu.append(True)
+        for i in range(len(use_gpu)):
+            self.check_output_with_option(use_gpu[i])
+
+
+class FCFusePassTRTDynamicDims3Cols2Test(InferencePassTest):
+    def setUp(self):
+        with fluid.program_guard(self.main_program, self.startup_program):
+            data = fluid.data(name="data", shape=[32, 128, 32], dtype="float32")
+            fc_out1 = fluid.layers.fc(input=data,
+                                      size=64,
+                                      num_flatten_dims=2,
+                                      act="relu")
+            out = fluid.layers.softmax(input=fc_out1)
+
+        self.feeds = {"data": np.random.random((32, 128, 32)).astype("float32")}
+        self.enable_trt = True
+        self.trt_parameters = FCFusePassTRTDynamicDims3Cols2Test.TensorRTParam(
+            1 << 30, 32, 2, AnalysisConfig.Precision.Float32, False, False)
+        self.dynamic_shape_params = FCFusePassTRTDynamicDims3Cols2Test.DynamicShapeParam(
+            {
+                'data': [1, 32, 32]
+            }, {'data': [64, 256, 32]}, {'data': [32, 128, 32]}, False)
+        self.fetch_list = [out]
+
+    def test_check_output(self):
+        use_gpu = [False]
+        if core.is_compiled_with_cuda():
+            use_gpu.append(True)
+        for i in range(len(use_gpu)):
+            self.check_output_with_option(use_gpu[i])
+
+
+class FCFusePassTRTDynamicDims4Cols1Test(InferencePassTest):
+    def setUp(self):
+        with fluid.program_guard(self.main_program, self.startup_program):
+            data = fluid.data(
+                name="data", shape=[32, 12, 4, 6], dtype="float32")
+            fc_out1 = fluid.layers.fc(input=data,
+                                      size=64,
+                                      num_flatten_dims=1,
+                                      act="relu")
+            out = fluid.layers.softmax(input=fc_out1)
+
+        self.feeds = {
+            "data": np.random.random((32, 12, 4, 6)).astype("float32")
+        }
+        self.enable_trt = True
+        self.trt_parameters = FCFusePassTRTDynamicDims4Cols1Test.TensorRTParam(
+            1 << 30, 32, 2, AnalysisConfig.Precision.Float32, False, False)
+        self.dynamic_shape_params = FCFusePassTRTDynamicDims4Cols1Test.DynamicShapeParam(
+            {
+                'data': [1, 12, 4, 6]
+            }, {'data': [64, 12, 4, 6]}, {'data': [32, 12, 4, 6]}, False)
+        self.fetch_list = [out]
+
+    def test_check_output(self):
+        use_gpu = [False]
+        if core.is_compiled_with_cuda():
+            use_gpu.append(True)
+        for i in range(len(use_gpu)):
+            self.check_output_with_option(use_gpu[i])
+
+
+class FCFusePassTRTDynamicDims4Cols2Test(InferencePassTest):
+    def setUp(self):
+        with fluid.program_guard(self.main_program, self.startup_program):
+            data = fluid.data(
+                name="data", shape=[32, 128, 32, 32], dtype="float32")
+            fc_out1 = fluid.layers.fc(input=data,
+                                      size=64,
+                                      num_flatten_dims=2,
+                                      act="relu")
+            out = fluid.layers.softmax(input=fc_out1)
+
+        self.feeds = {
+            "data": np.random.random((32, 128, 32, 32)).astype("float32")
+        }
+        self.enable_trt = True
+        self.trt_parameters = FCFusePassTRTDynamicDims4Cols2Test.TensorRTParam(
+            1 << 30, 32, 2, AnalysisConfig.Precision.Float32, False, False)
+        self.dynamic_shape_params = FCFusePassTRTDynamicDims4Cols2Test.DynamicShapeParam(
+            {
+                'data': [1, 64, 32, 32]
+            }, {'data': [64, 256, 32, 32]}, {'data': [32, 128, 32, 32]}, False)
+        self.fetch_list = [out]
+
+    def test_check_output(self):
+        use_gpu = [False]
+        if core.is_compiled_with_cuda():
+            use_gpu.append(True)
+        for i in range(len(use_gpu)):
+            self.check_output_with_option(use_gpu[i])
+
+
+class FCFusePassTRTDynamicDims4Cols3Test(InferencePassTest):
+    def setUp(self):
+        with fluid.program_guard(self.main_program, self.startup_program):
+            data = fluid.data(
+                name="data", shape=[32, 128, 32, 32], dtype="float32")
+            fc_out1 = fluid.layers.fc(input=data,
+                                      size=64,
+                                      num_flatten_dims=3,
+                                      act="relu")
+            out = fluid.layers.softmax(input=fc_out1)
+
+        self.feeds = {
+            "data": np.random.random((32, 128, 32, 32)).astype("float32")
+        }
+        self.enable_trt = True
+        self.trt_parameters = FCFusePassTRTDynamicDims4Cols3Test.TensorRTParam(
+            1 << 30, 32, 2, AnalysisConfig.Precision.Float32, False, False)
+        self.dynamic_shape_params = FCFusePassTRTDynamicDims4Cols3Test.DynamicShapeParam(
+            {
+                'data': [1, 128, 32, 32]
+            }, {'data': [64, 128, 32, 32]}, {'data': [32, 128, 32, 32]}, False)
+        self.fetch_list = [out]
+
+    def test_check_output(self):
+        use_gpu = [False]
+        if core.is_compiled_with_cuda():
+            use_gpu.append(True)
+        for i in range(len(use_gpu)):
+            self.check_output_with_option(use_gpu[i])
+
+
 if __name__ == "__main__":
    unittest.main()