From 5c291737830e03cfd816d36056ba48e0fc1fbc35 Mon Sep 17 00:00:00 2001 From: zhoutianzi666 <39978853+zhoutianzi666@users.noreply.github.com> Date: Mon, 18 Jul 2022 11:22:37 +0800 Subject: [PATCH] [Paddle-TRT] remove useless code in fc (#44382) * remove useless code in fc --- .../fluid/inference/tensorrt/convert/fc_op.cc | 100 +---- .../ir/inference/test_trt_convert_fc.py | 361 ++++++++++++++++++ 2 files changed, 377 insertions(+), 84 deletions(-) create mode 100644 python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_fc.py diff --git a/paddle/fluid/inference/tensorrt/convert/fc_op.cc b/paddle/fluid/inference/tensorrt/convert/fc_op.cc index 1bd9cf8712..0d61dc6d0e 100644 --- a/paddle/fluid/inference/tensorrt/convert/fc_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/fc_op.cc @@ -333,91 +333,23 @@ class FcOpConverter : public OpConverter { if (!engine_->with_dynamic_shape()) { x_num_col_dims--; } - // If use tensorrt'oss, the x_dim and x_num_col_dims need change, and can - // not add Shuffle layer in ernie's multihead. - if (x_dim.nbDims == 4 && x_num_col_dims == 1) { - if (enable_int8 || support_int8) { - // add conv1x1 layer - nvinfer1::DimsHW nv_ksize(1, 1); - auto* fc_layer_int8 = TRT_ENGINE_ADD_LAYER(engine_, - Convolution, - *X, - n_output, - nv_ksize, - weight.get(), - bias.get()); - if (activation_type == "relu") { - fc_layer_int8->setName( - ("ernie_fc_op_int8: Convolution (Output: " + output_name + ")") - .c_str()); - PADDLE_ENFORCE_EQ( - op_desc.HasAttr("out_threshold"), - true, - platform::errors::InvalidArgument( - "must have out threshold in fc layers in int8 mode")); - float out_scale = 0; - if (enable_int8) { - out_scale = - BOOST_GET_CONST(float, op_desc.GetAttr("out_threshold")); - } else { - out_scale = BOOST_GET_CONST(float, op_desc.GetAttr("Out")); - } - engine_->SetTensorDynamicRange(fc_layer_int8->getOutput(0), - out_scale); - nvinfer1::IActivationLayer* relu_layer_int8 = - TRT_ENGINE_ADD_LAYER(engine_, - Activation, - *(fc_layer_int8->getOutput(0)), - nvinfer1::ActivationType::kRELU); - RreplenishLayerAndOutput(relu_layer_int8, - "relu_after_ernie_fc_int8", - {output_name}, - test_mode); - } else { - RreplenishLayerAndOutput(fc_layer_int8, - "ernie_fc_op_int8: Convolution", - {output_name}, - test_mode); - } - } else { - // add fc layer - auto* fc_layer_float = TRT_ENGINE_ADD_LAYER( - engine_, FullyConnected, *X, n_output, weight.get(), bias.get()); - if (activation_type == "relu") { - fc_layer_float->setName( - ("ernie_fc_op_float: (Output: " + output_name + ")").c_str()); - nvinfer1::IActivationLayer* relu_layer_float = - TRT_ENGINE_ADD_LAYER(engine_, - Activation, - *(fc_layer_float->getOutput(0)), - nvinfer1::ActivationType::kRELU); - RreplenishLayerAndOutput(relu_layer_float, - "relu_after_ernie_fc_float", - {output_name}, - test_mode); - } else { - RreplenishLayerAndOutput( - fc_layer_float, "ernie_fc_op_float", {output_name}, test_mode); - } - } - } else { // need reshape input before and after fc - PADDLE_ENFORCE_GT( - x_dim.nbDims, - x_num_col_dims, - platform::errors::InvalidArgument( - "Params and input dims mismatch. 
Paddle-TRT FC " - "converter expects x_dim.nbDims > x_num_col_dims, but " - "x_dim.nbDims : %d, x_num_col_dims : %d.", - x_dim.nbDims, - x_num_col_dims)); - auto* reshape_before_fc_layer = - reshape_before_fc(X, x_dim, x_num_col_dims, output_name); - auto* reshape_itensor = reshape_before_fc_layer->getOutput(0); - if (enable_int8 || support_int8) { - engine_->SetTensorDynamicRange(reshape_itensor, in_scale); - } - regist_fc(reshape_itensor, n_output, weight, bias); + PADDLE_ENFORCE_GT( + x_dim.nbDims, + x_num_col_dims, + platform::errors::InvalidArgument( + "Params and input dims mismatch. Paddle-TRT FC " + "converter expects x_dim.nbDims > x_num_col_dims, but " + "x_dim.nbDims : %d, x_num_col_dims : %d.", + x_dim.nbDims, + x_num_col_dims)); + // need reshape input before and after fc + auto* reshape_before_fc_layer = + reshape_before_fc(X, x_dim, x_num_col_dims, output_name); + auto* reshape_itensor = reshape_before_fc_layer->getOutput(0); + if (enable_int8 || support_int8) { + engine_->SetTensorDynamicRange(reshape_itensor, in_scale); } + regist_fc(reshape_itensor, n_output, weight, bias); } }; diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_fc.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_fc.py new file mode 100644 index 0000000000..9b6badf394 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_fc.py @@ -0,0 +1,361 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
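+#
+# The tests below exercise the simplified Paddle-TRT fc converter, which now
+# always reshapes the input before and after the FullyConnected layer instead
+# of taking a separate conv1x1 / direct-FullyConnected path:
+#   - TrtConvertFcTest:  5-D input with in_num_col_dims = 3
+#   - TrtConvertFcTest2: 4-D input with in_num_col_dims = 3
+#   - TrtConvertFcTest3: 4-D input with in_num_col_dims = 1 (the former
+#     special case), including an int8 predictor configuration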
+ +from trt_layer_auto_scan_test import TrtLayerAutoScanTest, SkipReasons +from program_config import TensorConfig, ProgramConfig +import numpy as np +import unittest +import paddle.inference as paddle_infer +from functools import partial +from typing import Optional, List, Callable, Dict, Any, Set +import os + + +class TrtConvertFcTest(TrtLayerAutoScanTest): + + def is_program_valid(self, program_config: ProgramConfig) -> bool: + # The output has diff between gpu and trt in CI windows + if (os.name == 'nt'): + return False + return True + + def sample_program_configs(self): + self.trt_param.workspace_size = 1073741824 + + def generate_input1(batch, attrs: List[Dict[str, Any]]): + return np.random.random([batch, 3, 64, (int)(attrs[0]["m"] / 2), + 2]).astype(np.float32) + + def generate_w(batch, attrs: List[Dict[str, Any]]): + return np.random.random([attrs[0]["m"], + attrs[0]["n"]]).astype(np.float32) + + def generate_bias(batch, attrs: List[Dict[str, Any]]): + return np.random.random([attrs[0]["n"]]).astype(np.float32) + + for batch in [1, 4]: + for [m, n] in [[32, 23]]: + dics = [ + { + "in_num_col_dims": 3, + # for my conveinence + "m": m, + "n": n, + }, + {} + ] + + ops_config = [ + { + "op_type": "fc", + "op_inputs": { + "Input": ["input_data"], + "W": ["w_data"], + "Bias": ["bias_data"] + }, + "op_outputs": { + "Out": ["output_data"] + }, + "op_attrs": dics[0] + }, + ] + + ops = self.generate_op_config(ops_config) + + program_config = ProgramConfig( + ops=ops, + weights={ + "w_data": + TensorConfig(data_gen=partial(generate_w, batch, dics)), + "bias_data": + TensorConfig( + data_gen=partial(generate_bias, batch, dics)) + }, + inputs={ + "input_data": + TensorConfig( + data_gen=partial(generate_input1, batch, dics)), + }, + outputs=["output_data"]) + + yield program_config + + def sample_predictor_configs( + self, program_config) -> (paddle_infer.Config, List[int], float): + + def generate_dynamic_shape(attrs): + self.dynamic_shape.min_input_shape = { + "input_data": [1, 3, 32, 16, 2], + } + self.dynamic_shape.max_input_shape = { + "input_data": [4, 3, 64, 16, 2], + } + self.dynamic_shape.opt_input_shape = { + "input_data": [1, 3, 64, 16, 2], + } + + def clear_dynamic_shape(): + self.dynamic_shape.min_input_shape = {} + self.dynamic_shape.max_input_shape = {} + self.dynamic_shape.opt_input_shape = {} + + def generate_trt_nodes_num(attrs, dynamic_shape): + return 1, 2 + + attrs = [ + program_config.ops[i].attrs for i in range(len(program_config.ops)) + ] + + # # for static_shape + # clear_dynamic_shape() + self.trt_param.precision = paddle_infer.PrecisionType.Float32 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, False), 1e-5 + self.trt_param.precision = paddle_infer.PrecisionType.Half + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, False), (1e-5, 1e-5) + + # for dynamic_shape + generate_dynamic_shape(attrs) + self.trt_param.precision = paddle_infer.PrecisionType.Float32 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), 1e-5 + self.trt_param.precision = paddle_infer.PrecisionType.Half + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True), (1e-5, 1e-5) + + def test(self): + self.run_test() + + def test_quant(self): + self.run_test(quant=True) + + +class TrtConvertFcTest2(TrtLayerAutoScanTest): + + def is_program_valid(self, program_config: ProgramConfig) -> bool: + # The output has diff between gpu and trt in CI windows + if (os.name == 'nt'): + return False + return True + + def 
sample_program_configs(self): + self.trt_param.workspace_size = 1073741824 + + def generate_input1(batch, attrs: List[Dict[str, Any]]): + return np.random.random([batch, 3, 64, 14]).astype(np.float32) + + def generate_w(batch, attrs: List[Dict[str, Any]]): + return np.random.random([attrs[0]["m"], + attrs[0]["n"]]).astype(np.float32) + + def generate_bias(batch, attrs: List[Dict[str, Any]]): + return np.random.random([attrs[0]["n"]]).astype(np.float32) + + for batch in [1, 4]: + for [m, n] in [[14, 43]]: + dics = [ + { + "in_num_col_dims": 3, + # for my conveinence + "m": m, + "n": n, + }, + {} + ] + + ops_config = [ + { + "op_type": "fc", + "op_inputs": { + "Input": ["input_data"], + "W": ["w_data"], + "Bias": ["bias_data"] + }, + "op_outputs": { + "Out": ["output_data"] + }, + "op_attrs": dics[0] + }, + ] + + ops = self.generate_op_config(ops_config) + + program_config = ProgramConfig( + ops=ops, + weights={ + "w_data": + TensorConfig(data_gen=partial(generate_w, batch, dics)), + "bias_data": + TensorConfig( + data_gen=partial(generate_bias, batch, dics)) + }, + inputs={ + "input_data": + TensorConfig( + data_gen=partial(generate_input1, batch, dics)), + }, + outputs=["output_data"]) + + yield program_config + + def sample_predictor_configs( + self, program_config) -> (paddle_infer.Config, List[int], float): + + def generate_dynamic_shape(): + self.dynamic_shape.min_input_shape = { + "input_data": [1, 3, 32, 14], + } + self.dynamic_shape.max_input_shape = { + "input_data": [4, 3, 64, 14], + } + self.dynamic_shape.opt_input_shape = { + "input_data": [1, 3, 64, 14], + } + + def clear_dynamic_shape(): + self.dynamic_shape.min_input_shape = {} + self.dynamic_shape.max_input_shape = {} + self.dynamic_shape.opt_input_shape = {} + + # # for static_shape + clear_dynamic_shape() + self.trt_param.precision = paddle_infer.PrecisionType.Float32 + yield self.create_inference_config(), (1, 2), 1e-5 + self.trt_param.precision = paddle_infer.PrecisionType.Half + yield self.create_inference_config(), (1, 2), (1e-5, 1e-5) + + # for dynamic_shape + generate_dynamic_shape() + self.trt_param.precision = paddle_infer.PrecisionType.Float32 + yield self.create_inference_config(), (1, 2), 1e-5 + self.trt_param.precision = paddle_infer.PrecisionType.Half + yield self.create_inference_config(), (1, 2), (1e-5, 1e-5) + + def test(self): + self.run_test() + + +# this is the special case when x_dim.nbDims == 4 && x_num_col_dims == 1 +class TrtConvertFcTest3(TrtLayerAutoScanTest): + + def is_program_valid(self, program_config: ProgramConfig) -> bool: + return True + + def sample_program_configs(self): + self.trt_param.workspace_size = 1073741824 + + def generate_input1(batch, attrs: List[Dict[str, Any]]): + return np.ones([batch, 14, 1, 2]).astype(np.float32) + + def generate_w(batch, attrs: List[Dict[str, Any]]): + return np.ones([attrs[0]["m"], attrs[0]["n"]]).astype(np.float32) + + def generate_bias(batch, attrs: List[Dict[str, Any]]): + return np.ones([attrs[0]["n"]]).astype(np.float32) + + for batch in [1, 4]: + for [m, n] in [[28, 43]]: + dics = [ + { + "in_num_col_dims": 1, + "Input_scale": 0.1, + "out_threshold": 0.1, + "enable_int8": True, + # for my conveinence + "m": m, + "n": n, + }, + {} + ] + + ops_config = [ + { + "op_type": "fc", + "op_inputs": { + "Input": ["input_data"], + "W": ["w_data"], + "Bias": ["bias_data"] + }, + "op_outputs": { + "Out": ["output_data"] + }, + "op_attrs": dics[0] + }, + ] + + ops = self.generate_op_config(ops_config) + + program_config = ProgramConfig( + ops=ops, + weights={ + 
"w_data": + TensorConfig(data_gen=partial(generate_w, batch, dics)), + "bias_data": + TensorConfig( + data_gen=partial(generate_bias, batch, dics)) + }, + inputs={ + "input_data": + TensorConfig( + data_gen=partial(generate_input1, batch, dics)), + }, + outputs=["output_data"]) + + yield program_config + + def sample_predictor_configs( + self, program_config) -> (paddle_infer.Config, List[int], float): + + def generate_dynamic_shape(): + self.dynamic_shape.min_input_shape = { + "input_data": [1, 14, 1, 2], + } + self.dynamic_shape.max_input_shape = { + "input_data": [4, 14, 1, 2], + } + self.dynamic_shape.opt_input_shape = { + "input_data": [1, 14, 1, 2], + } + + def clear_dynamic_shape(): + self.dynamic_shape.min_input_shape = {} + self.dynamic_shape.max_input_shape = {} + self.dynamic_shape.opt_input_shape = {} + + # for static_shape + clear_dynamic_shape() + self.trt_param.precision = paddle_infer.PrecisionType.Float32 + yield self.create_inference_config(), (1, 2), 1e-5 + self.trt_param.precision = paddle_infer.PrecisionType.Half + yield self.create_inference_config(), (1, 2), (1e-5, 1e-5) + + # for dynamic_shape + generate_dynamic_shape() + self.trt_param.precision = paddle_infer.PrecisionType.Float32 + yield self.create_inference_config(), (1, 2), 1e-5 + self.trt_param.precision = paddle_infer.PrecisionType.Half + yield self.create_inference_config(), (1, 2), (1e-5, 1e-5) + self.trt_param.precision = paddle_infer.PrecisionType.Int8 + yield self.create_inference_config(), (1, 2), (1e-5, 1e-5) + + def test(self): + self.run_test() + + def test_quant(self): + self.run_test(quant=True) + + +if __name__ == "__main__": + unittest.main() -- GitLab