From f6b4ed22cb0c510848f511b9482954bca84e94b4 Mon Sep 17 00:00:00 2001
From: baoachun <962571062@qq.com>
Date: Fri, 29 Oct 2021 16:47:40 +0800
Subject: [PATCH] fix matmul error when input's dim is 3 (#36849)

---
 .../inference/tensorrt/convert/matmul_op.cc   |  45 +++-
 paddle/fluid/inference/tensorrt/op_teller.cc  |   2 +-
 .../ir/inference/test_trt_convert_matmul.py   | 213 ++++++++++++++++++
 3 files changed, 257 insertions(+), 3 deletions(-)
 create mode 100644 python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_matmul.py

diff --git a/paddle/fluid/inference/tensorrt/convert/matmul_op.cc b/paddle/fluid/inference/tensorrt/convert/matmul_op.cc
index 0358c86926..7b017900a0 100644
--- a/paddle/fluid/inference/tensorrt/convert/matmul_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/matmul_op.cc
@@ -61,6 +61,38 @@ class MatMulOpConverter : public OpConverter {
     if (fabs(alpha - 1.0) < std::numeric_limits<float>::epsilon()) {
       engine_->SetITensor(output_name, layer->getOutput(0));
     } else {
+      // IScaleLayer requires the input to have at least
+      // three dimensions in static shape mode and at least
+      // four dimensions in dynamic shape mode.
+      auto* matmul_out = layer->getOutput(0);
+      nvinfer1::Dims out_shape = matmul_out->getDimensions();
+      const int out_dims = out_shape.nbDims;
+      bool need_change_dim = false;
+
+      if (engine_->with_dynamic_shape()) {
+        if (out_dims == 3) {
+          need_change_dim = true;
+        }
+      } else {
+        if (out_dims == 2) {
+          need_change_dim = true;
+        }
+      }
+
+      if (need_change_dim) {
+        nvinfer1::Dims reshape_dim;
+        reshape_dim.nbDims = out_dims + 1;
+        reshape_dim.d[out_dims] = 1;
+        for (int i = 0; i < out_dims; i++) {
+          reshape_dim.d[i] = out_shape.d[i];
+        }
+
+        auto* reshape_layer =
+            TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *matmul_out);
+        reshape_layer->setReshapeDimensions(reshape_dim);
+        matmul_out = reshape_layer->getOutput(0);
+      }
+
       auto create_weights = [&](float data, const std::string& type) -> float* {
         std::unique_ptr<framework::Tensor> tmp_tensor(new framework::Tensor());
         tmp_tensor->Resize({1});
@@ -80,9 +112,18 @@
       TensorRTEngine::Weight nv_power{nvinfer1::DataType::kFLOAT,
                                       static_cast<void*>(power_data), 1};
       auto* scale_layer = TRT_ENGINE_ADD_LAYER(
-          engine_, Scale, *layer->getOutput(0), nvinfer1::ScaleMode::kUNIFORM,
+          engine_, Scale, *matmul_out, nvinfer1::ScaleMode::kUNIFORM,
           nv_shift.get(), nv_alpha.get(), nv_power.get());
-      engine_->SetITensor(output_name, scale_layer->getOutput(0));
+      auto* scale_out = scale_layer->getOutput(0);
+
+      if (need_change_dim) {
+        auto* reshape_layer =
+            TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *scale_out);
+        reshape_layer->setReshapeDimensions(out_shape);
+        scale_out = reshape_layer->getOutput(0);
+      }
+
+      engine_->SetITensor(output_name, scale_out);
     }
     if (test_mode) {  // the test framework can not determine which is the
                       // output, so place the declaration inside.
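Note on the converter change above: when alpha != 1, the scale applied after the
matmul goes through IScaleLayer, which cannot consume a 3-D tensor in dynamic
shape mode (or a 2-D tensor in static shape mode). The converter therefore pads
a trailing dimension of size 1 before the scale and restores the original shape
afterwards. Below is a minimal numpy sketch of that shape bookkeeping, not the
TensorRT code itself; the function name and example shapes are ours.

    import numpy as np

    def scale_with_dim_workaround(matmul_out, alpha, with_dynamic_shape):
        out_shape = matmul_out.shape
        out_dims = len(out_shape)
        # Pad exactly the borderline ranks: 3-D in dynamic shape mode,
        # 2-D in static shape mode.
        need_change_dim = out_dims == (3 if with_dynamic_shape else 2)

        x = matmul_out
        if need_change_dim:
            x = x.reshape(out_shape + (1,))  # e.g. (8, 4, 11) -> (8, 4, 11, 1)
        x = alpha * x                        # stands in for the uniform scale layer
        if need_change_dim:
            x = x.reshape(out_shape)         # restore the matmul output shape
        return x

    # Quick check: a 3-D "dynamic shape" output keeps its shape and is scaled.
    out = scale_with_dim_workaround(np.ones((8, 4, 11), np.float32), 0.3, True)
    assert out.shape == (8, 4, 11)
    assert np.allclose(out, 0.3)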
diff --git a/paddle/fluid/inference/tensorrt/op_teller.cc b/paddle/fluid/inference/tensorrt/op_teller.cc
index e9b1c90ab0..603c728207 100644
--- a/paddle/fluid/inference/tensorrt/op_teller.cc
+++ b/paddle/fluid/inference/tensorrt/op_teller.cc
@@ -1550,7 +1550,7 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
           !BOOST_GET_CONST(bool, desc.GetAttr("keep_dim")))
         return false;
     }
-    if (desc.HasAttr("reduce_all")) {
+    if (desc.HasAttr("out_dtype")) {
       int out_dtype = BOOST_GET_CONST(int32_t, desc.GetAttr("out_dtype"));
       if (out_dtype != -1) {
         return false;
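Note on the op_teller change above: the old code gated the read of the
"out_dtype" attribute behind a check for the unrelated "reduce_all" attribute,
so GetAttr("out_dtype") could be reached on an op description that never set
it. The one-line fix makes the guard check the attribute that is actually
read. A hedged Python analogue of the guard pattern follows; FakeOpDesc and
its methods are illustrative stand-ins, not Paddle's API.

    class FakeOpDesc:
        """Stand-in for the C++ OpDesc; only HasAttr/GetAttr are modeled."""

        def __init__(self, attrs):
            self._attrs = attrs

        def has_attr(self, name):
            return name in self._attrs

        def get_attr(self, name):
            return self._attrs[name]  # raises if absent, like the unchecked C++ read

    def tell(desc):
        # Before the fix, this read was gated on has_attr("reduce_all"), so an
        # op carrying "reduce_all" but no "out_dtype" would fault here; now the
        # guard matches the attribute being read.
        if desc.has_attr("out_dtype"):
            if desc.get_attr("out_dtype") != -1:
                return False  # an explicit out_dtype is rejected for TRT
        return True

    assert tell(FakeOpDesc({"reduce_all": True}))  # no out_dtype: safe after the fix
    assert not tell(FakeOpDesc({"out_dtype": 3}))  # explicit dtype is rejected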
diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_matmul.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_matmul.py
new file mode 100644
index 0000000000..8913159b2c
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_matmul.py
@@ -0,0 +1,213 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from trt_layer_auto_scan_test import TrtLayerAutoScanTest, SkipReasons
+from program_config import TensorConfig, ProgramConfig
+import numpy as np
+import paddle.inference as paddle_infer
+from functools import partial
+from typing import Optional, List, Callable, Dict, Any, Set
+import unittest
+
+
+class TrtConvertMatmulTest_static(TrtLayerAutoScanTest):
+    def is_program_valid(self, program_config: ProgramConfig) -> bool:
+        return True
+
+    def sample_program_configs(self):
+        def generate_input(shape):
+            return np.random.random(shape).astype(np.float32)
+
+        for batch in [1, 4]:
+            for trans_x in [True, False]:
+                for trans_y in [True, False]:
+                    if trans_x and trans_y:
+                        input1_shape = [batch, 6, 11]
+                        input2_shape = [batch, 32, 6]
+                    if trans_x and not trans_y:
+                        input1_shape = [batch, 6, 11]
+                        input2_shape = [batch, 6, 32]
+                    if not trans_x and trans_y:
+                        input1_shape = [batch, 32, 6]
+                        input2_shape = [batch, 11, 6]
+                    if not trans_x and not trans_y:
+                        input1_shape = [batch, 32, 6]
+                        input2_shape = [batch, 6, 11]
+                    for alpha in [0.3, 1.0]:
+                        dics = [{
+                            "transpose_X": trans_x,
+                            "transpose_Y": trans_y,
+                            "alpha": alpha,
+                            "fused_reshape_X": [],
+                            "fused_reshape_Y": [],
+                            "fused_transpose_X": [],
+                            "fused_transpose_Y": [],
+                            "fused_reshape_Out": [],
+                            "fused_transpose_Out": []
+                        }]
+                        ops_config = [{
+                            "op_type": "matmul",
+                            "op_inputs": {
+                                "X": ["input1_data"],
+                                "Y": ["input2_data"]
+                            },
+                            "op_outputs": {
+                                "Out": ["output_data"]
+                            },
+                            "op_attrs": dics[0]
+                        }]
+                        ops = self.generate_op_config(ops_config)
+
+                        program_config = ProgramConfig(
+                            ops=ops,
+                            weights={},
+                            inputs={
+                                "input1_data": TensorConfig(data_gen=partial(
+                                    generate_input, input1_shape)),
+                                "input2_data": TensorConfig(data_gen=partial(
+                                    generate_input, input2_shape))
+                            },
+                            outputs=["output_data"])
+
+                        yield program_config
+
+    def sample_predictor_configs(
+            self, program_config) -> (paddle_infer.Config, List[int], float):
+        def generate_dynamic_shape(attrs):
+            pass
+
+        def clear_dynamic_shape():
+            self.dynamic_shape.max_input_shape = {}
+            self.dynamic_shape.min_input_shape = {}
+            self.dynamic_shape.opt_input_shape = {}
+
+        # for static_shape
+        clear_dynamic_shape()
+        self.trt_param.precision = paddle_infer.PrecisionType.Float32
+        yield self.create_inference_config(), (1, 3), 1e-5
+        self.trt_param.precision = paddle_infer.PrecisionType.Half
+        yield self.create_inference_config(), (1, 3), 1e-5
+
+    def test(self):
+        self.run_test()
+
+
+class TrtConvertMatmulTest_dynamic(TrtLayerAutoScanTest):
+    def is_program_valid(self, program_config: ProgramConfig) -> bool:
+        return True
+
+    def sample_program_configs(self):
+        def generate_input(shape):
+            return np.random.random(shape).astype(np.float32)
+
+        for trans_x in [True]:
+            for trans_y in [True]:
+                if trans_x and trans_y:
+                    input1_shape = [4, 4, 4]
+                    input2_shape = [4, 4, 4]
+                # if trans_x and not trans_y:
+                #     input1_shape = [4, 4, 4]
+                #     input2_shape = [4, 4, 4]
+                # if not trans_x and trans_y:
+                #     input1_shape = [batch, 32, 6]
+                #     input2_shape = [batch, 11, 6]
+                # if not trans_x and not trans_y:
+                #     input1_shape = [batch, 32, 6]
+                #     input2_shape = [batch, 6, 11]
+                for alpha in [0.3, 1.0]:
+                    dics = [{
+                        "transpose_X": trans_x,
+                        "transpose_Y": trans_y,
+                        "alpha": alpha,
+                        "fused_reshape_X": [],
+                        "fused_reshape_Y": [],
+                        "fused_transpose_X": [],
+                        "fused_transpose_Y": [],
+                        "fused_reshape_Out": [],
+                        "fused_transpose_Out": []
+                    }]
+                    ops_config = [{
+                        "op_type": "matmul",
+                        "op_inputs": {
+                            "X": ["input1_data"],
+                            "Y": ["input2_data"]
+                        },
+                        "op_outputs": {
+                            "Out": ["output_data"]
+                        },
+                        "op_attrs": dics[0]
+                    }]
+                    ops = self.generate_op_config(ops_config)
+
+                    program_config = ProgramConfig(
+                        ops=ops,
+                        weights={},
+                        inputs={
+                            "input1_data": TensorConfig(
+                                data_gen=partial(generate_input, input1_shape)),
+                            "input2_data": TensorConfig(
+                                data_gen=partial(generate_input, input2_shape))
+                        },
+                        outputs=["output_data"])
+
+                    yield program_config
+
+    def sample_predictor_configs(
+            self, program_config) -> (paddle_infer.Config, List[int], float):
+        def generate_dynamic_shape(attrs):
+            self.dynamic_shape.min_input_shape = {
+                "input1_data": [1, 4, 4],
+                "input2_data": [1, 4, 4]
+            }
+            self.dynamic_shape.max_input_shape = {
+                "input1_data": [16, 4, 4],
+                "input2_data": [16, 4, 128]
+            }
+            self.dynamic_shape.opt_input_shape = {
+                "input1_data": [8, 4, 4],
+                "input2_data": [8, 4, 16]
+            }
+
+        attrs = [
+            program_config.ops[i].attrs
+            for i in range(len(program_config.ops))
+        ]
+
+        # for dynamic_shape
+        generate_dynamic_shape(attrs)
+        self.trt_param.precision = paddle_infer.PrecisionType.Float32
+        yield self.create_inference_config(), (1, 3), 1e-5
+        self.trt_param.precision = paddle_infer.PrecisionType.Half
+        yield self.create_inference_config(), (1, 3), 1e-5
+
+    def add_skip_trt_case(self):
+        def teller1(program_config, predictor_config):
+            if len(self.dynamic_shape.min_input_shape
+                   ) != 0 and self.trt_param.precision == paddle_infer.PrecisionType.Half:
+                return True
+            return False
+
+        self.add_skip_case(
+            teller1, SkipReasons.TRT_NOT_IMPLEMENTED,
+            "The TensorRT MatrixMultiply layer will raise an error in dynamic shape fp16 mode."
+        )
+
+    def test(self):
+        self.add_skip_trt_case()
+        self.run_test()
+
+
+if __name__ == "__main__":
+    unittest.main()
--
GitLab
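Note on the tests above: TrtConvertMatmulTest_static sweeps the batch size,
both transpose flags, and alpha in {0.3, 1.0}, while TrtConvertMatmulTest_dynamic
exercises the 3-D dynamic shape path that the converter change fixes. The
sketch below (ours, not part of the suite) double-checks that each
transpose_X/transpose_Y shape pair in the static test is conformable under
alpha * op(X) @ op(Y), where op transposes the last two axes when the flag is set.

    import numpy as np

    def matmul_ref(x, y, trans_x, trans_y, alpha):
        if trans_x:
            x = np.swapaxes(x, -1, -2)
        if trans_y:
            y = np.swapaxes(y, -1, -2)
        return alpha * np.matmul(x, y)

    batch = 4
    cases = {
        (True, True): ([batch, 6, 11], [batch, 32, 6]),
        (True, False): ([batch, 6, 11], [batch, 6, 32]),
        (False, True): ([batch, 32, 6], [batch, 11, 6]),
        (False, False): ([batch, 32, 6], [batch, 6, 11]),
    }
    for (tx, ty), (s1, s2) in cases.items():
        out = matmul_ref(
            np.ones(s1, np.float32), np.ones(s2, np.float32), tx, ty, 0.3)
        print(tx, ty, out.shape)  # every combination is conformable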