From 8f1ac7cfc1117f89daeeb885147ffe0bf15675ff Mon Sep 17 00:00:00 2001
From: xiaoxiaohehe001 <49090790+xiaoxiaohehe001@users.noreply.github.com>
Date: Fri, 14 Oct 2022 17:47:59 +0800
Subject: [PATCH] Add bmm convert (#47011)

---
 .../fluid/inference/api/analysis_predictor.cc |   1 +
 .../inference/tensorrt/convert/CMakeLists.txt |   1 +
 .../inference/tensorrt/convert/bmm_op.cc      |  59 ++++++++
 paddle/fluid/inference/tensorrt/op_teller.cc  |   8 ++
 .../ir/inference/test_trt_convert_bmm.py      | 127 ++++++++++++++++++
 5 files changed, 196 insertions(+)
 create mode 100644 paddle/fluid/inference/tensorrt/convert/bmm_op.cc
 create mode 100644 python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_bmm.py

diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
index 4d821469fad..fb3f14bc587 100755
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -2120,6 +2120,7 @@ USE_TRT_CONVERTER(flatten);
 USE_TRT_CONVERTER(flatten_contiguous_range);
 USE_TRT_CONVERTER(matmul);
 USE_TRT_CONVERTER(matmul_v2);
+USE_TRT_CONVERTER(bmm);
 USE_TRT_CONVERTER(conv2d);
 USE_TRT_CONVERTER(relu);
 USE_TRT_CONVERTER(exp);
diff --git a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
index 3a2fb526078..a40f2bd58d5 100644
--- a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
+++ b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
@@ -4,6 +4,7 @@ list(
   CONVERT_FILES
   matmul_op.cc
   matmul_v2_op.cc
+  bmm_op.cc
   conv2d_op.cc
   fc_op.cc
   pool2d_op.cc
diff --git a/paddle/fluid/inference/tensorrt/convert/bmm_op.cc b/paddle/fluid/inference/tensorrt/convert/bmm_op.cc
new file mode 100644
index 00000000000..4f4751d8ca9
--- /dev/null
+++ b/paddle/fluid/inference/tensorrt/convert/bmm_op.cc
@@ -0,0 +1,59 @@
+/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
+
+namespace paddle {
+namespace framework {
+class Scope;
+
+namespace proto {
+class OpDesc;
+}  // namespace proto
+}  // namespace framework
+}  // namespace paddle
+
+namespace paddle {
+namespace inference {
+namespace tensorrt {
+
+// Converter that lowers the Paddle `bmm` op onto a TensorRT MatrixMultiply
+// layer.
+class BMMOpConverter : public OpConverter {
+ public:
+  // Looks up the op's "X" and "Y" tensors in the engine, multiplies them
+  // with MatrixOperation::kNONE on both sides, and hands the layer and the
+  // op's "Out" name to RreplenishLayerAndOutput under the label "bmm".
+  void operator()(const framework::proto::OpDesc& op,
+                  const framework::Scope& scope,
+                  bool test_mode) override {
+    framework::OpDesc op_desc(op, nullptr);
+
+    auto* lhs = engine_->GetITensor(op_desc.Input("X")[0]);
+    auto* rhs = engine_->GetITensor(op_desc.Input("Y")[0]);
+    const auto output_name = op_desc.Output("Out")[0];
+
+    constexpr auto kNoOp = nvinfer1::MatrixOperation::kNONE;
+    nvinfer1::ILayer* layer = TRT_ENGINE_ADD_LAYER(
+        engine_, MatrixMultiply, *lhs, kNoOp, *rhs, kNoOp);
+
+    RreplenishLayerAndOutput(layer, "bmm", {output_name}, test_mode);
+  }
+};
+
+}  // namespace tensorrt
+}  // namespace inference
+}  // namespace paddle
+
+REGISTER_TRT_OP_CONVERTER(bmm, BMMOpConverter);
diff --git a/paddle/fluid/inference/tensorrt/op_teller.cc b/paddle/fluid/inference/tensorrt/op_teller.cc
index ed7f1c691c9..90607240dce 100644
--- a/paddle/fluid/inference/tensorrt/op_teller.cc
+++ b/paddle/fluid/inference/tensorrt/op_teller.cc
@@ -327,6 +327,12 @@ struct SimpleOpTypeSetTeller : public Teller {
       }
     }
 
+    if (op_type == "bmm") {
+      if (!with_dynamic_shape) {
+        return false;
+      }
+    }
+
     if (op_type == "matmul_v2") {
       if (!with_dynamic_shape) {
         return false;
@@ -2108,6 +2114,7 @@ struct SimpleOpTypeSetTeller : public Teller {
       "mul",
       "matmul",
       "matmul_v2",
+      "bmm",
       "conv2d",
       "conv2d_fusion",
       "pool2d",
@@ -2220,6 +2227,7 @@ struct SimpleOpTypeSetTeller : public Teller {
       "mul",
       "matmul",
       "matmul_v2",
+      "bmm",
       "conv2d",
       "conv2d_fusion",
       "pool2d",
diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_bmm.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_bmm.py
new file mode 100644
index 00000000000..62bea6fbbc4
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_bmm.py
@@ -0,0 +1,127 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from trt_layer_auto_scan_test import TrtLayerAutoScanTest, SkipReasons
+from program_config import TensorConfig, ProgramConfig
+import numpy as np
+import paddle.inference as paddle_infer
+from functools import partial
+from typing import Optional, List, Callable, Dict, Any, Set
+import unittest
+import os
+
+
+class TrtConvertBmmTest_dynamic(TrtLayerAutoScanTest):
+    """Auto-scan test of the TensorRT conversion of a single ``bmm`` op."""
+
+    def sample_program_configs(self):
+        """Yield a one-op ``bmm`` program for each batch size from 10 to 15.
+
+        ``X`` is ``[batch, 350, 75]`` and ``Y`` is ``[batch, 75, 25]``; both
+        are filled with float32 values drawn by ``np.random.random``.
+        """
+
+        def generate_input(shape):
+            return np.random.random(shape).astype(np.float32)
+
+        for batch in range(10, 16):
+            x_shape = [batch, 350, 75]
+            y_shape = [batch, 75, 25]
+            bmm_op = {
+                "op_type": "bmm",
+                "op_inputs": {
+                    "X": ["input1_data"],
+                    "Y": ["input2_data"]
+                },
+                "op_outputs": {
+                    "Out": ["output_data"]
+                },
+                "op_attrs": {}
+            }
+
+            yield ProgramConfig(
+                ops=self.generate_op_config([bmm_op]),
+                weights={},
+                inputs={
+                    "input1_data":
+                    TensorConfig(data_gen=partial(generate_input, x_shape)),
+                    "input2_data":
+                    TensorConfig(data_gen=partial(generate_input, y_shape))
+                },
+                outputs=["output_data"])
+
+    def sample_predictor_configs(
+            self, program_config) -> (paddle_infer.Config, List[int], float):
+        """Yield (config, expected node counts, tolerance) for four runs.
+
+        Float32 then Half with the dynamic-shape ranges cleared, followed by
+        Float32 then Half with the ranges set for batch sizes 10 to 100.
+        Tolerance is 1e-5 for the first pair and 1e-4 for the second (1e-2
+        when ``os.name`` is ``'nt'``).
+        """
+
+        def shapes_for(batch):
+            return {
+                "input1_data": [batch, 350, 75],
+                "input2_data": [batch, 75, 25]
+            }
+
+        def generate_dynamic_shape(attrs):
+            self.dynamic_shape.min_input_shape = shapes_for(10)
+            self.dynamic_shape.max_input_shape = shapes_for(100)
+            self.dynamic_shape.opt_input_shape = shapes_for(15)
+
+        def clear_dynamic_shape():
+            self.dynamic_shape.min_input_shape = {}
+            self.dynamic_shape.max_input_shape = {}
+            self.dynamic_shape.opt_input_shape = {}
+
+        def generate_trt_nodes_num(attrs, dynamic_shape):
+            return (1, 3) if dynamic_shape else (0, 4)
+
+        attrs = [op.attrs for op in program_config.ops]
+        fp32 = paddle_infer.PrecisionType.Float32
+        half = paddle_infer.PrecisionType.Half
+
+        # Static shape first. The shape ranges must be cleared before each
+        # config is created, so the mutation stays right ahead of the yields.
+        clear_dynamic_shape()
+        for precision in (fp32, half):
+            self.trt_param.precision = precision
+            yield self.create_inference_config(), generate_trt_nodes_num(
+                attrs, False), 1e-5
+
+        # The output has little diff between gpu and trt in CI-Windows-Inference
+        on_windows = os.name == 'nt'
+        tol_fp32 = 1e-2 if on_windows else 1e-4
+        tol_half = 1e-2 if on_windows else 1e-4
+
+        # for dynamic_shape
+        generate_dynamic_shape(attrs)
+        for precision, tol in ((fp32, tol_fp32), (half, tol_half)):
+            self.trt_param.precision = precision
+            yield self.create_inference_config(), generate_trt_nodes_num(
+                attrs, True), tol
+
+    def add_skip_trt_case(self):
+        """Intentionally a no-op: this test registers no skip cases."""
+
+    def test(self):
+        """Call ``add_skip_trt_case()`` and then ``run_test()``."""
+        self.add_skip_trt_case()
+        self.run_test()
+
+
+if __name__ == "__main__":
+    unittest.main()
-- 
GitLab