diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index 4d821469fad169f7ceb92658c8d98af71fd0f187..fb3f14bc58727d6211c0d1429b06cb47e76c5bbb 100755 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -2120,6 +2120,7 @@ USE_TRT_CONVERTER(flatten); USE_TRT_CONVERTER(flatten_contiguous_range); USE_TRT_CONVERTER(matmul); USE_TRT_CONVERTER(matmul_v2); +USE_TRT_CONVERTER(bmm); USE_TRT_CONVERTER(conv2d); USE_TRT_CONVERTER(relu); USE_TRT_CONVERTER(exp); diff --git a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt index 3a2fb52607890e92b10621be1cc3777c4a3f3e35..a40f2bd58d582c8ea24c1d3e7d2886f41b58ca3b 100644 --- a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt +++ b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt @@ -4,6 +4,7 @@ list( CONVERT_FILES matmul_op.cc matmul_v2_op.cc + bmm_op.cc conv2d_op.cc fc_op.cc pool2d_op.cc diff --git a/paddle/fluid/inference/tensorrt/convert/bmm_op.cc b/paddle/fluid/inference/tensorrt/convert/bmm_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..4f4751d8ca977d654c387d0c39de0e6aef3640bb --- /dev/null +++ b/paddle/fluid/inference/tensorrt/convert/bmm_op.cc @@ -0,0 +1,59 @@ +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/
+
+#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
+
+namespace paddle {
+namespace framework {
+class Scope;
+
+namespace proto {
+class OpDesc;
+}  // namespace proto
+}  // namespace framework
+}  // namespace paddle
+
+namespace paddle {
+namespace inference {
+namespace tensorrt {
+
+// Maps the Paddle bmm op onto a TensorRT MatrixMultiply layer (X times Y).
+class BMMOpConverter : public OpConverter {
+ public:
+  void operator()(const framework::proto::OpDesc& op,
+                  const framework::Scope& scope,
+                  bool test_mode) override {
+    framework::OpDesc op_desc(op, nullptr);
+    // Operand tensors are fetched from the engine by variable name.
+    auto* lhs = engine_->GetITensor(op_desc.Input("X")[0]);
+    auto* rhs = engine_->GetITensor(op_desc.Input("Y")[0]);
+    auto out_name = op_desc.Output("Out")[0];
+
+    // kNONE on both sides: neither operand is transposed before the multiply.
+    nvinfer1::ILayer* matmul_layer =
+        TRT_ENGINE_ADD_LAYER(engine_,
+                             MatrixMultiply,
+                             *lhs,
+                             nvinfer1::MatrixOperation::kNONE,
+                             *rhs,
+                             nvinfer1::MatrixOperation::kNONE);
+    RreplenishLayerAndOutput(matmul_layer, "bmm", {out_name}, test_mode);
+  }
+};
+
+}  // namespace tensorrt
+}  // namespace inference
+}  // namespace paddle
+
+REGISTER_TRT_OP_CONVERTER(bmm, BMMOpConverter);
diff --git a/paddle/fluid/inference/tensorrt/op_teller.cc b/paddle/fluid/inference/tensorrt/op_teller.cc
index ed7f1c691c9ff1b499b87dc6c6c8e52aa883edcc..90607240dce4bd00204bc8aa406bc62a801e3944 100644
--- a/paddle/fluid/inference/tensorrt/op_teller.cc
+++ b/paddle/fluid/inference/tensorrt/op_teller.cc
@@ -327,6 +327,12 @@ struct SimpleOpTypeSetTeller : public Teller {
       }
     }
 
+    if (op_type == "bmm") {
+      if (!with_dynamic_shape) {
+        return false;
+      }
+    }
+
     if (op_type == "matmul_v2") {
       if (!with_dynamic_shape) {
         return false;
@@ -2108,6 +2114,7 @@ struct SimpleOpTypeSetTeller : public Teller {
       "mul",
       "matmul",
       "matmul_v2",
+      "bmm",
       "conv2d",
       "conv2d_fusion",
       "pool2d",
@@ -2220,6 +2227,7 @@ struct SimpleOpTypeSetTeller : public Teller {
       "mul",
       "matmul",
       "matmul_v2",
+      "bmm",
       "conv2d",
       "conv2d_fusion",
       "pool2d",
diff --git
a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_bmm.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_bmm.py
new file mode 100644
index 0000000000000000000000000000000000000000..62bea6fbbc4bc2a876769ceb9e651d925cbaf292
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_bmm.py
@@ -0,0 +1,127 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from trt_layer_auto_scan_test import TrtLayerAutoScanTest, SkipReasons
+from program_config import TensorConfig, ProgramConfig
+import numpy as np
+import paddle.inference as paddle_infer
+from functools import partial
+from typing import Optional, List, Callable, Dict, Any, Set
+import unittest
+import os
+
+
+class TrtConvertBmmTest_dynamic(TrtLayerAutoScanTest):
+    """Auto-scan test for the TensorRT ``bmm`` converter."""
+
+    def sample_program_configs(self):
+        """Yield a single-op ``bmm`` program for each batch size in 10..15."""
+
+        def random_fp32(shape):
+            return np.random.random(shape).astype(np.float32)
+
+        for batch_size in range(10, 16):
+            x_shape = [batch_size, 350, 75]
+            y_shape = [batch_size, 75, 25]
+            bmm_op = {
+                "op_type": "bmm",
+                "op_inputs": {
+                    "X": ["input1_data"],
+                    "Y": ["input2_data"]
+                },
+                "op_outputs": {
+                    "Out": ["output_data"]
+                },
+                "op_attrs": {}
+            }
+            ops = self.generate_op_config([bmm_op])
+
+            yield ProgramConfig(
+                ops=ops,
+                weights={},
+                inputs={
+                    "input1_data":
+                    TensorConfig(data_gen=partial(random_fp32, x_shape)),
+                    "input2_data":
+                    TensorConfig(data_gen=partial(random_fp32, y_shape))
+                },
+                outputs=["output_data"])
+
+    def sample_predictor_configs(
+            self, program_config) -> (paddle_infer.Config, List[int], float):
+        """Yield (config, expected node counts, tolerance) for each run mode.
+
+        Static shape is exercised first, then dynamic shape; each mode is run
+        with Float32 precision and then with Half precision.
+        """
+
+        def apply_dynamic_shape(op_attrs):
+            # Batch dimension ranges over 10..100 with 15 as the optimum.
+            shape_info = self.dynamic_shape
+            shape_info.min_input_shape = {
+                "input1_data": [10, 350, 75],
+                "input2_data": [10, 75, 25]
+            }
+            shape_info.max_input_shape = {
+                "input1_data": [100, 350, 75],
+                "input2_data": [100, 75, 25]
+            }
+            shape_info.opt_input_shape = {
+                "input1_data": [15, 350, 75],
+                "input2_data": [15, 75, 25]
+            }
+
+        def reset_dynamic_shape():
+            # Empty all three shape maps.
+            shape_info = self.dynamic_shape
+            shape_info.min_input_shape = {}
+            shape_info.max_input_shape = {}
+            shape_info.opt_input_shape = {}
+
+        def expected_trt_nodes(op_attrs, dynamic_shape):
+            # Presumably (TRT engine nodes, total nodes) - confirm in harness.
+            return (1, 3) if dynamic_shape else (0, 4)
+
+        op_attrs = [op.attrs for op in program_config.ops]
+        precisions = (paddle_infer.PrecisionType.Float32,
+                      paddle_infer.PrecisionType.Half)
+
+        # Static shape.
+        reset_dynamic_shape()
+        for precision in precisions:
+            self.trt_param.precision = precision
+            yield self.create_inference_config(), expected_trt_nodes(
+                op_attrs, False), 1e-5
+
+        # The output has little diff between gpu and trt in CI-Windows-Inference
+        tolerance = 1e-2 if os.name == 'nt' else 1e-4
+
+        # Dynamic shape.
+        apply_dynamic_shape(op_attrs)
+        for precision in precisions:
+            self.trt_param.precision = precision
+            yield self.create_inference_config(), expected_trt_nodes(
+                op_attrs, True), tolerance
+
+    def add_skip_trt_case(self):
+        """No skip rules are registered for this test."""
+
+    def test(self):
+        """Apply the (empty) skip rules, then delegate to ``run_test``."""
+        self.add_skip_trt_case()
+        self.run_test()
+
+
+if __name__ == "__main__":
+    unittest.main()