diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index 445145dde3954ce864abaaa2c8e5999502c0ccb1..33b3da0717bec3e5f84f6a584cbd96c9341ca38a 100755 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -2107,6 +2107,7 @@ USE_TRT_CONVERTER(transpose2); USE_TRT_CONVERTER(flatten); USE_TRT_CONVERTER(flatten_contiguous_range); USE_TRT_CONVERTER(matmul); +USE_TRT_CONVERTER(matmul_v2); USE_TRT_CONVERTER(conv2d); USE_TRT_CONVERTER(relu); USE_TRT_CONVERTER(exp); diff --git a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt index 60a5d0f282525c730819d78a306018fb2d041e46..3a2fb52607890e92b10621be1cc3777c4a3f3e35 100644 --- a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt +++ b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt @@ -3,6 +3,7 @@ list( APPEND CONVERT_FILES matmul_op.cc + matmul_v2_op.cc conv2d_op.cc fc_op.cc pool2d_op.cc diff --git a/paddle/fluid/inference/tensorrt/convert/matmul_v2_op.cc b/paddle/fluid/inference/tensorrt/convert/matmul_v2_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..c6f5a42a7da8917ce4ed26354e7febbfeec72e73 --- /dev/null +++ b/paddle/fluid/inference/tensorrt/convert/matmul_v2_op.cc @@ -0,0 +1,118 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#include "paddle/fluid/inference/tensorrt/convert/op_converter.h" +#include "paddle/fluid/inference/tensorrt/plugin/matmul_op_int8_plugin.h" + +namespace paddle { +namespace framework { +class Scope; + +namespace proto { +class OpDesc; +} // namespace proto +} // namespace framework +} // namespace paddle + +namespace paddle { +namespace inference { +namespace tensorrt { + +/* + * MatMulV2Op, IMatrixMultiplyLayer in TRT. This Layer doesn't has weights. + */ +class MatMulV2OpConverter : public OpConverter { + public: + void operator()(const framework::proto::OpDesc& op, + const framework::Scope& scope, + bool test_mode) override { + VLOG(3) << "convert a fluid matmul_v2 op to tensorrt matmul layer "; + framework::OpDesc op_desc(op, nullptr); + nvinfer1::ILayer* layer = nullptr; + + // Declare inputs + auto* input1 = engine_->GetITensor(op_desc.Input("X")[0]); + auto* input2 = engine_->GetITensor(op_desc.Input("Y")[0]); + + nvinfer1::Dims dims_x = input1->getDimensions(); + nvinfer1::Dims dims_y = input2->getDimensions(); + + bool transpose_X = PADDLE_GET_CONST(bool, op_desc.GetAttr("trans_x")); + bool transpose_Y = PADDLE_GET_CONST(bool, op_desc.GetAttr("trans_y")); + + auto output_name = op_desc.Output("Out")[0]; + + nvinfer1::MatrixOperation matrix_operation_X = + transpose_X ? nvinfer1::MatrixOperation::kTRANSPOSE + : nvinfer1::MatrixOperation::kNONE; + nvinfer1::MatrixOperation matrix_operation_Y = + transpose_Y ? 
nvinfer1::MatrixOperation::kTRANSPOSE + : nvinfer1::MatrixOperation::kNONE; + + int one_num = 0; + nvinfer1::ITensor* new_shape_tensor = nullptr; + if (dims_x.nbDims < dims_y.nbDims) { + one_num = dims_y.nbDims - dims_x.nbDims; + new_shape_tensor = Shape(input1); + std::vector one_vec(one_num, 1); + auto* one_tensor = Add1DConstantLayer(one_vec); + new_shape_tensor = + Concat(std::vector{one_tensor, new_shape_tensor}); + + auto* reshape_layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *input1); + reshape_layer->setInput(1, *new_shape_tensor); + + layer = TRT_ENGINE_ADD_LAYER(engine_, + MatrixMultiply, + *reshape_layer->getOutput(0), + matrix_operation_X, + *input2, + matrix_operation_Y); + + } else if (dims_x.nbDims > dims_y.nbDims) { + one_num = dims_x.nbDims - dims_y.nbDims; + new_shape_tensor = Shape(input2); + std::vector one_vec(one_num, 1); + auto* one_tensor = Add1DConstantLayer(one_vec); + new_shape_tensor = + Concat(std::vector{one_tensor, new_shape_tensor}); + auto* reshape_layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *input2); + reshape_layer->setInput(1, *new_shape_tensor); + + layer = TRT_ENGINE_ADD_LAYER(engine_, + MatrixMultiply, + *input1, + matrix_operation_X, + *reshape_layer->getOutput(0), + matrix_operation_Y); + + } else { + layer = TRT_ENGINE_ADD_LAYER(engine_, + MatrixMultiply, + *input1, + matrix_operation_X, + *input2, + matrix_operation_Y); + } + VLOG(3) << "Convert a fluid matmul_v2_op_float to TensorRT "; + + RreplenishLayerAndOutput(layer, "matmul_v2_op", {output_name}, test_mode); + } +}; + +} // namespace tensorrt +} // namespace inference +} // namespace paddle + +REGISTER_TRT_OP_CONVERTER(matmul_v2, MatMulV2OpConverter); diff --git a/paddle/fluid/inference/tensorrt/op_teller.cc b/paddle/fluid/inference/tensorrt/op_teller.cc index 55457aa5827e41b01ac24c27abecb9582167ab85..3db57ea19637b1dfdc4169abbf265e66bb7e0616 100644 --- a/paddle/fluid/inference/tensorrt/op_teller.cc +++ b/paddle/fluid/inference/tensorrt/op_teller.cc @@ -326,6 
+326,20 @@ struct SimpleOpTypeSetTeller : public Teller { } } + if (op_type == "matmul_v2") { + if (!with_dynamic_shape) { + return false; + } + auto* block = desc.Block(); + if (block == nullptr) { + VLOG(3) << "The block desc is nullptr, we can't continue to analyze. " + "Developers need to check whether block_desc is passed in " + "the pass."; + return false; + } + return true; + } + if (op_type == "matmul") { auto* block = desc.Block(); if (block == nullptr) { @@ -2081,6 +2095,7 @@ struct SimpleOpTypeSetTeller : public Teller { std::unordered_set int8_teller_set{ "mul", "matmul", + "matmul_v2", "conv2d", "conv2d_fusion", "pool2d", @@ -2190,6 +2205,7 @@ struct SimpleOpTypeSetTeller : public Teller { std::unordered_set teller_set{ "mul", "matmul", + "matmul_v2", "conv2d", "conv2d_fusion", "pool2d", diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_matmul_v2.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_matmul_v2.py new file mode 100644 index 0000000000000000000000000000000000000000..b5d94ebfe3c6fb40dee5a729e52db8029947af79 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_matmul_v2.py @@ -0,0 +1,197 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 

from trt_layer_auto_scan_test import TrtLayerAutoScanTest, SkipReasons
from program_config import TensorConfig, ProgramConfig
import numpy as np
import paddle.inference as paddle_infer
from functools import partial
from typing import Optional, List, Callable, Dict, Any, Set
import unittest
import os


class TrtConvertMatmulTest_dynamic(TrtLayerAutoScanTest):
    """matmul_v2 with a 4-D X ([batch, 64, 350, 75]) and a 2-D Y ([75, 25])."""

    def sample_program_configs(self):
        """Yield one single-op matmul_v2 program per batch size in 10..15."""

        def generate_input(shape):
            return np.random.random(shape).astype(np.float32)

        cases = [(batch, trans_x, trans_y)
                 for batch in [10, 11, 12, 13, 14, 15]
                 for trans_x in [False]
                 for trans_y in [False]]

        for batch, trans_x, trans_y in cases:
            x_shape = [batch, 64, 350, 75]
            y_shape = [75, 25]
            matmul_op = {
                "op_type": "matmul_v2",
                "op_inputs": {
                    "X": ["input1_data"],
                    "Y": ["input2_data"]
                },
                "op_outputs": {
                    "Out": ["output_data"]
                },
                "op_attrs": {
                    "trans_x": trans_x,
                    "trans_y": trans_y,
                }
            }
            # Build the op list before the tensor configs, as the original
            # statement order did.
            ops = self.generate_op_config([matmul_op])

            yield ProgramConfig(
                ops=ops,
                weights={},
                inputs={
                    "input1_data":
                    TensorConfig(data_gen=partial(generate_input, x_shape)),
                    "input2_data":
                    TensorConfig(data_gen=partial(generate_input, y_shape))
                },
                outputs=["output_data"])

    def sample_predictor_configs(
            self, program_config) -> (paddle_infer.Config, List[int], float):
        """Yield an FP32 and then an FP16 predictor config with dynamic shape."""

        def generate_dynamic_shape(attrs):

            def shapes_for(batch):
                return {
                    "input1_data": [batch, 64, 350, 75],
                    "input2_data": [75, 25]
                }

            self.dynamic_shape.min_input_shape = shapes_for(10)
            self.dynamic_shape.max_input_shape = shapes_for(100)
            self.dynamic_shape.opt_input_shape = shapes_for(15)

        attrs = [op.attrs for op in program_config.ops]

        # Windows inference CI shows a small GPU-vs-TRT difference, so the
        # tolerance is relaxed there.
        on_windows = os.name == 'nt'
        tol_fp32 = 1e-3 if on_windows else 1e-5
        tol_half = 1e-3 if on_windows else 1e-5

        # for dynamic_shape
        generate_dynamic_shape(attrs)
        runs = [
            (paddle_infer.PrecisionType.Float32, tol_fp32),
            (paddle_infer.PrecisionType.Half, tol_half),
        ]
        for precision, tolerance in runs:
            self.trt_param.precision = precision
            yield self.create_inference_config(), (1, 3), tolerance

    def add_skip_trt_case(self):
        """No cases are skipped for this test."""
        pass

    def test(self):
        self.add_skip_trt_case()
        self.run_test()


class TrtConvertMatmulTest_dynamic2(TrtLayerAutoScanTest):
    """matmul_v2 with a 2-D X ([60, 40]) and a 3-D Y ([batch, 40, 90])."""

    def sample_program_configs(self):
        """Yield one single-op matmul_v2 program per batch size in 10..15."""

        def generate_input(shape):
            return np.random.random(shape).astype(np.float32)

        cases = [(batch, trans_x, trans_y)
                 for batch in [10, 11, 12, 13, 14, 15]
                 for trans_x in [False]
                 for trans_y in [False]]

        for batch, trans_x, trans_y in cases:
            x_shape = [60, 40]
            y_shape = [batch, 40, 90]
            matmul_op = {
                "op_type": "matmul_v2",
                "op_inputs": {
                    "X": ["input1_data"],
                    "Y": ["input2_data"]
                },
                "op_outputs": {
                    "Out": ["output_data"]
                },
                "op_attrs": {
                    "trans_x": trans_x,
                    "trans_y": trans_y,
                }
            }
            # Build the op list before the tensor configs, as the original
            # statement order did.
            ops = self.generate_op_config([matmul_op])

            yield ProgramConfig(
                ops=ops,
                weights={},
                inputs={
                    "input1_data":
                    TensorConfig(data_gen=partial(generate_input, x_shape)),
                    "input2_data":
                    TensorConfig(data_gen=partial(generate_input, y_shape))
                },
                outputs=["output_data"])

    def sample_predictor_configs(
            self, program_config) -> (paddle_infer.Config, List[int], float):
        """Yield an FP32 and then an FP16 predictor config with dynamic shape."""

        def generate_dynamic_shape(attrs):

            def shapes_for(batch):
                return {
                    "input1_data": [60, 40],
                    "input2_data": [batch, 40, 90]
                }

            self.dynamic_shape.min_input_shape = shapes_for(10)
            self.dynamic_shape.max_input_shape = shapes_for(20)
            self.dynamic_shape.opt_input_shape = shapes_for(15)

        attrs = [op.attrs for op in program_config.ops]

        # Windows inference CI shows a small GPU-vs-TRT difference, so the
        # tolerance is relaxed there.
        on_windows = os.name == 'nt'
        tol_fp32 = 1e-3 if on_windows else 1e-5
        tol_half = 1e-3 if on_windows else 1e-5

        # for dynamic_shape
        generate_dynamic_shape(attrs)
        runs = [
            (paddle_infer.PrecisionType.Float32, tol_fp32),
            (paddle_infer.PrecisionType.Half, tol_half),
        ]
        for precision, tolerance in runs:
            self.trt_param.precision = precision
            yield self.create_inference_config(), (1, 3), tolerance

    def add_skip_trt_case(self):
        """No cases are skipped for this test."""
        pass

    def test(self):
        self.add_skip_trt_case()
        self.run_test()


if __name__ == "__main__":
    unittest.main()