From f6b4ed22cb0c510848f511b9482954bca84e94b4 Mon Sep 17 00:00:00 2001
From: baoachun <962571062@qq.com>
Date: Fri, 29 Oct 2021 16:47:40 +0800
Subject: [PATCH] fix matmul error when input's dim is 3 (#36849)

---
 .../inference/tensorrt/convert/matmul_op.cc   |  45 +++-
 paddle/fluid/inference/tensorrt/op_teller.cc  |   2 +-
 .../ir/inference/test_trt_convert_matmul.py   | 213 ++++++++++++++++++
 3 files changed, 257 insertions(+), 3 deletions(-)
 create mode 100644 python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_matmul.py

diff --git a/paddle/fluid/inference/tensorrt/convert/matmul_op.cc b/paddle/fluid/inference/tensorrt/convert/matmul_op.cc
index 0358c86926..7b017900a0 100644
--- a/paddle/fluid/inference/tensorrt/convert/matmul_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/matmul_op.cc
@@ -61,6 +61,38 @@ class MatMulOpConverter : public OpConverter {
     if (fabs(alpha - 1.0) < std::numeric_limits<float>::epsilon()) {
       engine_->SetITensor(output_name, layer->getOutput(0));
     } else {
+      // IScaleLayer requires the input to have at least
+      // three dimensions in static shape mode and at least
+      // four dimensions in dynamic shape mode.
+      auto* matmul_out = layer->getOutput(0);
+      nvinfer1::Dims out_shape = matmul_out->getDimensions();
+      const int out_dims = out_shape.nbDims;
+      bool need_change_dim = false;
+
+      if (engine_->with_dynamic_shape()) {
+        if (out_dims == 3) {
+          need_change_dim = true;
+        }
+      } else {
+        if (out_dims == 2) {
+          need_change_dim = true;
+        }
+      }
+
+      if (need_change_dim) {
+        nvinfer1::Dims reshape_dim;
+        reshape_dim.nbDims = out_dims + 1;
+        reshape_dim.d[out_dims] = 1;
+        for (int i = 0; i < out_dims; i++) {
+          reshape_dim.d[i] = out_shape.d[i];
+        }
+
+        auto* reshape_layer =
+            TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *matmul_out);
+        reshape_layer->setReshapeDimensions(reshape_dim);
+        matmul_out = reshape_layer->getOutput(0);
+      }
+
       auto create_weights = [&](float data, const std::string& type) -> float* {
         std::unique_ptr<framework::Tensor> tmp_tensor(new framework::Tensor());
         tmp_tensor->Resize({1});
@@ -80,9 +112,18 @@
       TensorRTEngine::Weight nv_power{nvinfer1::DataType::kFLOAT,
                                       static_cast<void*>(power_data), 1};
       auto* scale_layer = TRT_ENGINE_ADD_LAYER(
-          engine_, Scale, *layer->getOutput(0), nvinfer1::ScaleMode::kUNIFORM,
+          engine_, Scale, *matmul_out, nvinfer1::ScaleMode::kUNIFORM,
           nv_shift.get(), nv_alpha.get(), nv_power.get());
-      engine_->SetITensor(output_name, scale_layer->getOutput(0));
+      auto* scale_out = scale_layer->getOutput(0);
+
+      if (need_change_dim) {
+        auto* reshape_layer =
+            TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *scale_out);
+        reshape_layer->setReshapeDimensions(out_shape);
+        scale_out = reshape_layer->getOutput(0);
+      }
+
+      engine_->SetITensor(output_name, scale_out);
     }
     if (test_mode) {  // the test framework can not determine which is the
                       // output, so place the declaration inside.
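Note on the converter change above: when alpha != 1, the scale applied after the
matmul goes through IScaleLayer, which cannot consume a 3-D tensor in dynamic
shape mode (or a 2-D tensor in static shape mode). The converter therefore pads
a trailing dimension of size 1 before the scale and restores the original shape
afterwards. Below is a minimal numpy sketch of that shape bookkeeping, not the
TensorRT code itself; the function name and example shapes are ours.

    import numpy as np

    def scale_with_dim_workaround(matmul_out, alpha, with_dynamic_shape):
        out_shape = matmul_out.shape
        out_dims = len(out_shape)
        # Pad exactly the borderline ranks: 3-D in dynamic shape mode,
        # 2-D in static shape mode.
        need_change_dim = out_dims == (3 if with_dynamic_shape else 2)

        x = matmul_out
        if need_change_dim:
            x = x.reshape(out_shape + (1,))  # e.g. (8, 4, 11) -> (8, 4, 11, 1)
        x = alpha * x                        # stands in for the uniform scale layer
        if need_change_dim:
            x = x.reshape(out_shape)         # restore the matmul output shape
        return x

    # Quick check: a 3-D "dynamic shape" output keeps its shape and is scaled.
    out = scale_with_dim_workaround(np.ones((8, 4, 11), np.float32), 0.3, True)
    assert out.shape == (8, 4, 11)
    assert np.allclose(out, 0.3)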
diff --git a/paddle/fluid/inference/tensorrt/op_teller.cc b/paddle/fluid/inference/tensorrt/op_teller.cc
index e9b1c90ab0..603c728207 100644
--- a/paddle/fluid/inference/tensorrt/op_teller.cc
+++ b/paddle/fluid/inference/tensorrt/op_teller.cc
@@ -1550,7 +1550,7 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
           !BOOST_GET_CONST(bool, desc.GetAttr("keep_dim")))
         return false;
     }
-    if (desc.HasAttr("reduce_all")) {
+    if (desc.HasAttr("out_dtype")) {
       int out_dtype = BOOST_GET_CONST(int32_t, desc.GetAttr("out_dtype"));
       if (out_dtype != -1) {
         return false;
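Note on the op_teller change above: the old code gated the read of the
"out_dtype" attribute behind a check for the unrelated "reduce_all" attribute,
so GetAttr("out_dtype") could be reached on an op description that never set
it. The one-line fix makes the guard check the attribute that is actually
read. A hedged Python analogue of the guard pattern follows; FakeOpDesc and
its methods are illustrative stand-ins, not Paddle's API.

    class FakeOpDesc:
        """Stand-in for the C++ OpDesc; only HasAttr/GetAttr are modeled."""

        def __init__(self, attrs):
            self._attrs = attrs

        def has_attr(self, name):
            return name in self._attrs

        def get_attr(self, name):
            return self._attrs[name]  # raises if absent, like the unchecked C++ read

    def tell(desc):
        # Before the fix, this read was gated on has_attr("reduce_all"), so an
        # op carrying "reduce_all" but no "out_dtype" would fault here; now the
        # guard matches the attribute being read.
        if desc.has_attr("out_dtype"):
            if desc.get_attr("out_dtype") != -1:
                return False  # an explicit out_dtype is rejected for TRT
        return True

    assert tell(FakeOpDesc({"reduce_all": True}))  # no out_dtype: safe after the fix
    assert not tell(FakeOpDesc({"out_dtype": 3}))  # explicit dtype is rejected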
diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_matmul.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_matmul.py
new file mode 100644
index 0000000000..8913159b2c
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_matmul.py
@@ -0,0 +1,213 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from trt_layer_auto_scan_test import TrtLayerAutoScanTest, SkipReasons
+from program_config import TensorConfig, ProgramConfig
+import numpy as np
+import paddle.inference as paddle_infer
+from functools import partial
+from typing import Optional, List, Callable, Dict, Any, Set
+import unittest
+
+
+class TrtConvertMatmulTest_static(TrtLayerAutoScanTest):
+    def is_program_valid(self, program_config: ProgramConfig) -> bool:
+        return True
+
+    def sample_program_configs(self):
+        def generate_input(shape):
+            return np.random.random(shape).astype(np.float32)
+
+        for batch in [1, 4]:
+            for trans_x in [True, False]:
+                for trans_y in [True, False]:
+                    if trans_x and trans_y:
+                        input1_shape = [batch, 6, 11]
+                        input2_shape = [batch, 32, 6]
+                    if trans_x and not trans_y:
+                        input1_shape = [batch, 6, 11]
+                        input2_shape = [batch, 6, 32]
+                    if not trans_x and trans_y:
+                        input1_shape = [batch, 32, 6]
+                        input2_shape = [batch, 11, 6]
+                    if not trans_x and not trans_y:
+                        input1_shape = [batch, 32, 6]
+                        input2_shape = [batch, 6, 11]
+                    for alpha in [0.3, 1.0]:
+                        dics = [{
+                            "transpose_X": trans_x,
+                            "transpose_Y": trans_y,
+                            "alpha": alpha,
+                            "fused_reshape_X": [],
+                            "fused_reshape_Y": [],
+                            "fused_transpose_X": [],
+                            "fused_transpose_Y": [],
+                            "fused_reshape_Out": [],
+                            "fused_transpose_Out": []
+                        }]
+                        ops_config = [{
+                            "op_type": "matmul",
+                            "op_inputs": {
+                                "X": ["input1_data"],
+                                "Y": ["input2_data"]
+                            },
+                            "op_outputs": {
+                                "Out": ["output_data"]
+                            },
+                            "op_attrs": dics[0]
+                        }]
+                        ops = self.generate_op_config(ops_config)
+
+                        program_config = ProgramConfig(
+                            ops=ops,
+                            weights={},
+                            inputs={
+                                "input1_data": TensorConfig(data_gen=partial(
+                                    generate_input, input1_shape)),
+                                "input2_data": TensorConfig(data_gen=partial(
+                                    generate_input, input2_shape))
+                            },
+                            outputs=["output_data"])
+
+                        yield program_config
+
+    def sample_predictor_configs(
+            self, program_config) -> (paddle_infer.Config, List[int], float):
+        def generate_dynamic_shape(attrs):
+            pass
+
+        def clear_dynamic_shape():
+            self.dynamic_shape.max_input_shape = {}
+            self.dynamic_shape.min_input_shape = {}
+            self.dynamic_shape.opt_input_shape = {}
+
+        # for static_shape
+        clear_dynamic_shape()
+        self.trt_param.precision = paddle_infer.PrecisionType.Float32
+        yield self.create_inference_config(), (1, 3), 1e-5
+        self.trt_param.precision = paddle_infer.PrecisionType.Half
+        yield self.create_inference_config(), (1, 3), 1e-5
+
+    def test(self):
+        self.run_test()
+
+
+class TrtConvertMatmulTest_dynamic(TrtLayerAutoScanTest):
+    def is_program_valid(self, program_config: ProgramConfig) -> bool:
+        return True
+
+    def sample_program_configs(self):
+        def generate_input(shape):
+            return np.random.random(shape).astype(np.float32)
+
+        for trans_x in [True]:
+            for trans_y in [True]:
+                if trans_x and trans_y:
+                    input1_shape = [4, 4, 4]
+                    input2_shape = [4, 4, 4]
+                # if trans_x and not trans_y:
+                #     input1_shape = [4, 4, 4]
+                #     input2_shape = [4, 4, 4]
+                # if not trans_x and trans_y:
+                #     input1_shape = [batch, 32, 6]
+                #     input2_shape = [batch, 11, 6]
+                # if not trans_x and not trans_y:
+                #     input1_shape = [batch, 32, 6]
+                #     input2_shape = [batch, 6, 11]
+                for alpha in [0.3, 1.0]:
+                    dics = [{
+                        "transpose_X": trans_x,
+                        "transpose_Y": trans_y,
+                        "alpha": alpha,
+                        "fused_reshape_X": [],
+                        "fused_reshape_Y": [],
+                        "fused_transpose_X": [],
+                        "fused_transpose_Y": [],
+                        "fused_reshape_Out": [],
+                        "fused_transpose_Out": []
+                    }]
+                    ops_config = [{
+                        "op_type": "matmul",
+                        "op_inputs": {
+                            "X": ["input1_data"],
+                            "Y": ["input2_data"]
+                        },
+                        "op_outputs": {
+                            "Out": ["output_data"]
+                        },
+                        "op_attrs": dics[0]
+                    }]
+                    ops = self.generate_op_config(ops_config)
+
+                    program_config = ProgramConfig(
+                        ops=ops,
+                        weights={},
+                        inputs={
+                            "input1_data": TensorConfig(
+                                data_gen=partial(generate_input, input1_shape)),
+                            "input2_data": TensorConfig(
+                                data_gen=partial(generate_input, input2_shape))
+                        },
+                        outputs=["output_data"])
+
+                    yield program_config
+
+    def sample_predictor_configs(
+            self, program_config) -> (paddle_infer.Config, List[int], float):
+        def generate_dynamic_shape(attrs):
+            self.dynamic_shape.min_input_shape = {
+                "input1_data": [1, 4, 4],
+                "input2_data": [1, 4, 4]
+            }
+            self.dynamic_shape.max_input_shape = {
+                "input1_data": [16, 4, 4],
+                "input2_data": [16, 4, 128]
+            }
+            self.dynamic_shape.opt_input_shape = {
+                "input1_data": [8, 4, 4],
+                "input2_data": [8, 4, 16]
+            }
+
+        attrs = [
+            program_config.ops[i].attrs
+            for i in range(len(program_config.ops))
+        ]
+
+        # for dynamic_shape
+        generate_dynamic_shape(attrs)
+        self.trt_param.precision = paddle_infer.PrecisionType.Float32
+        yield self.create_inference_config(), (1, 3), 1e-5
+        self.trt_param.precision = paddle_infer.PrecisionType.Half
+        yield self.create_inference_config(), (1, 3), 1e-5
+
+    def add_skip_trt_case(self):
+        def teller1(program_config, predictor_config):
+            if len(self.dynamic_shape.min_input_shape
+                   ) != 0 and self.trt_param.precision == paddle_infer.PrecisionType.Half:
+                return True
+            return False
+
+        self.add_skip_case(
+            teller1, SkipReasons.TRT_NOT_IMPLEMENTED,
+            "The TensorRT MatrixMultiply layer will raise an error in dynamic shape fp16 mode."
+        )
+
+    def test(self):
+        self.add_skip_trt_case()
+        self.run_test()
+
+
+if __name__ == "__main__":
+    unittest.main()
--
GitLab
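Note on the tests above: TrtConvertMatmulTest_static sweeps the batch size,
both transpose flags, and alpha in {0.3, 1.0}, while TrtConvertMatmulTest_dynamic
exercises the 3-D dynamic shape path that the converter change fixes. The
sketch below (ours, not part of the suite) double-checks that each
transpose_X/transpose_Y shape pair in the static test is conformable under
alpha * op(X) @ op(Y), where op transposes the last two axes when the flag is set.

    import numpy as np

    def matmul_ref(x, y, trans_x, trans_y, alpha):
        if trans_x:
            x = np.swapaxes(x, -1, -2)
        if trans_y:
            y = np.swapaxes(y, -1, -2)
        return alpha * np.matmul(x, y)

    batch = 4
    cases = {
        (True, True): ([batch, 6, 11], [batch, 32, 6]),
        (True, False): ([batch, 6, 11], [batch, 6, 32]),
        (False, True): ([batch, 32, 6], [batch, 11, 6]),
        (False, False): ([batch, 32, 6], [batch, 6, 11]),
    }
    for (tx, ty), (s1, s2) in cases.items():
        out = matmul_ref(
            np.ones(s1, np.float32), np.ones(s2, np.float32), tx, ty, 0.3)
        print(tx, ty, out.shape)  # every combination is conformable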